{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7389493483810291, "eval_steps": 500, "global_step": 27500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013435442697836894, "grad_norm": 11.01321029663086, "learning_rate": 5.000000000000001e-07, "loss": 9.7918, "step": 5 }, { "epoch": 0.0002687088539567379, "grad_norm": 11.176947593688965, "learning_rate": 1.0000000000000002e-06, "loss": 9.6385, "step": 10 }, { "epoch": 0.00040306328093510683, "grad_norm": 10.01496410369873, "learning_rate": 1.5e-06, "loss": 9.5219, "step": 15 }, { "epoch": 0.0005374177079134758, "grad_norm": 7.92938756942749, "learning_rate": 2.0000000000000003e-06, "loss": 9.2768, "step": 20 }, { "epoch": 0.0006717721348918447, "grad_norm": 6.977506637573242, "learning_rate": 2.5e-06, "loss": 9.076, "step": 25 }, { "epoch": 0.0008061265618702137, "grad_norm": 5.584914684295654, "learning_rate": 3e-06, "loss": 8.8641, "step": 30 }, { "epoch": 0.0009404809888485826, "grad_norm": 4.832436561584473, "learning_rate": 3.5000000000000004e-06, "loss": 8.7415, "step": 35 }, { "epoch": 0.0010748354158269515, "grad_norm": 4.32351016998291, "learning_rate": 4.000000000000001e-06, "loss": 8.4831, "step": 40 }, { "epoch": 0.0012091898428053204, "grad_norm": 3.6954407691955566, "learning_rate": 4.5e-06, "loss": 8.343, "step": 45 }, { "epoch": 0.0013435442697836894, "grad_norm": 3.2270238399505615, "learning_rate": 5e-06, "loss": 8.1544, "step": 50 }, { "epoch": 0.0014778986967620583, "grad_norm": 2.855537176132202, "learning_rate": 5.500000000000001e-06, "loss": 8.0265, "step": 55 }, { "epoch": 0.0016122531237404273, "grad_norm": 2.827409505844116, "learning_rate": 6e-06, "loss": 7.9643, "step": 60 }, { "epoch": 0.0017466075507187962, "grad_norm": 2.896108627319336, "learning_rate": 6.5000000000000004e-06, "loss": 7.9864, "step": 65 }, { "epoch": 0.0018809619776971652, "grad_norm": 2.5346200466156006, "learning_rate": 7.000000000000001e-06, "loss": 7.7691, "step": 70 }, { "epoch": 0.002015316404675534, "grad_norm": 2.589860200881958, "learning_rate": 7.5e-06, "loss": 7.8421, "step": 75 }, { "epoch": 0.002149670831653903, "grad_norm": 2.595316171646118, "learning_rate": 8.000000000000001e-06, "loss": 7.8043, "step": 80 }, { "epoch": 0.002284025258632272, "grad_norm": 2.718700885772705, "learning_rate": 8.500000000000002e-06, "loss": 7.7434, "step": 85 }, { "epoch": 0.0024183796856106408, "grad_norm": 3.426335096359253, "learning_rate": 9e-06, "loss": 7.6604, "step": 90 }, { "epoch": 0.00255273411258901, "grad_norm": 2.714006185531616, "learning_rate": 9.5e-06, "loss": 7.4985, "step": 95 }, { "epoch": 0.002687088539567379, "grad_norm": 2.5804998874664307, "learning_rate": 1e-05, "loss": 7.5679, "step": 100 }, { "epoch": 0.0028214429665457475, "grad_norm": 2.799619436264038, "learning_rate": 1.05e-05, "loss": 7.4543, "step": 105 }, { "epoch": 0.0029557973935241165, "grad_norm": 2.6874120235443115, "learning_rate": 1.1000000000000001e-05, "loss": 7.5767, "step": 110 }, { "epoch": 0.0030901518205024856, "grad_norm": 2.5367376804351807, "learning_rate": 1.1500000000000002e-05, "loss": 7.4019, "step": 115 }, { "epoch": 0.0032245062474808546, "grad_norm": 2.844397783279419, "learning_rate": 1.2e-05, "loss": 7.4102, "step": 120 }, { "epoch": 0.0033588606744592233, "grad_norm": 3.0461487770080566, "learning_rate": 1.25e-05, "loss": 7.371, "step": 125 }, { "epoch": 0.0034932151014375923, "grad_norm": 2.188748359680176, "learning_rate": 1.3000000000000001e-05, "loss": 7.2679, "step": 130 }, { "epoch": 0.0036275695284159614, "grad_norm": 2.494464874267578, "learning_rate": 1.3500000000000001e-05, "loss": 7.2771, "step": 135 }, { "epoch": 0.0037619239553943304, "grad_norm": 2.836573362350464, "learning_rate": 1.4000000000000001e-05, "loss": 7.2517, "step": 140 }, { "epoch": 0.003896278382372699, "grad_norm": 3.0456364154815674, "learning_rate": 1.45e-05, "loss": 7.1899, "step": 145 }, { "epoch": 0.004030632809351068, "grad_norm": 2.731494903564453, "learning_rate": 1.5e-05, "loss": 7.1298, "step": 150 }, { "epoch": 0.004164987236329437, "grad_norm": 2.6526591777801514, "learning_rate": 1.55e-05, "loss": 7.1122, "step": 155 }, { "epoch": 0.004299341663307806, "grad_norm": 2.3740646839141846, "learning_rate": 1.6000000000000003e-05, "loss": 7.2048, "step": 160 }, { "epoch": 0.004433696090286175, "grad_norm": 2.8663558959960938, "learning_rate": 1.65e-05, "loss": 7.0526, "step": 165 }, { "epoch": 0.004568050517264544, "grad_norm": 2.5205307006835938, "learning_rate": 1.7000000000000003e-05, "loss": 7.0873, "step": 170 }, { "epoch": 0.0047024049442429125, "grad_norm": 2.7289395332336426, "learning_rate": 1.75e-05, "loss": 6.8913, "step": 175 }, { "epoch": 0.0048367593712212815, "grad_norm": 2.7674458026885986, "learning_rate": 1.8e-05, "loss": 7.0275, "step": 180 }, { "epoch": 0.004971113798199651, "grad_norm": 2.6979026794433594, "learning_rate": 1.85e-05, "loss": 7.0515, "step": 185 }, { "epoch": 0.00510546822517802, "grad_norm": 2.633976936340332, "learning_rate": 1.9e-05, "loss": 6.9048, "step": 190 }, { "epoch": 0.005239822652156389, "grad_norm": 2.978367567062378, "learning_rate": 1.9500000000000003e-05, "loss": 6.8549, "step": 195 }, { "epoch": 0.005374177079134758, "grad_norm": 2.6202895641326904, "learning_rate": 2e-05, "loss": 6.8476, "step": 200 }, { "epoch": 0.005508531506113127, "grad_norm": 3.0189406871795654, "learning_rate": 2.05e-05, "loss": 6.7656, "step": 205 }, { "epoch": 0.005642885933091495, "grad_norm": 2.660745143890381, "learning_rate": 2.1e-05, "loss": 6.7942, "step": 210 }, { "epoch": 0.005777240360069864, "grad_norm": 2.7300496101379395, "learning_rate": 2.15e-05, "loss": 6.8475, "step": 215 }, { "epoch": 0.005911594787048233, "grad_norm": 2.794863700866699, "learning_rate": 2.2000000000000003e-05, "loss": 6.72, "step": 220 }, { "epoch": 0.006045949214026602, "grad_norm": 3.3329789638519287, "learning_rate": 2.25e-05, "loss": 6.6472, "step": 225 }, { "epoch": 0.006180303641004971, "grad_norm": 3.2194576263427734, "learning_rate": 2.3000000000000003e-05, "loss": 6.7812, "step": 230 }, { "epoch": 0.00631465806798334, "grad_norm": 2.758150100708008, "learning_rate": 2.35e-05, "loss": 6.7237, "step": 235 }, { "epoch": 0.006449012494961709, "grad_norm": 2.5040268898010254, "learning_rate": 2.4e-05, "loss": 6.6982, "step": 240 }, { "epoch": 0.006583366921940078, "grad_norm": 2.948148727416992, "learning_rate": 2.45e-05, "loss": 6.501, "step": 245 }, { "epoch": 0.0067177213489184465, "grad_norm": 2.9348082542419434, "learning_rate": 2.5e-05, "loss": 6.7335, "step": 250 }, { "epoch": 0.0068520757758968156, "grad_norm": 2.4086520671844482, "learning_rate": 2.5500000000000003e-05, "loss": 6.6263, "step": 255 }, { "epoch": 0.006986430202875185, "grad_norm": 3.0014560222625732, "learning_rate": 2.6000000000000002e-05, "loss": 6.621, "step": 260 }, { "epoch": 0.007120784629853554, "grad_norm": 2.662444591522217, "learning_rate": 2.6500000000000004e-05, "loss": 6.5413, "step": 265 }, { "epoch": 0.007255139056831923, "grad_norm": 3.1769156455993652, "learning_rate": 2.7000000000000002e-05, "loss": 6.6365, "step": 270 }, { "epoch": 0.007389493483810292, "grad_norm": 2.5750327110290527, "learning_rate": 2.7500000000000004e-05, "loss": 6.5601, "step": 275 }, { "epoch": 0.007523847910788661, "grad_norm": 2.687174081802368, "learning_rate": 2.8000000000000003e-05, "loss": 6.7257, "step": 280 }, { "epoch": 0.007658202337767029, "grad_norm": 2.814142942428589, "learning_rate": 2.8499999999999998e-05, "loss": 6.6035, "step": 285 }, { "epoch": 0.007792556764745398, "grad_norm": 2.5843849182128906, "learning_rate": 2.9e-05, "loss": 6.5071, "step": 290 }, { "epoch": 0.007926911191723767, "grad_norm": 2.9305198192596436, "learning_rate": 2.95e-05, "loss": 6.632, "step": 295 }, { "epoch": 0.008061265618702136, "grad_norm": 2.8853163719177246, "learning_rate": 3e-05, "loss": 6.5922, "step": 300 }, { "epoch": 0.008195620045680505, "grad_norm": 2.5634713172912598, "learning_rate": 3.05e-05, "loss": 6.4593, "step": 305 }, { "epoch": 0.008329974472658874, "grad_norm": 2.38299298286438, "learning_rate": 3.1e-05, "loss": 6.5774, "step": 310 }, { "epoch": 0.008464328899637243, "grad_norm": 2.8081862926483154, "learning_rate": 3.15e-05, "loss": 6.4521, "step": 315 }, { "epoch": 0.008598683326615612, "grad_norm": 2.3867132663726807, "learning_rate": 3.2000000000000005e-05, "loss": 6.463, "step": 320 }, { "epoch": 0.008733037753593981, "grad_norm": 2.355497360229492, "learning_rate": 3.2500000000000004e-05, "loss": 6.453, "step": 325 }, { "epoch": 0.00886739218057235, "grad_norm": 2.6186928749084473, "learning_rate": 3.3e-05, "loss": 6.3419, "step": 330 }, { "epoch": 0.00900174660755072, "grad_norm": 2.305065631866455, "learning_rate": 3.35e-05, "loss": 6.437, "step": 335 }, { "epoch": 0.009136101034529089, "grad_norm": 2.9329586029052734, "learning_rate": 3.4000000000000007e-05, "loss": 6.4604, "step": 340 }, { "epoch": 0.009270455461507456, "grad_norm": 2.7121264934539795, "learning_rate": 3.45e-05, "loss": 6.1996, "step": 345 }, { "epoch": 0.009404809888485825, "grad_norm": 2.518139123916626, "learning_rate": 3.5e-05, "loss": 6.3747, "step": 350 }, { "epoch": 0.009539164315464194, "grad_norm": 2.612273931503296, "learning_rate": 3.55e-05, "loss": 6.4051, "step": 355 }, { "epoch": 0.009673518742442563, "grad_norm": 2.6475555896759033, "learning_rate": 3.6e-05, "loss": 6.3917, "step": 360 }, { "epoch": 0.009807873169420932, "grad_norm": 2.4955079555511475, "learning_rate": 3.65e-05, "loss": 6.3295, "step": 365 }, { "epoch": 0.009942227596399301, "grad_norm": 2.4744882583618164, "learning_rate": 3.7e-05, "loss": 6.2833, "step": 370 }, { "epoch": 0.01007658202337767, "grad_norm": 2.5392186641693115, "learning_rate": 3.7500000000000003e-05, "loss": 6.2492, "step": 375 }, { "epoch": 0.01021093645035604, "grad_norm": 2.6065213680267334, "learning_rate": 3.8e-05, "loss": 6.3539, "step": 380 }, { "epoch": 0.010345290877334408, "grad_norm": 2.6622867584228516, "learning_rate": 3.85e-05, "loss": 6.2336, "step": 385 }, { "epoch": 0.010479645304312777, "grad_norm": 2.3949472904205322, "learning_rate": 3.9000000000000006e-05, "loss": 6.2787, "step": 390 }, { "epoch": 0.010613999731291146, "grad_norm": 2.6841931343078613, "learning_rate": 3.9500000000000005e-05, "loss": 6.3568, "step": 395 }, { "epoch": 0.010748354158269515, "grad_norm": 2.763218402862549, "learning_rate": 4e-05, "loss": 6.1288, "step": 400 }, { "epoch": 0.010882708585247884, "grad_norm": 2.549846649169922, "learning_rate": 4.05e-05, "loss": 6.2834, "step": 405 }, { "epoch": 0.011017063012226254, "grad_norm": 2.6757774353027344, "learning_rate": 4.1e-05, "loss": 6.2089, "step": 410 }, { "epoch": 0.011151417439204623, "grad_norm": 2.6386969089508057, "learning_rate": 4.15e-05, "loss": 6.2232, "step": 415 }, { "epoch": 0.01128577186618299, "grad_norm": 2.8432271480560303, "learning_rate": 4.2e-05, "loss": 6.1445, "step": 420 }, { "epoch": 0.011420126293161359, "grad_norm": 2.8131535053253174, "learning_rate": 4.25e-05, "loss": 6.1136, "step": 425 }, { "epoch": 0.011554480720139728, "grad_norm": 2.476287841796875, "learning_rate": 4.3e-05, "loss": 6.1919, "step": 430 }, { "epoch": 0.011688835147118097, "grad_norm": 2.672457695007324, "learning_rate": 4.35e-05, "loss": 6.1827, "step": 435 }, { "epoch": 0.011823189574096466, "grad_norm": 2.5320873260498047, "learning_rate": 4.4000000000000006e-05, "loss": 6.1224, "step": 440 }, { "epoch": 0.011957544001074835, "grad_norm": 2.4476144313812256, "learning_rate": 4.4500000000000004e-05, "loss": 6.1666, "step": 445 }, { "epoch": 0.012091898428053204, "grad_norm": 2.737291097640991, "learning_rate": 4.5e-05, "loss": 6.2006, "step": 450 }, { "epoch": 0.012226252855031573, "grad_norm": 2.5629758834838867, "learning_rate": 4.55e-05, "loss": 6.0941, "step": 455 }, { "epoch": 0.012360607282009942, "grad_norm": 2.4126691818237305, "learning_rate": 4.600000000000001e-05, "loss": 6.0879, "step": 460 }, { "epoch": 0.012494961708988311, "grad_norm": 2.3889412879943848, "learning_rate": 4.6500000000000005e-05, "loss": 6.0713, "step": 465 }, { "epoch": 0.01262931613596668, "grad_norm": 2.744819402694702, "learning_rate": 4.7e-05, "loss": 6.1562, "step": 470 }, { "epoch": 0.01276367056294505, "grad_norm": 2.4293222427368164, "learning_rate": 4.75e-05, "loss": 6.0595, "step": 475 }, { "epoch": 0.012898024989923419, "grad_norm": 2.5619595050811768, "learning_rate": 4.8e-05, "loss": 6.1103, "step": 480 }, { "epoch": 0.013032379416901788, "grad_norm": 2.2517168521881104, "learning_rate": 4.85e-05, "loss": 6.1078, "step": 485 }, { "epoch": 0.013166733843880157, "grad_norm": 2.696000814437866, "learning_rate": 4.9e-05, "loss": 5.9199, "step": 490 }, { "epoch": 0.013301088270858524, "grad_norm": 2.3833043575286865, "learning_rate": 4.9500000000000004e-05, "loss": 6.0985, "step": 495 }, { "epoch": 0.013435442697836893, "grad_norm": 2.5483415126800537, "learning_rate": 5e-05, "loss": 6.0529, "step": 500 }, { "epoch": 0.013569797124815262, "grad_norm": 2.545701503753662, "learning_rate": 4.9993190793953425e-05, "loss": 6.0793, "step": 505 }, { "epoch": 0.013704151551793631, "grad_norm": 2.6781485080718994, "learning_rate": 4.998638158790685e-05, "loss": 6.0248, "step": 510 }, { "epoch": 0.013838505978772, "grad_norm": 2.700716972351074, "learning_rate": 4.9979572381860276e-05, "loss": 6.1312, "step": 515 }, { "epoch": 0.01397286040575037, "grad_norm": 2.3939549922943115, "learning_rate": 4.9972763175813705e-05, "loss": 6.0544, "step": 520 }, { "epoch": 0.014107214832728738, "grad_norm": 2.771691083908081, "learning_rate": 4.996595396976713e-05, "loss": 5.9878, "step": 525 }, { "epoch": 0.014241569259707107, "grad_norm": 2.369417428970337, "learning_rate": 4.995914476372055e-05, "loss": 6.0719, "step": 530 }, { "epoch": 0.014375923686685476, "grad_norm": 2.6986169815063477, "learning_rate": 4.995233555767398e-05, "loss": 6.1208, "step": 535 }, { "epoch": 0.014510278113663845, "grad_norm": 2.3913047313690186, "learning_rate": 4.99455263516274e-05, "loss": 6.0036, "step": 540 }, { "epoch": 0.014644632540642214, "grad_norm": 2.569361925125122, "learning_rate": 4.993871714558083e-05, "loss": 6.0675, "step": 545 }, { "epoch": 0.014778986967620584, "grad_norm": 2.287783622741699, "learning_rate": 4.993190793953425e-05, "loss": 5.9546, "step": 550 }, { "epoch": 0.014913341394598953, "grad_norm": 2.8044824600219727, "learning_rate": 4.992509873348768e-05, "loss": 6.0564, "step": 555 }, { "epoch": 0.015047695821577322, "grad_norm": 2.370957612991333, "learning_rate": 4.9918289527441104e-05, "loss": 5.9391, "step": 560 }, { "epoch": 0.01518205024855569, "grad_norm": 2.4755353927612305, "learning_rate": 4.9911480321394526e-05, "loss": 5.9702, "step": 565 }, { "epoch": 0.015316404675534058, "grad_norm": 2.473414421081543, "learning_rate": 4.990467111534795e-05, "loss": 6.0557, "step": 570 }, { "epoch": 0.015450759102512427, "grad_norm": 2.2430734634399414, "learning_rate": 4.989786190930138e-05, "loss": 6.0534, "step": 575 }, { "epoch": 0.015585113529490796, "grad_norm": 2.430976390838623, "learning_rate": 4.9891052703254806e-05, "loss": 5.7805, "step": 580 }, { "epoch": 0.015719467956469165, "grad_norm": 2.832655906677246, "learning_rate": 4.988424349720823e-05, "loss": 6.0228, "step": 585 }, { "epoch": 0.015853822383447534, "grad_norm": 2.3685193061828613, "learning_rate": 4.987743429116165e-05, "loss": 5.7415, "step": 590 }, { "epoch": 0.015988176810425903, "grad_norm": 2.479611396789551, "learning_rate": 4.987062508511508e-05, "loss": 5.9124, "step": 595 }, { "epoch": 0.016122531237404272, "grad_norm": 2.4757654666900635, "learning_rate": 4.98638158790685e-05, "loss": 5.9235, "step": 600 }, { "epoch": 0.01625688566438264, "grad_norm": 2.5986921787261963, "learning_rate": 4.9857006673021924e-05, "loss": 5.9294, "step": 605 }, { "epoch": 0.01639124009136101, "grad_norm": 2.630502939224243, "learning_rate": 4.985019746697535e-05, "loss": 5.7693, "step": 610 }, { "epoch": 0.01652559451833938, "grad_norm": 2.4950783252716064, "learning_rate": 4.984338826092878e-05, "loss": 5.9103, "step": 615 }, { "epoch": 0.01665994894531775, "grad_norm": 2.2728936672210693, "learning_rate": 4.9836579054882205e-05, "loss": 5.9684, "step": 620 }, { "epoch": 0.016794303372296118, "grad_norm": 2.619060516357422, "learning_rate": 4.982976984883563e-05, "loss": 5.8626, "step": 625 }, { "epoch": 0.016928657799274487, "grad_norm": 2.5404937267303467, "learning_rate": 4.982296064278905e-05, "loss": 5.7829, "step": 630 }, { "epoch": 0.017063012226252856, "grad_norm": 3.9172961711883545, "learning_rate": 4.981615143674248e-05, "loss": 5.9202, "step": 635 }, { "epoch": 0.017197366653231225, "grad_norm": 2.43959641456604, "learning_rate": 4.980934223069591e-05, "loss": 5.8023, "step": 640 }, { "epoch": 0.017331721080209594, "grad_norm": 2.476043224334717, "learning_rate": 4.980253302464933e-05, "loss": 5.9157, "step": 645 }, { "epoch": 0.017466075507187963, "grad_norm": 2.211029291152954, "learning_rate": 4.979572381860275e-05, "loss": 5.7362, "step": 650 }, { "epoch": 0.017600429934166332, "grad_norm": 2.5167977809906006, "learning_rate": 4.9788914612556174e-05, "loss": 5.9221, "step": 655 }, { "epoch": 0.0177347843611447, "grad_norm": 2.3679065704345703, "learning_rate": 4.97821054065096e-05, "loss": 5.736, "step": 660 }, { "epoch": 0.01786913878812307, "grad_norm": 2.331702947616577, "learning_rate": 4.9775296200463025e-05, "loss": 5.9375, "step": 665 }, { "epoch": 0.01800349321510144, "grad_norm": 2.4434773921966553, "learning_rate": 4.9768486994416454e-05, "loss": 5.9481, "step": 670 }, { "epoch": 0.018137847642079808, "grad_norm": 2.8634090423583984, "learning_rate": 4.976167778836988e-05, "loss": 5.8105, "step": 675 }, { "epoch": 0.018272202069058177, "grad_norm": 2.486222743988037, "learning_rate": 4.9754868582323306e-05, "loss": 5.8775, "step": 680 }, { "epoch": 0.018406556496036543, "grad_norm": 2.2074220180511475, "learning_rate": 4.974805937627673e-05, "loss": 5.9407, "step": 685 }, { "epoch": 0.018540910923014912, "grad_norm": 2.3490865230560303, "learning_rate": 4.974125017023015e-05, "loss": 5.7635, "step": 690 }, { "epoch": 0.01867526534999328, "grad_norm": 2.230475902557373, "learning_rate": 4.973444096418358e-05, "loss": 5.773, "step": 695 }, { "epoch": 0.01880961977697165, "grad_norm": 2.3701012134552, "learning_rate": 4.972763175813701e-05, "loss": 5.8768, "step": 700 }, { "epoch": 0.01894397420395002, "grad_norm": 2.5682291984558105, "learning_rate": 4.972082255209043e-05, "loss": 5.8418, "step": 705 }, { "epoch": 0.019078328630928388, "grad_norm": 2.3219289779663086, "learning_rate": 4.971401334604385e-05, "loss": 5.812, "step": 710 }, { "epoch": 0.019212683057906757, "grad_norm": 2.6463623046875, "learning_rate": 4.9707204139997275e-05, "loss": 5.7989, "step": 715 }, { "epoch": 0.019347037484885126, "grad_norm": 2.8326807022094727, "learning_rate": 4.9700394933950704e-05, "loss": 5.7261, "step": 720 }, { "epoch": 0.019481391911863495, "grad_norm": 2.723139524459839, "learning_rate": 4.9693585727904126e-05, "loss": 5.5954, "step": 725 }, { "epoch": 0.019615746338841864, "grad_norm": 2.45609712600708, "learning_rate": 4.9686776521857556e-05, "loss": 5.9225, "step": 730 }, { "epoch": 0.019750100765820233, "grad_norm": 1.9900792837142944, "learning_rate": 4.967996731581098e-05, "loss": 5.7652, "step": 735 }, { "epoch": 0.019884455192798602, "grad_norm": 2.4097843170166016, "learning_rate": 4.967315810976441e-05, "loss": 5.7977, "step": 740 }, { "epoch": 0.02001880961977697, "grad_norm": 2.3949177265167236, "learning_rate": 4.966634890371783e-05, "loss": 5.8127, "step": 745 }, { "epoch": 0.02015316404675534, "grad_norm": 2.2827463150024414, "learning_rate": 4.965953969767125e-05, "loss": 5.6738, "step": 750 }, { "epoch": 0.02028751847373371, "grad_norm": 2.2521233558654785, "learning_rate": 4.965273049162468e-05, "loss": 5.8771, "step": 755 }, { "epoch": 0.02042187290071208, "grad_norm": 2.4549942016601562, "learning_rate": 4.964592128557811e-05, "loss": 5.6877, "step": 760 }, { "epoch": 0.020556227327690448, "grad_norm": 2.5722155570983887, "learning_rate": 4.963911207953153e-05, "loss": 5.7352, "step": 765 }, { "epoch": 0.020690581754668817, "grad_norm": 2.1596367359161377, "learning_rate": 4.9632302873484954e-05, "loss": 5.7694, "step": 770 }, { "epoch": 0.020824936181647186, "grad_norm": 2.4517874717712402, "learning_rate": 4.9625493667438376e-05, "loss": 5.7355, "step": 775 }, { "epoch": 0.020959290608625555, "grad_norm": 2.0883655548095703, "learning_rate": 4.9618684461391805e-05, "loss": 5.673, "step": 780 }, { "epoch": 0.021093645035603924, "grad_norm": 2.5619287490844727, "learning_rate": 4.961187525534523e-05, "loss": 5.7157, "step": 785 }, { "epoch": 0.021227999462582293, "grad_norm": 2.2422034740448, "learning_rate": 4.960506604929866e-05, "loss": 5.7318, "step": 790 }, { "epoch": 0.021362353889560662, "grad_norm": 2.5861570835113525, "learning_rate": 4.959825684325208e-05, "loss": 5.6878, "step": 795 }, { "epoch": 0.02149670831653903, "grad_norm": 2.492201089859009, "learning_rate": 4.95914476372055e-05, "loss": 5.6216, "step": 800 }, { "epoch": 0.0216310627435174, "grad_norm": 2.8234431743621826, "learning_rate": 4.958463843115893e-05, "loss": 5.7494, "step": 805 }, { "epoch": 0.02176541717049577, "grad_norm": 2.094003915786743, "learning_rate": 4.957782922511235e-05, "loss": 5.7582, "step": 810 }, { "epoch": 0.021899771597474138, "grad_norm": 2.393399715423584, "learning_rate": 4.9571020019065775e-05, "loss": 5.7385, "step": 815 }, { "epoch": 0.022034126024452507, "grad_norm": 2.3262112140655518, "learning_rate": 4.9564210813019204e-05, "loss": 5.7897, "step": 820 }, { "epoch": 0.022168480451430876, "grad_norm": 2.2483649253845215, "learning_rate": 4.955740160697263e-05, "loss": 5.7137, "step": 825 }, { "epoch": 0.022302834878409245, "grad_norm": 2.1380503177642822, "learning_rate": 4.9550592400926055e-05, "loss": 5.5507, "step": 830 }, { "epoch": 0.022437189305387614, "grad_norm": 2.464599132537842, "learning_rate": 4.954378319487948e-05, "loss": 5.6652, "step": 835 }, { "epoch": 0.02257154373236598, "grad_norm": 2.2616629600524902, "learning_rate": 4.95369739888329e-05, "loss": 5.7686, "step": 840 }, { "epoch": 0.02270589815934435, "grad_norm": 2.13108491897583, "learning_rate": 4.953016478278633e-05, "loss": 5.6843, "step": 845 }, { "epoch": 0.022840252586322718, "grad_norm": 2.5671236515045166, "learning_rate": 4.952335557673976e-05, "loss": 5.8, "step": 850 }, { "epoch": 0.022974607013301087, "grad_norm": 2.473129987716675, "learning_rate": 4.951654637069318e-05, "loss": 5.6215, "step": 855 }, { "epoch": 0.023108961440279456, "grad_norm": 2.5920064449310303, "learning_rate": 4.95097371646466e-05, "loss": 5.7515, "step": 860 }, { "epoch": 0.023243315867257825, "grad_norm": 2.3798274993896484, "learning_rate": 4.950292795860003e-05, "loss": 5.6959, "step": 865 }, { "epoch": 0.023377670294236194, "grad_norm": 2.2634427547454834, "learning_rate": 4.9496118752553454e-05, "loss": 5.646, "step": 870 }, { "epoch": 0.023512024721214563, "grad_norm": 2.3553104400634766, "learning_rate": 4.9489309546506876e-05, "loss": 5.555, "step": 875 }, { "epoch": 0.023646379148192932, "grad_norm": 2.5519611835479736, "learning_rate": 4.9482500340460305e-05, "loss": 5.632, "step": 880 }, { "epoch": 0.0237807335751713, "grad_norm": 2.3024661540985107, "learning_rate": 4.9475691134413734e-05, "loss": 5.6788, "step": 885 }, { "epoch": 0.02391508800214967, "grad_norm": 2.5067193508148193, "learning_rate": 4.9468881928367156e-05, "loss": 5.6975, "step": 890 }, { "epoch": 0.02404944242912804, "grad_norm": 2.397663116455078, "learning_rate": 4.946207272232058e-05, "loss": 5.7011, "step": 895 }, { "epoch": 0.02418379685610641, "grad_norm": 2.439544200897217, "learning_rate": 4.9455263516274e-05, "loss": 5.5824, "step": 900 }, { "epoch": 0.024318151283084777, "grad_norm": 2.6345417499542236, "learning_rate": 4.944845431022743e-05, "loss": 5.739, "step": 905 }, { "epoch": 0.024452505710063147, "grad_norm": 2.163940668106079, "learning_rate": 4.944164510418086e-05, "loss": 5.5987, "step": 910 }, { "epoch": 0.024586860137041516, "grad_norm": 2.3982248306274414, "learning_rate": 4.943483589813428e-05, "loss": 5.714, "step": 915 }, { "epoch": 0.024721214564019885, "grad_norm": 2.4334447383880615, "learning_rate": 4.9428026692087703e-05, "loss": 5.7488, "step": 920 }, { "epoch": 0.024855568990998254, "grad_norm": 2.416102409362793, "learning_rate": 4.942121748604113e-05, "loss": 5.5881, "step": 925 }, { "epoch": 0.024989923417976623, "grad_norm": 2.588106632232666, "learning_rate": 4.9414408279994555e-05, "loss": 5.5839, "step": 930 }, { "epoch": 0.025124277844954992, "grad_norm": 2.413837432861328, "learning_rate": 4.940759907394798e-05, "loss": 5.769, "step": 935 }, { "epoch": 0.02525863227193336, "grad_norm": 2.239147186279297, "learning_rate": 4.9400789867901406e-05, "loss": 5.7587, "step": 940 }, { "epoch": 0.02539298669891173, "grad_norm": 2.5343501567840576, "learning_rate": 4.9393980661854835e-05, "loss": 5.5109, "step": 945 }, { "epoch": 0.0255273411258901, "grad_norm": 2.4583237171173096, "learning_rate": 4.938717145580826e-05, "loss": 5.6971, "step": 950 }, { "epoch": 0.025661695552868468, "grad_norm": 2.4051103591918945, "learning_rate": 4.938036224976168e-05, "loss": 5.6497, "step": 955 }, { "epoch": 0.025796049979846837, "grad_norm": 2.489861488342285, "learning_rate": 4.93735530437151e-05, "loss": 5.7268, "step": 960 }, { "epoch": 0.025930404406825206, "grad_norm": 2.2497615814208984, "learning_rate": 4.936674383766853e-05, "loss": 5.5657, "step": 965 }, { "epoch": 0.026064758833803575, "grad_norm": 2.4166667461395264, "learning_rate": 4.935993463162196e-05, "loss": 5.5962, "step": 970 }, { "epoch": 0.026199113260781944, "grad_norm": 2.4965009689331055, "learning_rate": 4.935312542557538e-05, "loss": 5.5071, "step": 975 }, { "epoch": 0.026333467687760313, "grad_norm": 2.6134660243988037, "learning_rate": 4.9346316219528805e-05, "loss": 5.6466, "step": 980 }, { "epoch": 0.026467822114738682, "grad_norm": 2.2586779594421387, "learning_rate": 4.933950701348223e-05, "loss": 5.5967, "step": 985 }, { "epoch": 0.026602176541717048, "grad_norm": 2.5898633003234863, "learning_rate": 4.9332697807435656e-05, "loss": 5.3913, "step": 990 }, { "epoch": 0.026736530968695417, "grad_norm": 2.391298294067383, "learning_rate": 4.932588860138908e-05, "loss": 5.5848, "step": 995 }, { "epoch": 0.026870885395673786, "grad_norm": 2.41550874710083, "learning_rate": 4.931907939534251e-05, "loss": 5.5264, "step": 1000 }, { "epoch": 0.027005239822652155, "grad_norm": 2.501523971557617, "learning_rate": 4.931227018929593e-05, "loss": 5.4967, "step": 1005 }, { "epoch": 0.027139594249630524, "grad_norm": 2.3912205696105957, "learning_rate": 4.930546098324936e-05, "loss": 5.5306, "step": 1010 }, { "epoch": 0.027273948676608893, "grad_norm": 2.2942121028900146, "learning_rate": 4.929865177720278e-05, "loss": 5.6278, "step": 1015 }, { "epoch": 0.027408303103587262, "grad_norm": 2.4391286373138428, "learning_rate": 4.92918425711562e-05, "loss": 5.4362, "step": 1020 }, { "epoch": 0.02754265753056563, "grad_norm": 2.450418472290039, "learning_rate": 4.9285033365109625e-05, "loss": 5.4614, "step": 1025 }, { "epoch": 0.027677011957544, "grad_norm": 2.4992129802703857, "learning_rate": 4.927822415906306e-05, "loss": 5.4093, "step": 1030 }, { "epoch": 0.02781136638452237, "grad_norm": 2.753678560256958, "learning_rate": 4.927141495301648e-05, "loss": 5.5318, "step": 1035 }, { "epoch": 0.02794572081150074, "grad_norm": 2.431758165359497, "learning_rate": 4.9264605746969906e-05, "loss": 5.562, "step": 1040 }, { "epoch": 0.028080075238479107, "grad_norm": 2.2790017127990723, "learning_rate": 4.925779654092333e-05, "loss": 5.5048, "step": 1045 }, { "epoch": 0.028214429665457477, "grad_norm": 2.3663244247436523, "learning_rate": 4.925098733487676e-05, "loss": 5.5602, "step": 1050 }, { "epoch": 0.028348784092435846, "grad_norm": 2.6443898677825928, "learning_rate": 4.924417812883018e-05, "loss": 5.5215, "step": 1055 }, { "epoch": 0.028483138519414215, "grad_norm": 2.1262621879577637, "learning_rate": 4.923736892278361e-05, "loss": 5.6381, "step": 1060 }, { "epoch": 0.028617492946392584, "grad_norm": 2.5652754306793213, "learning_rate": 4.923055971673703e-05, "loss": 5.4511, "step": 1065 }, { "epoch": 0.028751847373370953, "grad_norm": 2.287781000137329, "learning_rate": 4.922375051069046e-05, "loss": 5.4695, "step": 1070 }, { "epoch": 0.028886201800349322, "grad_norm": 2.26455020904541, "learning_rate": 4.921694130464388e-05, "loss": 5.586, "step": 1075 }, { "epoch": 0.02902055622732769, "grad_norm": 2.4085493087768555, "learning_rate": 4.9210132098597304e-05, "loss": 5.4816, "step": 1080 }, { "epoch": 0.02915491065430606, "grad_norm": 2.4961626529693604, "learning_rate": 4.9203322892550726e-05, "loss": 5.4164, "step": 1085 }, { "epoch": 0.02928926508128443, "grad_norm": 2.430562973022461, "learning_rate": 4.9196513686504155e-05, "loss": 5.4428, "step": 1090 }, { "epoch": 0.029423619508262798, "grad_norm": 2.3071069717407227, "learning_rate": 4.9189704480457584e-05, "loss": 5.4628, "step": 1095 }, { "epoch": 0.029557973935241167, "grad_norm": 2.728600263595581, "learning_rate": 4.918289527441101e-05, "loss": 5.4558, "step": 1100 }, { "epoch": 0.029692328362219536, "grad_norm": 2.7294223308563232, "learning_rate": 4.917608606836443e-05, "loss": 5.6166, "step": 1105 }, { "epoch": 0.029826682789197905, "grad_norm": 2.3402366638183594, "learning_rate": 4.916927686231785e-05, "loss": 5.4957, "step": 1110 }, { "epoch": 0.029961037216176274, "grad_norm": 2.441723585128784, "learning_rate": 4.916246765627128e-05, "loss": 5.5721, "step": 1115 }, { "epoch": 0.030095391643154643, "grad_norm": 2.482081651687622, "learning_rate": 4.915565845022471e-05, "loss": 5.3273, "step": 1120 }, { "epoch": 0.030229746070133012, "grad_norm": 2.434582233428955, "learning_rate": 4.914884924417813e-05, "loss": 5.493, "step": 1125 }, { "epoch": 0.03036410049711138, "grad_norm": 2.1452295780181885, "learning_rate": 4.9142040038131554e-05, "loss": 5.5232, "step": 1130 }, { "epoch": 0.03049845492408975, "grad_norm": 2.335707902908325, "learning_rate": 4.913523083208498e-05, "loss": 5.4953, "step": 1135 }, { "epoch": 0.030632809351068116, "grad_norm": 2.5285251140594482, "learning_rate": 4.9128421626038405e-05, "loss": 5.5154, "step": 1140 }, { "epoch": 0.030767163778046485, "grad_norm": 2.4380319118499756, "learning_rate": 4.912161241999183e-05, "loss": 5.3961, "step": 1145 }, { "epoch": 0.030901518205024854, "grad_norm": 2.517160177230835, "learning_rate": 4.9114803213945257e-05, "loss": 5.4653, "step": 1150 }, { "epoch": 0.031035872632003223, "grad_norm": 2.326714277267456, "learning_rate": 4.9107994007898686e-05, "loss": 5.3881, "step": 1155 }, { "epoch": 0.031170227058981592, "grad_norm": 2.2102363109588623, "learning_rate": 4.910118480185211e-05, "loss": 5.3753, "step": 1160 }, { "epoch": 0.031304581485959965, "grad_norm": 2.2875664234161377, "learning_rate": 4.909437559580553e-05, "loss": 5.3884, "step": 1165 }, { "epoch": 0.03143893591293833, "grad_norm": 2.52983021736145, "learning_rate": 4.908756638975895e-05, "loss": 5.5333, "step": 1170 }, { "epoch": 0.0315732903399167, "grad_norm": 2.466771125793457, "learning_rate": 4.908075718371238e-05, "loss": 5.4845, "step": 1175 }, { "epoch": 0.03170764476689507, "grad_norm": 2.3116300106048584, "learning_rate": 4.907394797766581e-05, "loss": 5.5213, "step": 1180 }, { "epoch": 0.03184199919387344, "grad_norm": 2.422750234603882, "learning_rate": 4.906713877161923e-05, "loss": 5.3867, "step": 1185 }, { "epoch": 0.031976353620851806, "grad_norm": 2.3109638690948486, "learning_rate": 4.9060329565572655e-05, "loss": 5.539, "step": 1190 }, { "epoch": 0.03211070804783018, "grad_norm": 2.3123369216918945, "learning_rate": 4.9053520359526084e-05, "loss": 5.4303, "step": 1195 }, { "epoch": 0.032245062474808545, "grad_norm": 2.449985980987549, "learning_rate": 4.9046711153479506e-05, "loss": 5.3668, "step": 1200 }, { "epoch": 0.03237941690178692, "grad_norm": 2.5965611934661865, "learning_rate": 4.903990194743293e-05, "loss": 5.4613, "step": 1205 }, { "epoch": 0.03251377132876528, "grad_norm": 2.5709943771362305, "learning_rate": 4.903309274138636e-05, "loss": 5.4945, "step": 1210 }, { "epoch": 0.032648125755743655, "grad_norm": 2.292269229888916, "learning_rate": 4.902628353533979e-05, "loss": 5.3906, "step": 1215 }, { "epoch": 0.03278248018272202, "grad_norm": 2.5061070919036865, "learning_rate": 4.901947432929321e-05, "loss": 5.3073, "step": 1220 }, { "epoch": 0.032916834609700386, "grad_norm": 2.5885732173919678, "learning_rate": 4.901266512324663e-05, "loss": 5.3256, "step": 1225 }, { "epoch": 0.03305118903667876, "grad_norm": 2.6703076362609863, "learning_rate": 4.9005855917200054e-05, "loss": 5.4299, "step": 1230 }, { "epoch": 0.033185543463657124, "grad_norm": 2.6485342979431152, "learning_rate": 4.899904671115348e-05, "loss": 5.4164, "step": 1235 }, { "epoch": 0.0333198978906355, "grad_norm": 2.5048696994781494, "learning_rate": 4.899223750510691e-05, "loss": 5.521, "step": 1240 }, { "epoch": 0.03345425231761386, "grad_norm": 2.3647947311401367, "learning_rate": 4.8985428299060334e-05, "loss": 5.4279, "step": 1245 }, { "epoch": 0.033588606744592235, "grad_norm": 2.5917186737060547, "learning_rate": 4.8978619093013756e-05, "loss": 5.2821, "step": 1250 }, { "epoch": 0.0337229611715706, "grad_norm": 2.7712130546569824, "learning_rate": 4.897180988696718e-05, "loss": 5.3969, "step": 1255 }, { "epoch": 0.03385731559854897, "grad_norm": 2.368302345275879, "learning_rate": 4.896500068092061e-05, "loss": 5.3883, "step": 1260 }, { "epoch": 0.03399167002552734, "grad_norm": 2.539823055267334, "learning_rate": 4.895819147487403e-05, "loss": 5.5243, "step": 1265 }, { "epoch": 0.03412602445250571, "grad_norm": 2.261368989944458, "learning_rate": 4.895138226882746e-05, "loss": 5.4575, "step": 1270 }, { "epoch": 0.03426037887948408, "grad_norm": 2.6786978244781494, "learning_rate": 4.894457306278088e-05, "loss": 5.4849, "step": 1275 }, { "epoch": 0.03439473330646245, "grad_norm": 2.6710543632507324, "learning_rate": 4.893776385673431e-05, "loss": 5.4024, "step": 1280 }, { "epoch": 0.034529087733440815, "grad_norm": 2.3116440773010254, "learning_rate": 4.893095465068773e-05, "loss": 5.4615, "step": 1285 }, { "epoch": 0.03466344216041919, "grad_norm": 2.3754544258117676, "learning_rate": 4.8924145444641155e-05, "loss": 5.4348, "step": 1290 }, { "epoch": 0.03479779658739755, "grad_norm": 2.5682246685028076, "learning_rate": 4.891733623859458e-05, "loss": 5.4357, "step": 1295 }, { "epoch": 0.034932151014375926, "grad_norm": 2.48640513420105, "learning_rate": 4.8910527032548006e-05, "loss": 5.4485, "step": 1300 }, { "epoch": 0.03506650544135429, "grad_norm": 2.571028470993042, "learning_rate": 4.8903717826501435e-05, "loss": 5.3691, "step": 1305 }, { "epoch": 0.035200859868332664, "grad_norm": 2.1914620399475098, "learning_rate": 4.889690862045486e-05, "loss": 5.4309, "step": 1310 }, { "epoch": 0.03533521429531103, "grad_norm": 2.3349716663360596, "learning_rate": 4.889009941440828e-05, "loss": 5.2666, "step": 1315 }, { "epoch": 0.0354695687222894, "grad_norm": 2.440905809402466, "learning_rate": 4.888329020836171e-05, "loss": 5.4088, "step": 1320 }, { "epoch": 0.03560392314926777, "grad_norm": 2.3953843116760254, "learning_rate": 4.887648100231513e-05, "loss": 5.3627, "step": 1325 }, { "epoch": 0.03573827757624614, "grad_norm": 2.581817865371704, "learning_rate": 4.886967179626856e-05, "loss": 5.4535, "step": 1330 }, { "epoch": 0.035872632003224506, "grad_norm": 2.3343987464904785, "learning_rate": 4.886286259022198e-05, "loss": 5.3891, "step": 1335 }, { "epoch": 0.03600698643020288, "grad_norm": 2.195507287979126, "learning_rate": 4.885605338417541e-05, "loss": 5.3466, "step": 1340 }, { "epoch": 0.036141340857181244, "grad_norm": 2.514629602432251, "learning_rate": 4.8849244178128833e-05, "loss": 5.4418, "step": 1345 }, { "epoch": 0.036275695284159616, "grad_norm": 2.5501441955566406, "learning_rate": 4.8842434972082256e-05, "loss": 5.37, "step": 1350 }, { "epoch": 0.03641004971113798, "grad_norm": 2.294858455657959, "learning_rate": 4.883562576603568e-05, "loss": 5.4486, "step": 1355 }, { "epoch": 0.036544404138116354, "grad_norm": 2.5770301818847656, "learning_rate": 4.882881655998911e-05, "loss": 5.3974, "step": 1360 }, { "epoch": 0.03667875856509472, "grad_norm": 2.296680212020874, "learning_rate": 4.8822007353942536e-05, "loss": 5.3902, "step": 1365 }, { "epoch": 0.036813112992073085, "grad_norm": 2.0749173164367676, "learning_rate": 4.881519814789596e-05, "loss": 5.4706, "step": 1370 }, { "epoch": 0.03694746741905146, "grad_norm": 2.461259126663208, "learning_rate": 4.880838894184938e-05, "loss": 5.3508, "step": 1375 }, { "epoch": 0.037081821846029824, "grad_norm": 2.320645332336426, "learning_rate": 4.880157973580281e-05, "loss": 5.3702, "step": 1380 }, { "epoch": 0.037216176273008196, "grad_norm": 2.358922243118286, "learning_rate": 4.879477052975623e-05, "loss": 5.3265, "step": 1385 }, { "epoch": 0.03735053069998656, "grad_norm": 2.890631914138794, "learning_rate": 4.878796132370966e-05, "loss": 5.375, "step": 1390 }, { "epoch": 0.037484885126964934, "grad_norm": 2.5326790809631348, "learning_rate": 4.878115211766308e-05, "loss": 5.4022, "step": 1395 }, { "epoch": 0.0376192395539433, "grad_norm": 2.805542230606079, "learning_rate": 4.8774342911616506e-05, "loss": 5.3828, "step": 1400 }, { "epoch": 0.03775359398092167, "grad_norm": 2.2423665523529053, "learning_rate": 4.8767533705569935e-05, "loss": 5.3923, "step": 1405 }, { "epoch": 0.03788794840790004, "grad_norm": 2.5512473583221436, "learning_rate": 4.876072449952336e-05, "loss": 5.4147, "step": 1410 }, { "epoch": 0.03802230283487841, "grad_norm": 2.252861499786377, "learning_rate": 4.875391529347678e-05, "loss": 5.418, "step": 1415 }, { "epoch": 0.038156657261856776, "grad_norm": 2.629993200302124, "learning_rate": 4.874710608743021e-05, "loss": 5.3635, "step": 1420 }, { "epoch": 0.03829101168883515, "grad_norm": 2.439270257949829, "learning_rate": 4.874029688138364e-05, "loss": 5.3613, "step": 1425 }, { "epoch": 0.038425366115813514, "grad_norm": 2.4896345138549805, "learning_rate": 4.873348767533706e-05, "loss": 5.352, "step": 1430 }, { "epoch": 0.03855972054279189, "grad_norm": 2.5188028812408447, "learning_rate": 4.872667846929048e-05, "loss": 5.4657, "step": 1435 }, { "epoch": 0.03869407496977025, "grad_norm": 2.387194871902466, "learning_rate": 4.8719869263243904e-05, "loss": 5.4058, "step": 1440 }, { "epoch": 0.038828429396748625, "grad_norm": 2.380246162414551, "learning_rate": 4.871306005719733e-05, "loss": 5.3044, "step": 1445 }, { "epoch": 0.03896278382372699, "grad_norm": 2.4011290073394775, "learning_rate": 4.8706250851150755e-05, "loss": 5.3825, "step": 1450 }, { "epoch": 0.03909713825070536, "grad_norm": 2.509749174118042, "learning_rate": 4.8699441645104184e-05, "loss": 5.4325, "step": 1455 }, { "epoch": 0.03923149267768373, "grad_norm": 2.646247148513794, "learning_rate": 4.869263243905761e-05, "loss": 5.4015, "step": 1460 }, { "epoch": 0.0393658471046621, "grad_norm": 2.2970354557037354, "learning_rate": 4.8685823233011036e-05, "loss": 5.4615, "step": 1465 }, { "epoch": 0.039500201531640466, "grad_norm": 2.2596287727355957, "learning_rate": 4.867901402696446e-05, "loss": 5.3141, "step": 1470 }, { "epoch": 0.03963455595861884, "grad_norm": 2.2906055450439453, "learning_rate": 4.867220482091788e-05, "loss": 5.3758, "step": 1475 }, { "epoch": 0.039768910385597205, "grad_norm": 2.752013683319092, "learning_rate": 4.866539561487131e-05, "loss": 5.2831, "step": 1480 }, { "epoch": 0.03990326481257558, "grad_norm": 2.4344067573547363, "learning_rate": 4.865858640882474e-05, "loss": 5.3224, "step": 1485 }, { "epoch": 0.04003761923955394, "grad_norm": 2.2967071533203125, "learning_rate": 4.865177720277816e-05, "loss": 5.3147, "step": 1490 }, { "epoch": 0.040171973666532315, "grad_norm": 2.4886233806610107, "learning_rate": 4.864496799673158e-05, "loss": 5.3791, "step": 1495 }, { "epoch": 0.04030632809351068, "grad_norm": 2.310999631881714, "learning_rate": 4.8638158790685005e-05, "loss": 5.3549, "step": 1500 }, { "epoch": 0.04044068252048905, "grad_norm": 2.321484088897705, "learning_rate": 4.8631349584638434e-05, "loss": 5.323, "step": 1505 }, { "epoch": 0.04057503694746742, "grad_norm": 2.517014503479004, "learning_rate": 4.8624540378591856e-05, "loss": 5.255, "step": 1510 }, { "epoch": 0.04070939137444579, "grad_norm": 2.575798511505127, "learning_rate": 4.8617731172545285e-05, "loss": 5.379, "step": 1515 }, { "epoch": 0.04084374580142416, "grad_norm": 2.2519052028656006, "learning_rate": 4.861092196649871e-05, "loss": 5.3484, "step": 1520 }, { "epoch": 0.04097810022840252, "grad_norm": 2.4548873901367188, "learning_rate": 4.860411276045214e-05, "loss": 5.3235, "step": 1525 }, { "epoch": 0.041112454655380895, "grad_norm": 2.3341224193573, "learning_rate": 4.859730355440556e-05, "loss": 5.1738, "step": 1530 }, { "epoch": 0.04124680908235926, "grad_norm": 2.8830623626708984, "learning_rate": 4.859049434835898e-05, "loss": 5.3159, "step": 1535 }, { "epoch": 0.04138116350933763, "grad_norm": 2.2873809337615967, "learning_rate": 4.858368514231241e-05, "loss": 5.4937, "step": 1540 }, { "epoch": 0.041515517936316, "grad_norm": 2.40313458442688, "learning_rate": 4.857687593626583e-05, "loss": 5.2487, "step": 1545 }, { "epoch": 0.04164987236329437, "grad_norm": 2.66757869720459, "learning_rate": 4.857006673021926e-05, "loss": 5.2708, "step": 1550 }, { "epoch": 0.04178422679027274, "grad_norm": 2.556011438369751, "learning_rate": 4.8563257524172684e-05, "loss": 5.2721, "step": 1555 }, { "epoch": 0.04191858121725111, "grad_norm": 2.6276113986968994, "learning_rate": 4.8556448318126106e-05, "loss": 5.3838, "step": 1560 }, { "epoch": 0.042052935644229475, "grad_norm": 2.3283538818359375, "learning_rate": 4.854963911207953e-05, "loss": 5.4259, "step": 1565 }, { "epoch": 0.04218729007120785, "grad_norm": 2.340780258178711, "learning_rate": 4.854282990603296e-05, "loss": 5.3155, "step": 1570 }, { "epoch": 0.04232164449818621, "grad_norm": 2.322408676147461, "learning_rate": 4.8536020699986387e-05, "loss": 5.242, "step": 1575 }, { "epoch": 0.042455998925164586, "grad_norm": 2.438852310180664, "learning_rate": 4.852921149393981e-05, "loss": 5.1866, "step": 1580 }, { "epoch": 0.04259035335214295, "grad_norm": 2.482048273086548, "learning_rate": 4.852240228789323e-05, "loss": 5.2217, "step": 1585 }, { "epoch": 0.042724707779121324, "grad_norm": 2.81852126121521, "learning_rate": 4.851559308184666e-05, "loss": 5.3781, "step": 1590 }, { "epoch": 0.04285906220609969, "grad_norm": 2.2842531204223633, "learning_rate": 4.850878387580008e-05, "loss": 5.2835, "step": 1595 }, { "epoch": 0.04299341663307806, "grad_norm": 2.5050878524780273, "learning_rate": 4.850197466975351e-05, "loss": 5.3429, "step": 1600 }, { "epoch": 0.04312777106005643, "grad_norm": 2.304842472076416, "learning_rate": 4.8495165463706934e-05, "loss": 5.2884, "step": 1605 }, { "epoch": 0.0432621254870348, "grad_norm": 2.4210867881774902, "learning_rate": 4.848835625766036e-05, "loss": 5.3353, "step": 1610 }, { "epoch": 0.043396479914013165, "grad_norm": 2.386096477508545, "learning_rate": 4.8481547051613785e-05, "loss": 5.4406, "step": 1615 }, { "epoch": 0.04353083434099154, "grad_norm": 2.1601107120513916, "learning_rate": 4.847473784556721e-05, "loss": 5.207, "step": 1620 }, { "epoch": 0.043665188767969904, "grad_norm": 2.3382086753845215, "learning_rate": 4.846792863952063e-05, "loss": 5.2645, "step": 1625 }, { "epoch": 0.043799543194948276, "grad_norm": 2.4124510288238525, "learning_rate": 4.846111943347406e-05, "loss": 5.2726, "step": 1630 }, { "epoch": 0.04393389762192664, "grad_norm": 2.1609771251678467, "learning_rate": 4.845431022742749e-05, "loss": 5.4364, "step": 1635 }, { "epoch": 0.044068252048905014, "grad_norm": 2.2471845149993896, "learning_rate": 4.844750102138091e-05, "loss": 5.1903, "step": 1640 }, { "epoch": 0.04420260647588338, "grad_norm": 2.5327093601226807, "learning_rate": 4.844069181533433e-05, "loss": 5.3189, "step": 1645 }, { "epoch": 0.04433696090286175, "grad_norm": 2.3454365730285645, "learning_rate": 4.843388260928776e-05, "loss": 5.2266, "step": 1650 }, { "epoch": 0.04447131532984012, "grad_norm": 2.4912145137786865, "learning_rate": 4.8427073403241184e-05, "loss": 5.2098, "step": 1655 }, { "epoch": 0.04460566975681849, "grad_norm": 2.2063989639282227, "learning_rate": 4.8420264197194606e-05, "loss": 5.293, "step": 1660 }, { "epoch": 0.044740024183796856, "grad_norm": 2.6512610912323, "learning_rate": 4.8413454991148035e-05, "loss": 5.4295, "step": 1665 }, { "epoch": 0.04487437861077523, "grad_norm": 2.7759170532226562, "learning_rate": 4.8406645785101464e-05, "loss": 5.3546, "step": 1670 }, { "epoch": 0.045008733037753594, "grad_norm": 2.256654739379883, "learning_rate": 4.8399836579054886e-05, "loss": 5.3061, "step": 1675 }, { "epoch": 0.04514308746473196, "grad_norm": 2.45420503616333, "learning_rate": 4.839302737300831e-05, "loss": 5.3713, "step": 1680 }, { "epoch": 0.04527744189171033, "grad_norm": 2.358295202255249, "learning_rate": 4.838621816696173e-05, "loss": 5.2431, "step": 1685 }, { "epoch": 0.0454117963186887, "grad_norm": 2.406562328338623, "learning_rate": 4.837940896091516e-05, "loss": 5.0867, "step": 1690 }, { "epoch": 0.04554615074566707, "grad_norm": 2.2944185733795166, "learning_rate": 4.837259975486859e-05, "loss": 5.2294, "step": 1695 }, { "epoch": 0.045680505172645436, "grad_norm": 2.407620906829834, "learning_rate": 4.836579054882201e-05, "loss": 5.1957, "step": 1700 }, { "epoch": 0.04581485959962381, "grad_norm": 2.550377130508423, "learning_rate": 4.835898134277543e-05, "loss": 5.2069, "step": 1705 }, { "epoch": 0.045949214026602174, "grad_norm": 2.2927417755126953, "learning_rate": 4.8352172136728856e-05, "loss": 5.2309, "step": 1710 }, { "epoch": 0.046083568453580546, "grad_norm": 2.30096697807312, "learning_rate": 4.8345362930682285e-05, "loss": 5.2355, "step": 1715 }, { "epoch": 0.04621792288055891, "grad_norm": 2.2296783924102783, "learning_rate": 4.833855372463571e-05, "loss": 5.3139, "step": 1720 }, { "epoch": 0.046352277307537285, "grad_norm": 2.24027681350708, "learning_rate": 4.8331744518589136e-05, "loss": 5.0176, "step": 1725 }, { "epoch": 0.04648663173451565, "grad_norm": 2.72025465965271, "learning_rate": 4.832493531254256e-05, "loss": 5.3124, "step": 1730 }, { "epoch": 0.04662098616149402, "grad_norm": 2.337449073791504, "learning_rate": 4.831812610649599e-05, "loss": 5.1725, "step": 1735 }, { "epoch": 0.04675534058847239, "grad_norm": 2.5324695110321045, "learning_rate": 4.831131690044941e-05, "loss": 5.1719, "step": 1740 }, { "epoch": 0.04688969501545076, "grad_norm": 2.5533106327056885, "learning_rate": 4.830450769440283e-05, "loss": 5.2212, "step": 1745 }, { "epoch": 0.047024049442429126, "grad_norm": 2.3930368423461914, "learning_rate": 4.829769848835626e-05, "loss": 5.2615, "step": 1750 }, { "epoch": 0.0471584038694075, "grad_norm": 2.4374313354492188, "learning_rate": 4.829088928230969e-05, "loss": 5.4474, "step": 1755 }, { "epoch": 0.047292758296385864, "grad_norm": 2.4467618465423584, "learning_rate": 4.828408007626311e-05, "loss": 5.1729, "step": 1760 }, { "epoch": 0.04742711272336424, "grad_norm": 2.3877153396606445, "learning_rate": 4.8277270870216534e-05, "loss": 5.2172, "step": 1765 }, { "epoch": 0.0475614671503426, "grad_norm": 2.432770252227783, "learning_rate": 4.827046166416996e-05, "loss": 5.1381, "step": 1770 }, { "epoch": 0.047695821577320975, "grad_norm": 2.4823038578033447, "learning_rate": 4.8263652458123386e-05, "loss": 5.2353, "step": 1775 }, { "epoch": 0.04783017600429934, "grad_norm": 2.2864232063293457, "learning_rate": 4.825684325207681e-05, "loss": 5.2289, "step": 1780 }, { "epoch": 0.04796453043127771, "grad_norm": 2.4113411903381348, "learning_rate": 4.825003404603024e-05, "loss": 5.1762, "step": 1785 }, { "epoch": 0.04809888485825608, "grad_norm": 2.7030029296875, "learning_rate": 4.824322483998366e-05, "loss": 5.295, "step": 1790 }, { "epoch": 0.04823323928523445, "grad_norm": 2.3226568698883057, "learning_rate": 4.823641563393709e-05, "loss": 5.2723, "step": 1795 }, { "epoch": 0.04836759371221282, "grad_norm": 2.478559970855713, "learning_rate": 4.822960642789051e-05, "loss": 5.278, "step": 1800 }, { "epoch": 0.04850194813919119, "grad_norm": 2.6240153312683105, "learning_rate": 4.822279722184393e-05, "loss": 5.2883, "step": 1805 }, { "epoch": 0.048636302566169555, "grad_norm": 2.363565444946289, "learning_rate": 4.821598801579736e-05, "loss": 5.4129, "step": 1810 }, { "epoch": 0.04877065699314793, "grad_norm": 2.653879165649414, "learning_rate": 4.820917880975079e-05, "loss": 5.3435, "step": 1815 }, { "epoch": 0.04890501142012629, "grad_norm": 2.356649875640869, "learning_rate": 4.820236960370421e-05, "loss": 5.166, "step": 1820 }, { "epoch": 0.04903936584710466, "grad_norm": 2.430574893951416, "learning_rate": 4.8195560397657636e-05, "loss": 5.3627, "step": 1825 }, { "epoch": 0.04917372027408303, "grad_norm": 2.2518692016601562, "learning_rate": 4.818875119161106e-05, "loss": 5.3862, "step": 1830 }, { "epoch": 0.0493080747010614, "grad_norm": 2.4937832355499268, "learning_rate": 4.818194198556449e-05, "loss": 5.2607, "step": 1835 }, { "epoch": 0.04944242912803977, "grad_norm": 2.4203927516937256, "learning_rate": 4.817513277951791e-05, "loss": 5.1208, "step": 1840 }, { "epoch": 0.049576783555018135, "grad_norm": 2.1159417629241943, "learning_rate": 4.816832357347134e-05, "loss": 5.3285, "step": 1845 }, { "epoch": 0.04971113798199651, "grad_norm": 2.1734254360198975, "learning_rate": 4.816151436742476e-05, "loss": 5.2623, "step": 1850 }, { "epoch": 0.04984549240897487, "grad_norm": 2.313054323196411, "learning_rate": 4.815470516137818e-05, "loss": 5.3001, "step": 1855 }, { "epoch": 0.049979846835953246, "grad_norm": 2.3189878463745117, "learning_rate": 4.814789595533161e-05, "loss": 5.3274, "step": 1860 }, { "epoch": 0.05011420126293161, "grad_norm": 2.7325174808502197, "learning_rate": 4.8141086749285034e-05, "loss": 5.1856, "step": 1865 }, { "epoch": 0.050248555689909984, "grad_norm": 2.5446767807006836, "learning_rate": 4.8134277543238456e-05, "loss": 5.3078, "step": 1870 }, { "epoch": 0.05038291011688835, "grad_norm": 2.719104766845703, "learning_rate": 4.8127468337191885e-05, "loss": 5.3787, "step": 1875 }, { "epoch": 0.05051726454386672, "grad_norm": 2.8305141925811768, "learning_rate": 4.8120659131145314e-05, "loss": 5.2352, "step": 1880 }, { "epoch": 0.05065161897084509, "grad_norm": 2.584324359893799, "learning_rate": 4.811384992509874e-05, "loss": 5.2162, "step": 1885 }, { "epoch": 0.05078597339782346, "grad_norm": 2.4216935634613037, "learning_rate": 4.810704071905216e-05, "loss": 5.3332, "step": 1890 }, { "epoch": 0.050920327824801825, "grad_norm": 2.4615275859832764, "learning_rate": 4.810023151300558e-05, "loss": 5.2437, "step": 1895 }, { "epoch": 0.0510546822517802, "grad_norm": 2.5613584518432617, "learning_rate": 4.809342230695901e-05, "loss": 5.2118, "step": 1900 }, { "epoch": 0.051189036678758564, "grad_norm": 2.37742018699646, "learning_rate": 4.808661310091244e-05, "loss": 5.0876, "step": 1905 }, { "epoch": 0.051323391105736936, "grad_norm": 2.275043249130249, "learning_rate": 4.807980389486586e-05, "loss": 5.1752, "step": 1910 }, { "epoch": 0.0514577455327153, "grad_norm": 2.3270816802978516, "learning_rate": 4.8072994688819284e-05, "loss": 5.2395, "step": 1915 }, { "epoch": 0.051592099959693674, "grad_norm": 2.2508606910705566, "learning_rate": 4.806618548277271e-05, "loss": 5.3465, "step": 1920 }, { "epoch": 0.05172645438667204, "grad_norm": 2.3401224613189697, "learning_rate": 4.8059376276726135e-05, "loss": 5.1628, "step": 1925 }, { "epoch": 0.05186080881365041, "grad_norm": 2.3724825382232666, "learning_rate": 4.805256707067956e-05, "loss": 5.2503, "step": 1930 }, { "epoch": 0.05199516324062878, "grad_norm": 2.3743178844451904, "learning_rate": 4.8045757864632986e-05, "loss": 5.2006, "step": 1935 }, { "epoch": 0.05212951766760715, "grad_norm": 2.4351425170898438, "learning_rate": 4.8038948658586416e-05, "loss": 5.2431, "step": 1940 }, { "epoch": 0.052263872094585516, "grad_norm": 2.3778090476989746, "learning_rate": 4.803213945253984e-05, "loss": 5.2104, "step": 1945 }, { "epoch": 0.05239822652156389, "grad_norm": 2.2631049156188965, "learning_rate": 4.802533024649326e-05, "loss": 5.1693, "step": 1950 }, { "epoch": 0.052532580948542254, "grad_norm": 2.2894062995910645, "learning_rate": 4.801852104044668e-05, "loss": 5.2593, "step": 1955 }, { "epoch": 0.05266693537552063, "grad_norm": 2.3397817611694336, "learning_rate": 4.801171183440011e-05, "loss": 5.1952, "step": 1960 }, { "epoch": 0.05280128980249899, "grad_norm": 2.0636684894561768, "learning_rate": 4.800490262835354e-05, "loss": 5.2578, "step": 1965 }, { "epoch": 0.052935644229477365, "grad_norm": 2.359027862548828, "learning_rate": 4.799809342230696e-05, "loss": 5.1703, "step": 1970 }, { "epoch": 0.05306999865645573, "grad_norm": 2.3653738498687744, "learning_rate": 4.7991284216260385e-05, "loss": 5.2127, "step": 1975 }, { "epoch": 0.053204353083434096, "grad_norm": 2.5569891929626465, "learning_rate": 4.7984475010213814e-05, "loss": 5.0935, "step": 1980 }, { "epoch": 0.05333870751041247, "grad_norm": 2.4698874950408936, "learning_rate": 4.7977665804167236e-05, "loss": 5.1307, "step": 1985 }, { "epoch": 0.053473061937390834, "grad_norm": 2.2079992294311523, "learning_rate": 4.797085659812066e-05, "loss": 5.1872, "step": 1990 }, { "epoch": 0.053607416364369206, "grad_norm": 2.270055055618286, "learning_rate": 4.796404739207409e-05, "loss": 5.2207, "step": 1995 }, { "epoch": 0.05374177079134757, "grad_norm": 2.425661563873291, "learning_rate": 4.795723818602751e-05, "loss": 5.2196, "step": 2000 }, { "epoch": 0.053876125218325945, "grad_norm": 2.0815727710723877, "learning_rate": 4.795042897998094e-05, "loss": 5.1668, "step": 2005 }, { "epoch": 0.05401047964530431, "grad_norm": 2.305485725402832, "learning_rate": 4.794361977393436e-05, "loss": 5.3338, "step": 2010 }, { "epoch": 0.05414483407228268, "grad_norm": 2.193415880203247, "learning_rate": 4.7936810567887783e-05, "loss": 5.215, "step": 2015 }, { "epoch": 0.05427918849926105, "grad_norm": 2.2659268379211426, "learning_rate": 4.793000136184121e-05, "loss": 5.1185, "step": 2020 }, { "epoch": 0.05441354292623942, "grad_norm": 2.536661386489868, "learning_rate": 4.792319215579464e-05, "loss": 5.1193, "step": 2025 }, { "epoch": 0.054547897353217786, "grad_norm": 2.5080933570861816, "learning_rate": 4.7916382949748064e-05, "loss": 5.1321, "step": 2030 }, { "epoch": 0.05468225178019616, "grad_norm": 2.3799896240234375, "learning_rate": 4.7909573743701486e-05, "loss": 5.1633, "step": 2035 }, { "epoch": 0.054816606207174524, "grad_norm": 2.2105393409729004, "learning_rate": 4.790276453765491e-05, "loss": 5.1659, "step": 2040 }, { "epoch": 0.0549509606341529, "grad_norm": 2.211395025253296, "learning_rate": 4.789595533160834e-05, "loss": 5.0961, "step": 2045 }, { "epoch": 0.05508531506113126, "grad_norm": 2.490356206893921, "learning_rate": 4.788914612556176e-05, "loss": 5.0887, "step": 2050 }, { "epoch": 0.055219669488109635, "grad_norm": 2.2620551586151123, "learning_rate": 4.788233691951519e-05, "loss": 5.2394, "step": 2055 }, { "epoch": 0.055354023915088, "grad_norm": 2.4422574043273926, "learning_rate": 4.787552771346861e-05, "loss": 5.0351, "step": 2060 }, { "epoch": 0.05548837834206637, "grad_norm": 2.757983922958374, "learning_rate": 4.786871850742204e-05, "loss": 5.2689, "step": 2065 }, { "epoch": 0.05562273276904474, "grad_norm": 2.3653347492218018, "learning_rate": 4.786190930137546e-05, "loss": 5.2375, "step": 2070 }, { "epoch": 0.05575708719602311, "grad_norm": 2.1607768535614014, "learning_rate": 4.7855100095328885e-05, "loss": 5.2352, "step": 2075 }, { "epoch": 0.05589144162300148, "grad_norm": 2.5361104011535645, "learning_rate": 4.784829088928231e-05, "loss": 5.1385, "step": 2080 }, { "epoch": 0.05602579604997985, "grad_norm": 2.4672648906707764, "learning_rate": 4.784148168323574e-05, "loss": 5.0615, "step": 2085 }, { "epoch": 0.056160150476958215, "grad_norm": 2.412135601043701, "learning_rate": 4.7834672477189165e-05, "loss": 5.1649, "step": 2090 }, { "epoch": 0.05629450490393659, "grad_norm": 2.3622710704803467, "learning_rate": 4.782786327114259e-05, "loss": 5.049, "step": 2095 }, { "epoch": 0.05642885933091495, "grad_norm": 2.3928167819976807, "learning_rate": 4.782105406509601e-05, "loss": 5.0205, "step": 2100 }, { "epoch": 0.056563213757893326, "grad_norm": 2.446167230606079, "learning_rate": 4.781424485904944e-05, "loss": 5.1282, "step": 2105 }, { "epoch": 0.05669756818487169, "grad_norm": 2.2952096462249756, "learning_rate": 4.780743565300286e-05, "loss": 5.0892, "step": 2110 }, { "epoch": 0.056831922611850064, "grad_norm": 2.3746469020843506, "learning_rate": 4.780062644695629e-05, "loss": 5.2202, "step": 2115 }, { "epoch": 0.05696627703882843, "grad_norm": 2.298170328140259, "learning_rate": 4.779381724090971e-05, "loss": 5.2387, "step": 2120 }, { "epoch": 0.0571006314658068, "grad_norm": 2.349518299102783, "learning_rate": 4.778700803486314e-05, "loss": 5.2094, "step": 2125 }, { "epoch": 0.05723498589278517, "grad_norm": 2.262782335281372, "learning_rate": 4.7780198828816563e-05, "loss": 5.0571, "step": 2130 }, { "epoch": 0.05736934031976353, "grad_norm": 2.4524972438812256, "learning_rate": 4.7773389622769986e-05, "loss": 5.1114, "step": 2135 }, { "epoch": 0.057503694746741905, "grad_norm": 2.3404948711395264, "learning_rate": 4.776658041672341e-05, "loss": 5.1455, "step": 2140 }, { "epoch": 0.05763804917372027, "grad_norm": 2.2864677906036377, "learning_rate": 4.775977121067684e-05, "loss": 5.1923, "step": 2145 }, { "epoch": 0.057772403600698644, "grad_norm": 2.7468621730804443, "learning_rate": 4.7752962004630266e-05, "loss": 5.1457, "step": 2150 }, { "epoch": 0.05790675802767701, "grad_norm": 2.581831455230713, "learning_rate": 4.774615279858369e-05, "loss": 5.1021, "step": 2155 }, { "epoch": 0.05804111245465538, "grad_norm": 2.2192766666412354, "learning_rate": 4.773934359253711e-05, "loss": 5.1822, "step": 2160 }, { "epoch": 0.05817546688163375, "grad_norm": 2.285222053527832, "learning_rate": 4.773253438649053e-05, "loss": 5.1948, "step": 2165 }, { "epoch": 0.05830982130861212, "grad_norm": 2.528031587600708, "learning_rate": 4.772572518044396e-05, "loss": 5.1731, "step": 2170 }, { "epoch": 0.058444175735590485, "grad_norm": 2.3036327362060547, "learning_rate": 4.771891597439739e-05, "loss": 5.2433, "step": 2175 }, { "epoch": 0.05857853016256886, "grad_norm": 2.3499045372009277, "learning_rate": 4.771210676835081e-05, "loss": 5.1839, "step": 2180 }, { "epoch": 0.05871288458954722, "grad_norm": 3.9805495738983154, "learning_rate": 4.7705297562304235e-05, "loss": 5.0259, "step": 2185 }, { "epoch": 0.058847239016525596, "grad_norm": 2.440889835357666, "learning_rate": 4.7698488356257665e-05, "loss": 5.1003, "step": 2190 }, { "epoch": 0.05898159344350396, "grad_norm": 2.622908115386963, "learning_rate": 4.769167915021109e-05, "loss": 5.1748, "step": 2195 }, { "epoch": 0.059115947870482334, "grad_norm": 2.197916030883789, "learning_rate": 4.768486994416451e-05, "loss": 5.2149, "step": 2200 }, { "epoch": 0.0592503022974607, "grad_norm": 2.259551763534546, "learning_rate": 4.767806073811794e-05, "loss": 5.0572, "step": 2205 }, { "epoch": 0.05938465672443907, "grad_norm": 2.4556822776794434, "learning_rate": 4.767125153207137e-05, "loss": 5.2114, "step": 2210 }, { "epoch": 0.05951901115141744, "grad_norm": 2.4652628898620605, "learning_rate": 4.766444232602479e-05, "loss": 5.1193, "step": 2215 }, { "epoch": 0.05965336557839581, "grad_norm": 2.231334924697876, "learning_rate": 4.765763311997821e-05, "loss": 5.0828, "step": 2220 }, { "epoch": 0.059787720005374176, "grad_norm": 2.242621660232544, "learning_rate": 4.7650823913931634e-05, "loss": 5.1689, "step": 2225 }, { "epoch": 0.05992207443235255, "grad_norm": 2.6371543407440186, "learning_rate": 4.764401470788506e-05, "loss": 5.1462, "step": 2230 }, { "epoch": 0.060056428859330914, "grad_norm": 2.414228677749634, "learning_rate": 4.763720550183849e-05, "loss": 5.1458, "step": 2235 }, { "epoch": 0.060190783286309286, "grad_norm": 2.395897626876831, "learning_rate": 4.7630396295791914e-05, "loss": 5.0641, "step": 2240 }, { "epoch": 0.06032513771328765, "grad_norm": 2.5467405319213867, "learning_rate": 4.7623587089745337e-05, "loss": 5.1559, "step": 2245 }, { "epoch": 0.060459492140266025, "grad_norm": 2.7299716472625732, "learning_rate": 4.7616777883698766e-05, "loss": 5.088, "step": 2250 }, { "epoch": 0.06059384656724439, "grad_norm": 2.2927069664001465, "learning_rate": 4.760996867765219e-05, "loss": 5.1461, "step": 2255 }, { "epoch": 0.06072820099422276, "grad_norm": 2.3553717136383057, "learning_rate": 4.760315947160561e-05, "loss": 5.0812, "step": 2260 }, { "epoch": 0.06086255542120113, "grad_norm": 2.4433770179748535, "learning_rate": 4.759635026555904e-05, "loss": 5.1092, "step": 2265 }, { "epoch": 0.0609969098481795, "grad_norm": 2.2795238494873047, "learning_rate": 4.758954105951247e-05, "loss": 4.9923, "step": 2270 }, { "epoch": 0.061131264275157866, "grad_norm": 2.5695433616638184, "learning_rate": 4.758273185346589e-05, "loss": 5.1081, "step": 2275 }, { "epoch": 0.06126561870213623, "grad_norm": 2.391657829284668, "learning_rate": 4.757592264741931e-05, "loss": 5.0966, "step": 2280 }, { "epoch": 0.061399973129114604, "grad_norm": 2.594651699066162, "learning_rate": 4.7569113441372735e-05, "loss": 4.9291, "step": 2285 }, { "epoch": 0.06153432755609297, "grad_norm": 2.371537446975708, "learning_rate": 4.756230423532616e-05, "loss": 5.1881, "step": 2290 }, { "epoch": 0.06166868198307134, "grad_norm": 2.6550984382629395, "learning_rate": 4.7555495029279586e-05, "loss": 5.0681, "step": 2295 }, { "epoch": 0.06180303641004971, "grad_norm": 2.2198615074157715, "learning_rate": 4.7548685823233015e-05, "loss": 5.2027, "step": 2300 }, { "epoch": 0.06193739083702808, "grad_norm": 2.3532638549804688, "learning_rate": 4.754187661718644e-05, "loss": 5.1195, "step": 2305 }, { "epoch": 0.062071745264006446, "grad_norm": 2.3932807445526123, "learning_rate": 4.753506741113986e-05, "loss": 5.1162, "step": 2310 }, { "epoch": 0.06220609969098482, "grad_norm": 2.3105628490448, "learning_rate": 4.752825820509329e-05, "loss": 5.1738, "step": 2315 }, { "epoch": 0.062340454117963184, "grad_norm": 2.4400136470794678, "learning_rate": 4.752144899904671e-05, "loss": 5.146, "step": 2320 }, { "epoch": 0.06247480854494156, "grad_norm": 2.5597667694091797, "learning_rate": 4.751463979300014e-05, "loss": 5.1395, "step": 2325 }, { "epoch": 0.06260916297191993, "grad_norm": 2.564850330352783, "learning_rate": 4.750783058695356e-05, "loss": 5.1092, "step": 2330 }, { "epoch": 0.06274351739889829, "grad_norm": 2.501206398010254, "learning_rate": 4.750102138090699e-05, "loss": 5.1121, "step": 2335 }, { "epoch": 0.06287787182587666, "grad_norm": 2.3284380435943604, "learning_rate": 4.7494212174860414e-05, "loss": 5.1128, "step": 2340 }, { "epoch": 0.06301222625285503, "grad_norm": 2.2180492877960205, "learning_rate": 4.7487402968813836e-05, "loss": 5.0992, "step": 2345 }, { "epoch": 0.0631465806798334, "grad_norm": 2.617906332015991, "learning_rate": 4.748059376276726e-05, "loss": 5.0616, "step": 2350 }, { "epoch": 0.06328093510681176, "grad_norm": 2.268599033355713, "learning_rate": 4.747378455672069e-05, "loss": 5.012, "step": 2355 }, { "epoch": 0.06341528953379014, "grad_norm": 2.400578260421753, "learning_rate": 4.7466975350674117e-05, "loss": 5.1167, "step": 2360 }, { "epoch": 0.06354964396076851, "grad_norm": 2.1784756183624268, "learning_rate": 4.746016614462754e-05, "loss": 5.2294, "step": 2365 }, { "epoch": 0.06368399838774688, "grad_norm": 2.2773759365081787, "learning_rate": 4.745335693858096e-05, "loss": 5.0885, "step": 2370 }, { "epoch": 0.06381835281472524, "grad_norm": 2.4615862369537354, "learning_rate": 4.744654773253439e-05, "loss": 5.077, "step": 2375 }, { "epoch": 0.06395270724170361, "grad_norm": 2.4551687240600586, "learning_rate": 4.743973852648781e-05, "loss": 5.0032, "step": 2380 }, { "epoch": 0.06408706166868199, "grad_norm": 2.506337881088257, "learning_rate": 4.743292932044124e-05, "loss": 5.1934, "step": 2385 }, { "epoch": 0.06422141609566036, "grad_norm": 2.1597728729248047, "learning_rate": 4.7426120114394664e-05, "loss": 5.2099, "step": 2390 }, { "epoch": 0.06435577052263872, "grad_norm": 2.6743435859680176, "learning_rate": 4.741931090834809e-05, "loss": 5.0705, "step": 2395 }, { "epoch": 0.06449012494961709, "grad_norm": 2.736037254333496, "learning_rate": 4.7412501702301515e-05, "loss": 5.1356, "step": 2400 }, { "epoch": 0.06462447937659546, "grad_norm": 2.264522075653076, "learning_rate": 4.740569249625494e-05, "loss": 4.9938, "step": 2405 }, { "epoch": 0.06475883380357383, "grad_norm": 2.451328754425049, "learning_rate": 4.739888329020836e-05, "loss": 5.0513, "step": 2410 }, { "epoch": 0.06489318823055219, "grad_norm": 2.328263998031616, "learning_rate": 4.739207408416179e-05, "loss": 4.9759, "step": 2415 }, { "epoch": 0.06502754265753057, "grad_norm": 2.426508903503418, "learning_rate": 4.738526487811522e-05, "loss": 5.0611, "step": 2420 }, { "epoch": 0.06516189708450894, "grad_norm": 2.6437110900878906, "learning_rate": 4.737845567206864e-05, "loss": 5.0155, "step": 2425 }, { "epoch": 0.06529625151148731, "grad_norm": 2.3646633625030518, "learning_rate": 4.737164646602206e-05, "loss": 4.9942, "step": 2430 }, { "epoch": 0.06543060593846567, "grad_norm": 2.51942777633667, "learning_rate": 4.7364837259975484e-05, "loss": 4.986, "step": 2435 }, { "epoch": 0.06556496036544404, "grad_norm": 2.5781991481781006, "learning_rate": 4.7358028053928914e-05, "loss": 4.9825, "step": 2440 }, { "epoch": 0.06569931479242241, "grad_norm": 2.2016520500183105, "learning_rate": 4.735121884788234e-05, "loss": 5.0746, "step": 2445 }, { "epoch": 0.06583366921940077, "grad_norm": 2.1721441745758057, "learning_rate": 4.7344409641835765e-05, "loss": 5.1294, "step": 2450 }, { "epoch": 0.06596802364637915, "grad_norm": 2.2609691619873047, "learning_rate": 4.733760043578919e-05, "loss": 4.9707, "step": 2455 }, { "epoch": 0.06610237807335752, "grad_norm": 2.3571248054504395, "learning_rate": 4.7330791229742616e-05, "loss": 4.9806, "step": 2460 }, { "epoch": 0.06623673250033589, "grad_norm": 2.3186182975769043, "learning_rate": 4.732398202369604e-05, "loss": 5.164, "step": 2465 }, { "epoch": 0.06637108692731425, "grad_norm": 2.4597530364990234, "learning_rate": 4.731717281764946e-05, "loss": 5.088, "step": 2470 }, { "epoch": 0.06650544135429262, "grad_norm": 2.6319687366485596, "learning_rate": 4.731036361160289e-05, "loss": 4.954, "step": 2475 }, { "epoch": 0.066639795781271, "grad_norm": 2.141265869140625, "learning_rate": 4.730355440555632e-05, "loss": 5.1503, "step": 2480 }, { "epoch": 0.06677415020824937, "grad_norm": 2.9543850421905518, "learning_rate": 4.729674519950974e-05, "loss": 5.1751, "step": 2485 }, { "epoch": 0.06690850463522773, "grad_norm": 2.47092342376709, "learning_rate": 4.728993599346316e-05, "loss": 5.0543, "step": 2490 }, { "epoch": 0.0670428590622061, "grad_norm": 2.329517126083374, "learning_rate": 4.7283126787416586e-05, "loss": 5.1407, "step": 2495 }, { "epoch": 0.06717721348918447, "grad_norm": 2.660526752471924, "learning_rate": 4.7276317581370015e-05, "loss": 4.9308, "step": 2500 }, { "epoch": 0.06731156791616284, "grad_norm": 2.4411728382110596, "learning_rate": 4.726950837532344e-05, "loss": 5.1104, "step": 2505 }, { "epoch": 0.0674459223431412, "grad_norm": 2.4146604537963867, "learning_rate": 4.7262699169276866e-05, "loss": 5.0502, "step": 2510 }, { "epoch": 0.06758027677011957, "grad_norm": 2.4033758640289307, "learning_rate": 4.725588996323029e-05, "loss": 4.9998, "step": 2515 }, { "epoch": 0.06771463119709795, "grad_norm": 2.470900058746338, "learning_rate": 4.724908075718372e-05, "loss": 5.0287, "step": 2520 }, { "epoch": 0.06784898562407632, "grad_norm": 2.6848537921905518, "learning_rate": 4.724227155113714e-05, "loss": 4.9814, "step": 2525 }, { "epoch": 0.06798334005105468, "grad_norm": 2.4615607261657715, "learning_rate": 4.723546234509056e-05, "loss": 5.1187, "step": 2530 }, { "epoch": 0.06811769447803305, "grad_norm": 2.095226764678955, "learning_rate": 4.722865313904399e-05, "loss": 4.9794, "step": 2535 }, { "epoch": 0.06825204890501142, "grad_norm": 2.9362363815307617, "learning_rate": 4.722184393299742e-05, "loss": 5.027, "step": 2540 }, { "epoch": 0.0683864033319898, "grad_norm": 2.225034713745117, "learning_rate": 4.721503472695084e-05, "loss": 4.9875, "step": 2545 }, { "epoch": 0.06852075775896815, "grad_norm": 2.2556567192077637, "learning_rate": 4.7208225520904264e-05, "loss": 5.1891, "step": 2550 }, { "epoch": 0.06865511218594653, "grad_norm": 2.3483803272247314, "learning_rate": 4.720141631485769e-05, "loss": 5.0629, "step": 2555 }, { "epoch": 0.0687894666129249, "grad_norm": 2.343886613845825, "learning_rate": 4.7194607108811116e-05, "loss": 5.081, "step": 2560 }, { "epoch": 0.06892382103990327, "grad_norm": 2.187832832336426, "learning_rate": 4.718779790276454e-05, "loss": 5.0394, "step": 2565 }, { "epoch": 0.06905817546688163, "grad_norm": 2.317049026489258, "learning_rate": 4.718098869671797e-05, "loss": 5.0632, "step": 2570 }, { "epoch": 0.06919252989386, "grad_norm": 2.4061245918273926, "learning_rate": 4.717417949067139e-05, "loss": 5.1499, "step": 2575 }, { "epoch": 0.06932688432083838, "grad_norm": 2.3993353843688965, "learning_rate": 4.716737028462482e-05, "loss": 5.0663, "step": 2580 }, { "epoch": 0.06946123874781673, "grad_norm": 2.314722776412964, "learning_rate": 4.716056107857824e-05, "loss": 5.1535, "step": 2585 }, { "epoch": 0.0695955931747951, "grad_norm": 2.522005319595337, "learning_rate": 4.715375187253166e-05, "loss": 5.0434, "step": 2590 }, { "epoch": 0.06972994760177348, "grad_norm": 2.3243606090545654, "learning_rate": 4.714694266648509e-05, "loss": 5.0773, "step": 2595 }, { "epoch": 0.06986430202875185, "grad_norm": 2.281684160232544, "learning_rate": 4.7140133460438514e-05, "loss": 5.1993, "step": 2600 }, { "epoch": 0.06999865645573021, "grad_norm": 2.241349220275879, "learning_rate": 4.713332425439194e-05, "loss": 5.1532, "step": 2605 }, { "epoch": 0.07013301088270858, "grad_norm": 2.460827112197876, "learning_rate": 4.7126515048345366e-05, "loss": 5.1667, "step": 2610 }, { "epoch": 0.07026736530968695, "grad_norm": 2.3938283920288086, "learning_rate": 4.711970584229879e-05, "loss": 5.0523, "step": 2615 }, { "epoch": 0.07040171973666533, "grad_norm": 2.3147292137145996, "learning_rate": 4.711289663625221e-05, "loss": 4.9852, "step": 2620 }, { "epoch": 0.07053607416364369, "grad_norm": 2.4808735847473145, "learning_rate": 4.710608743020564e-05, "loss": 5.1447, "step": 2625 }, { "epoch": 0.07067042859062206, "grad_norm": 2.366708517074585, "learning_rate": 4.709927822415907e-05, "loss": 4.9905, "step": 2630 }, { "epoch": 0.07080478301760043, "grad_norm": 2.293250322341919, "learning_rate": 4.709246901811249e-05, "loss": 5.1174, "step": 2635 }, { "epoch": 0.0709391374445788, "grad_norm": 2.2710607051849365, "learning_rate": 4.708565981206591e-05, "loss": 4.9623, "step": 2640 }, { "epoch": 0.07107349187155716, "grad_norm": 2.392634868621826, "learning_rate": 4.707885060601934e-05, "loss": 5.072, "step": 2645 }, { "epoch": 0.07120784629853553, "grad_norm": 2.289745807647705, "learning_rate": 4.7072041399972764e-05, "loss": 5.0861, "step": 2650 }, { "epoch": 0.07134220072551391, "grad_norm": 2.070662021636963, "learning_rate": 4.706523219392619e-05, "loss": 5.0058, "step": 2655 }, { "epoch": 0.07147655515249228, "grad_norm": 2.2136707305908203, "learning_rate": 4.7058422987879615e-05, "loss": 5.0072, "step": 2660 }, { "epoch": 0.07161090957947064, "grad_norm": 2.3967554569244385, "learning_rate": 4.7051613781833044e-05, "loss": 5.028, "step": 2665 }, { "epoch": 0.07174526400644901, "grad_norm": 3.260460615158081, "learning_rate": 4.704480457578647e-05, "loss": 5.1053, "step": 2670 }, { "epoch": 0.07187961843342738, "grad_norm": 2.705599784851074, "learning_rate": 4.703799536973989e-05, "loss": 4.956, "step": 2675 }, { "epoch": 0.07201397286040576, "grad_norm": 2.2675018310546875, "learning_rate": 4.703118616369331e-05, "loss": 5.0302, "step": 2680 }, { "epoch": 0.07214832728738411, "grad_norm": 2.222308397293091, "learning_rate": 4.702437695764674e-05, "loss": 5.0564, "step": 2685 }, { "epoch": 0.07228268171436249, "grad_norm": 2.364720344543457, "learning_rate": 4.701756775160017e-05, "loss": 5.0174, "step": 2690 }, { "epoch": 0.07241703614134086, "grad_norm": 2.5279181003570557, "learning_rate": 4.701075854555359e-05, "loss": 5.0167, "step": 2695 }, { "epoch": 0.07255139056831923, "grad_norm": 2.3830904960632324, "learning_rate": 4.7003949339507014e-05, "loss": 4.9756, "step": 2700 }, { "epoch": 0.07268574499529759, "grad_norm": 2.4851126670837402, "learning_rate": 4.699714013346044e-05, "loss": 5.0366, "step": 2705 }, { "epoch": 0.07282009942227596, "grad_norm": 2.32207989692688, "learning_rate": 4.6990330927413865e-05, "loss": 5.0195, "step": 2710 }, { "epoch": 0.07295445384925434, "grad_norm": 2.4050586223602295, "learning_rate": 4.698352172136729e-05, "loss": 5.1234, "step": 2715 }, { "epoch": 0.07308880827623271, "grad_norm": 2.2521891593933105, "learning_rate": 4.6976712515320716e-05, "loss": 5.0699, "step": 2720 }, { "epoch": 0.07322316270321107, "grad_norm": 2.549636125564575, "learning_rate": 4.6969903309274145e-05, "loss": 4.9025, "step": 2725 }, { "epoch": 0.07335751713018944, "grad_norm": 2.7519614696502686, "learning_rate": 4.696309410322757e-05, "loss": 4.9843, "step": 2730 }, { "epoch": 0.07349187155716781, "grad_norm": 2.649751901626587, "learning_rate": 4.695628489718099e-05, "loss": 5.0242, "step": 2735 }, { "epoch": 0.07362622598414617, "grad_norm": 2.270838499069214, "learning_rate": 4.694947569113441e-05, "loss": 4.9189, "step": 2740 }, { "epoch": 0.07376058041112454, "grad_norm": 2.2538695335388184, "learning_rate": 4.694266648508784e-05, "loss": 5.0254, "step": 2745 }, { "epoch": 0.07389493483810292, "grad_norm": 2.377070426940918, "learning_rate": 4.693585727904127e-05, "loss": 5.1025, "step": 2750 }, { "epoch": 0.07402928926508129, "grad_norm": 2.4636082649230957, "learning_rate": 4.692904807299469e-05, "loss": 5.0375, "step": 2755 }, { "epoch": 0.07416364369205965, "grad_norm": 2.2239975929260254, "learning_rate": 4.6922238866948115e-05, "loss": 4.8865, "step": 2760 }, { "epoch": 0.07429799811903802, "grad_norm": 2.287213087081909, "learning_rate": 4.691542966090154e-05, "loss": 5.1238, "step": 2765 }, { "epoch": 0.07443235254601639, "grad_norm": 2.3553996086120605, "learning_rate": 4.6908620454854966e-05, "loss": 4.9435, "step": 2770 }, { "epoch": 0.07456670697299476, "grad_norm": 2.5199694633483887, "learning_rate": 4.690181124880839e-05, "loss": 4.9821, "step": 2775 }, { "epoch": 0.07470106139997312, "grad_norm": 2.2390990257263184, "learning_rate": 4.689500204276182e-05, "loss": 5.0951, "step": 2780 }, { "epoch": 0.0748354158269515, "grad_norm": 2.681732177734375, "learning_rate": 4.688819283671524e-05, "loss": 5.0516, "step": 2785 }, { "epoch": 0.07496977025392987, "grad_norm": 2.4223461151123047, "learning_rate": 4.688138363066867e-05, "loss": 5.0633, "step": 2790 }, { "epoch": 0.07510412468090824, "grad_norm": 2.39823579788208, "learning_rate": 4.687457442462209e-05, "loss": 5.0584, "step": 2795 }, { "epoch": 0.0752384791078866, "grad_norm": 2.076220989227295, "learning_rate": 4.6867765218575513e-05, "loss": 4.9805, "step": 2800 }, { "epoch": 0.07537283353486497, "grad_norm": 2.4038548469543457, "learning_rate": 4.686095601252894e-05, "loss": 5.0362, "step": 2805 }, { "epoch": 0.07550718796184334, "grad_norm": 2.4277186393737793, "learning_rate": 4.685414680648237e-05, "loss": 5.029, "step": 2810 }, { "epoch": 0.07564154238882172, "grad_norm": 2.360374689102173, "learning_rate": 4.6847337600435794e-05, "loss": 4.9493, "step": 2815 }, { "epoch": 0.07577589681580008, "grad_norm": 2.3976619243621826, "learning_rate": 4.6840528394389216e-05, "loss": 4.8935, "step": 2820 }, { "epoch": 0.07591025124277845, "grad_norm": 2.2577006816864014, "learning_rate": 4.683371918834264e-05, "loss": 4.955, "step": 2825 }, { "epoch": 0.07604460566975682, "grad_norm": 2.4220681190490723, "learning_rate": 4.682690998229607e-05, "loss": 4.9848, "step": 2830 }, { "epoch": 0.0761789600967352, "grad_norm": 2.5361251831054688, "learning_rate": 4.682010077624949e-05, "loss": 4.8206, "step": 2835 }, { "epoch": 0.07631331452371355, "grad_norm": 2.34853196144104, "learning_rate": 4.681329157020292e-05, "loss": 4.9062, "step": 2840 }, { "epoch": 0.07644766895069192, "grad_norm": 2.45651912689209, "learning_rate": 4.680648236415634e-05, "loss": 4.8791, "step": 2845 }, { "epoch": 0.0765820233776703, "grad_norm": 2.2464826107025146, "learning_rate": 4.679967315810977e-05, "loss": 5.0624, "step": 2850 }, { "epoch": 0.07671637780464867, "grad_norm": 2.316802978515625, "learning_rate": 4.679286395206319e-05, "loss": 4.9207, "step": 2855 }, { "epoch": 0.07685073223162703, "grad_norm": 2.4910387992858887, "learning_rate": 4.6786054746016615e-05, "loss": 5.0292, "step": 2860 }, { "epoch": 0.0769850866586054, "grad_norm": 2.3384172916412354, "learning_rate": 4.6779245539970044e-05, "loss": 4.9664, "step": 2865 }, { "epoch": 0.07711944108558377, "grad_norm": 2.515455484390259, "learning_rate": 4.677243633392347e-05, "loss": 5.0691, "step": 2870 }, { "epoch": 0.07725379551256215, "grad_norm": 2.5678048133850098, "learning_rate": 4.6765627127876895e-05, "loss": 4.9324, "step": 2875 }, { "epoch": 0.0773881499395405, "grad_norm": 2.3092541694641113, "learning_rate": 4.675881792183032e-05, "loss": 5.0426, "step": 2880 }, { "epoch": 0.07752250436651888, "grad_norm": 2.2976062297821045, "learning_rate": 4.675200871578374e-05, "loss": 4.8977, "step": 2885 }, { "epoch": 0.07765685879349725, "grad_norm": 2.2484400272369385, "learning_rate": 4.674519950973716e-05, "loss": 5.1509, "step": 2890 }, { "epoch": 0.07779121322047561, "grad_norm": 2.293313980102539, "learning_rate": 4.673839030369059e-05, "loss": 5.057, "step": 2895 }, { "epoch": 0.07792556764745398, "grad_norm": 2.252776861190796, "learning_rate": 4.673158109764402e-05, "loss": 5.0953, "step": 2900 }, { "epoch": 0.07805992207443235, "grad_norm": 2.192119598388672, "learning_rate": 4.672477189159744e-05, "loss": 4.996, "step": 2905 }, { "epoch": 0.07819427650141073, "grad_norm": 2.5377445220947266, "learning_rate": 4.6717962685550864e-05, "loss": 4.8901, "step": 2910 }, { "epoch": 0.07832863092838908, "grad_norm": 2.3738508224487305, "learning_rate": 4.671115347950429e-05, "loss": 4.9915, "step": 2915 }, { "epoch": 0.07846298535536746, "grad_norm": 2.434407949447632, "learning_rate": 4.6704344273457716e-05, "loss": 4.9025, "step": 2920 }, { "epoch": 0.07859733978234583, "grad_norm": 2.2632462978363037, "learning_rate": 4.669753506741114e-05, "loss": 4.9604, "step": 2925 }, { "epoch": 0.0787316942093242, "grad_norm": 2.3816940784454346, "learning_rate": 4.669072586136457e-05, "loss": 4.9577, "step": 2930 }, { "epoch": 0.07886604863630256, "grad_norm": 2.3685219287872314, "learning_rate": 4.6683916655317996e-05, "loss": 4.9602, "step": 2935 }, { "epoch": 0.07900040306328093, "grad_norm": 2.2433366775512695, "learning_rate": 4.667710744927142e-05, "loss": 5.0234, "step": 2940 }, { "epoch": 0.0791347574902593, "grad_norm": 2.35847806930542, "learning_rate": 4.667029824322484e-05, "loss": 5.0336, "step": 2945 }, { "epoch": 0.07926911191723768, "grad_norm": 2.6191012859344482, "learning_rate": 4.666348903717826e-05, "loss": 4.8677, "step": 2950 }, { "epoch": 0.07940346634421604, "grad_norm": 2.3648300170898438, "learning_rate": 4.665667983113169e-05, "loss": 4.9328, "step": 2955 }, { "epoch": 0.07953782077119441, "grad_norm": 2.472687005996704, "learning_rate": 4.664987062508512e-05, "loss": 5.0775, "step": 2960 }, { "epoch": 0.07967217519817278, "grad_norm": 2.536130428314209, "learning_rate": 4.664306141903854e-05, "loss": 4.9376, "step": 2965 }, { "epoch": 0.07980652962515115, "grad_norm": 2.417445659637451, "learning_rate": 4.6636252212991965e-05, "loss": 5.0188, "step": 2970 }, { "epoch": 0.07994088405212951, "grad_norm": 2.951399087905884, "learning_rate": 4.6629443006945394e-05, "loss": 4.9488, "step": 2975 }, { "epoch": 0.08007523847910789, "grad_norm": 2.51804256439209, "learning_rate": 4.662263380089882e-05, "loss": 4.9984, "step": 2980 }, { "epoch": 0.08020959290608626, "grad_norm": 2.422224998474121, "learning_rate": 4.661582459485224e-05, "loss": 4.9569, "step": 2985 }, { "epoch": 0.08034394733306463, "grad_norm": 2.5573370456695557, "learning_rate": 4.660901538880567e-05, "loss": 4.9572, "step": 2990 }, { "epoch": 0.08047830176004299, "grad_norm": 2.5019991397857666, "learning_rate": 4.66022061827591e-05, "loss": 4.9161, "step": 2995 }, { "epoch": 0.08061265618702136, "grad_norm": 2.424339532852173, "learning_rate": 4.659539697671252e-05, "loss": 4.9821, "step": 3000 }, { "epoch": 0.08074701061399973, "grad_norm": 2.5886359214782715, "learning_rate": 4.658858777066594e-05, "loss": 4.9406, "step": 3005 }, { "epoch": 0.0808813650409781, "grad_norm": 2.5271315574645996, "learning_rate": 4.6581778564619364e-05, "loss": 4.9047, "step": 3010 }, { "epoch": 0.08101571946795647, "grad_norm": 2.215725898742676, "learning_rate": 4.657496935857279e-05, "loss": 4.9045, "step": 3015 }, { "epoch": 0.08115007389493484, "grad_norm": 2.2105448246002197, "learning_rate": 4.656816015252622e-05, "loss": 5.0101, "step": 3020 }, { "epoch": 0.08128442832191321, "grad_norm": 2.5475404262542725, "learning_rate": 4.6561350946479644e-05, "loss": 5.0141, "step": 3025 }, { "epoch": 0.08141878274889158, "grad_norm": 2.4291939735412598, "learning_rate": 4.6554541740433067e-05, "loss": 5.0046, "step": 3030 }, { "epoch": 0.08155313717586994, "grad_norm": 2.349987506866455, "learning_rate": 4.654773253438649e-05, "loss": 4.9757, "step": 3035 }, { "epoch": 0.08168749160284831, "grad_norm": 2.3299314975738525, "learning_rate": 4.654092332833992e-05, "loss": 5.002, "step": 3040 }, { "epoch": 0.08182184602982669, "grad_norm": 2.4213287830352783, "learning_rate": 4.653411412229334e-05, "loss": 5.0308, "step": 3045 }, { "epoch": 0.08195620045680505, "grad_norm": 2.607639789581299, "learning_rate": 4.652730491624677e-05, "loss": 4.8631, "step": 3050 }, { "epoch": 0.08209055488378342, "grad_norm": 2.5304009914398193, "learning_rate": 4.652049571020019e-05, "loss": 4.8938, "step": 3055 }, { "epoch": 0.08222490931076179, "grad_norm": 2.324573040008545, "learning_rate": 4.651368650415362e-05, "loss": 4.9556, "step": 3060 }, { "epoch": 0.08235926373774016, "grad_norm": 2.276792049407959, "learning_rate": 4.650687729810704e-05, "loss": 4.9162, "step": 3065 }, { "epoch": 0.08249361816471852, "grad_norm": 2.319880485534668, "learning_rate": 4.6500068092060465e-05, "loss": 4.9036, "step": 3070 }, { "epoch": 0.0826279725916969, "grad_norm": 2.460873603820801, "learning_rate": 4.6493258886013894e-05, "loss": 4.838, "step": 3075 }, { "epoch": 0.08276232701867527, "grad_norm": 2.4000260829925537, "learning_rate": 4.648644967996732e-05, "loss": 4.9028, "step": 3080 }, { "epoch": 0.08289668144565364, "grad_norm": 2.5372064113616943, "learning_rate": 4.6479640473920745e-05, "loss": 5.0519, "step": 3085 }, { "epoch": 0.083031035872632, "grad_norm": 2.3044536113739014, "learning_rate": 4.647283126787417e-05, "loss": 5.0919, "step": 3090 }, { "epoch": 0.08316539029961037, "grad_norm": 2.452742099761963, "learning_rate": 4.646602206182759e-05, "loss": 4.9606, "step": 3095 }, { "epoch": 0.08329974472658874, "grad_norm": 2.202953338623047, "learning_rate": 4.645921285578102e-05, "loss": 4.91, "step": 3100 }, { "epoch": 0.08343409915356712, "grad_norm": 2.39689564704895, "learning_rate": 4.645240364973444e-05, "loss": 4.8595, "step": 3105 }, { "epoch": 0.08356845358054547, "grad_norm": 2.6198947429656982, "learning_rate": 4.644559444368787e-05, "loss": 4.985, "step": 3110 }, { "epoch": 0.08370280800752385, "grad_norm": 2.3617098331451416, "learning_rate": 4.643878523764129e-05, "loss": 4.9485, "step": 3115 }, { "epoch": 0.08383716243450222, "grad_norm": 2.5182414054870605, "learning_rate": 4.643197603159472e-05, "loss": 4.904, "step": 3120 }, { "epoch": 0.08397151686148059, "grad_norm": 2.5316312313079834, "learning_rate": 4.6425166825548144e-05, "loss": 5.0195, "step": 3125 }, { "epoch": 0.08410587128845895, "grad_norm": 2.136096715927124, "learning_rate": 4.6418357619501566e-05, "loss": 5.0062, "step": 3130 }, { "epoch": 0.08424022571543732, "grad_norm": 2.3298838138580322, "learning_rate": 4.641154841345499e-05, "loss": 4.9261, "step": 3135 }, { "epoch": 0.0843745801424157, "grad_norm": 2.1770806312561035, "learning_rate": 4.6404739207408424e-05, "loss": 5.1226, "step": 3140 }, { "epoch": 0.08450893456939407, "grad_norm": 2.1904795169830322, "learning_rate": 4.6397930001361846e-05, "loss": 4.8955, "step": 3145 }, { "epoch": 0.08464328899637243, "grad_norm": 2.2624268531799316, "learning_rate": 4.639112079531527e-05, "loss": 5.0873, "step": 3150 }, { "epoch": 0.0847776434233508, "grad_norm": 2.3995871543884277, "learning_rate": 4.638431158926869e-05, "loss": 5.1072, "step": 3155 }, { "epoch": 0.08491199785032917, "grad_norm": 2.1977829933166504, "learning_rate": 4.637750238322212e-05, "loss": 5.0039, "step": 3160 }, { "epoch": 0.08504635227730754, "grad_norm": 2.339897871017456, "learning_rate": 4.637069317717554e-05, "loss": 5.0036, "step": 3165 }, { "epoch": 0.0851807067042859, "grad_norm": 2.371072769165039, "learning_rate": 4.636388397112897e-05, "loss": 4.8441, "step": 3170 }, { "epoch": 0.08531506113126427, "grad_norm": 2.293200969696045, "learning_rate": 4.6357074765082394e-05, "loss": 4.9125, "step": 3175 }, { "epoch": 0.08544941555824265, "grad_norm": 2.3724539279937744, "learning_rate": 4.6350265559035816e-05, "loss": 4.9601, "step": 3180 }, { "epoch": 0.08558376998522102, "grad_norm": 2.492265224456787, "learning_rate": 4.6343456352989245e-05, "loss": 4.9638, "step": 3185 }, { "epoch": 0.08571812441219938, "grad_norm": 2.412221670150757, "learning_rate": 4.633664714694267e-05, "loss": 4.9121, "step": 3190 }, { "epoch": 0.08585247883917775, "grad_norm": 2.345449447631836, "learning_rate": 4.632983794089609e-05, "loss": 4.826, "step": 3195 }, { "epoch": 0.08598683326615612, "grad_norm": 2.3856143951416016, "learning_rate": 4.632302873484952e-05, "loss": 4.9671, "step": 3200 }, { "epoch": 0.08612118769313448, "grad_norm": 2.2164435386657715, "learning_rate": 4.631621952880295e-05, "loss": 4.8246, "step": 3205 }, { "epoch": 0.08625554212011285, "grad_norm": 2.436462640762329, "learning_rate": 4.630941032275637e-05, "loss": 5.0598, "step": 3210 }, { "epoch": 0.08638989654709123, "grad_norm": 2.385547399520874, "learning_rate": 4.630260111670979e-05, "loss": 4.8489, "step": 3215 }, { "epoch": 0.0865242509740696, "grad_norm": 2.5401670932769775, "learning_rate": 4.6295791910663214e-05, "loss": 4.8629, "step": 3220 }, { "epoch": 0.08665860540104796, "grad_norm": 2.3023781776428223, "learning_rate": 4.6288982704616643e-05, "loss": 4.9337, "step": 3225 }, { "epoch": 0.08679295982802633, "grad_norm": 2.53151798248291, "learning_rate": 4.628217349857007e-05, "loss": 4.9401, "step": 3230 }, { "epoch": 0.0869273142550047, "grad_norm": 2.152036190032959, "learning_rate": 4.6275364292523495e-05, "loss": 4.9722, "step": 3235 }, { "epoch": 0.08706166868198308, "grad_norm": 2.1881864070892334, "learning_rate": 4.626855508647692e-05, "loss": 4.9625, "step": 3240 }, { "epoch": 0.08719602310896143, "grad_norm": 2.2732608318328857, "learning_rate": 4.6261745880430346e-05, "loss": 4.8329, "step": 3245 }, { "epoch": 0.08733037753593981, "grad_norm": 2.151388168334961, "learning_rate": 4.625493667438377e-05, "loss": 5.0471, "step": 3250 }, { "epoch": 0.08746473196291818, "grad_norm": 2.393622636795044, "learning_rate": 4.624812746833719e-05, "loss": 4.9852, "step": 3255 }, { "epoch": 0.08759908638989655, "grad_norm": 2.534522533416748, "learning_rate": 4.624131826229062e-05, "loss": 4.8568, "step": 3260 }, { "epoch": 0.08773344081687491, "grad_norm": 2.4262566566467285, "learning_rate": 4.623450905624405e-05, "loss": 5.0406, "step": 3265 }, { "epoch": 0.08786779524385328, "grad_norm": 2.323293924331665, "learning_rate": 4.622769985019747e-05, "loss": 5.0479, "step": 3270 }, { "epoch": 0.08800214967083166, "grad_norm": 2.4120004177093506, "learning_rate": 4.622089064415089e-05, "loss": 5.0464, "step": 3275 }, { "epoch": 0.08813650409781003, "grad_norm": 2.489932060241699, "learning_rate": 4.6214081438104316e-05, "loss": 4.9575, "step": 3280 }, { "epoch": 0.08827085852478839, "grad_norm": 2.620137929916382, "learning_rate": 4.6207272232057745e-05, "loss": 4.9206, "step": 3285 }, { "epoch": 0.08840521295176676, "grad_norm": 2.4803669452667236, "learning_rate": 4.6200463026011174e-05, "loss": 5.0104, "step": 3290 }, { "epoch": 0.08853956737874513, "grad_norm": 2.33331298828125, "learning_rate": 4.6193653819964596e-05, "loss": 4.9044, "step": 3295 }, { "epoch": 0.0886739218057235, "grad_norm": 2.533698558807373, "learning_rate": 4.618684461391802e-05, "loss": 5.0146, "step": 3300 }, { "epoch": 0.08880827623270186, "grad_norm": 2.2677273750305176, "learning_rate": 4.618003540787145e-05, "loss": 5.0116, "step": 3305 }, { "epoch": 0.08894263065968024, "grad_norm": 2.6705358028411865, "learning_rate": 4.617322620182487e-05, "loss": 4.9719, "step": 3310 }, { "epoch": 0.08907698508665861, "grad_norm": 2.181166648864746, "learning_rate": 4.616641699577829e-05, "loss": 5.0249, "step": 3315 }, { "epoch": 0.08921133951363698, "grad_norm": 2.366442918777466, "learning_rate": 4.615960778973172e-05, "loss": 4.9317, "step": 3320 }, { "epoch": 0.08934569394061534, "grad_norm": 2.1919569969177246, "learning_rate": 4.615279858368514e-05, "loss": 4.9039, "step": 3325 }, { "epoch": 0.08948004836759371, "grad_norm": 2.3448522090911865, "learning_rate": 4.614598937763857e-05, "loss": 4.8646, "step": 3330 }, { "epoch": 0.08961440279457208, "grad_norm": 2.3348026275634766, "learning_rate": 4.6139180171591994e-05, "loss": 4.9396, "step": 3335 }, { "epoch": 0.08974875722155046, "grad_norm": 2.298412799835205, "learning_rate": 4.613237096554542e-05, "loss": 4.9486, "step": 3340 }, { "epoch": 0.08988311164852882, "grad_norm": 2.1365394592285156, "learning_rate": 4.612556175949884e-05, "loss": 4.9046, "step": 3345 }, { "epoch": 0.09001746607550719, "grad_norm": 2.5887131690979004, "learning_rate": 4.611875255345227e-05, "loss": 4.9578, "step": 3350 }, { "epoch": 0.09015182050248556, "grad_norm": 2.4161834716796875, "learning_rate": 4.61119433474057e-05, "loss": 4.9373, "step": 3355 }, { "epoch": 0.09028617492946392, "grad_norm": 2.466649293899536, "learning_rate": 4.610513414135912e-05, "loss": 4.8397, "step": 3360 }, { "epoch": 0.09042052935644229, "grad_norm": 2.365720748901367, "learning_rate": 4.609832493531254e-05, "loss": 4.8461, "step": 3365 }, { "epoch": 0.09055488378342066, "grad_norm": 2.2871487140655518, "learning_rate": 4.609151572926597e-05, "loss": 4.971, "step": 3370 }, { "epoch": 0.09068923821039904, "grad_norm": 2.8257601261138916, "learning_rate": 4.608470652321939e-05, "loss": 4.9835, "step": 3375 }, { "epoch": 0.0908235926373774, "grad_norm": 2.2513794898986816, "learning_rate": 4.607789731717282e-05, "loss": 5.0145, "step": 3380 }, { "epoch": 0.09095794706435577, "grad_norm": 2.658571481704712, "learning_rate": 4.6071088111126244e-05, "loss": 4.9941, "step": 3385 }, { "epoch": 0.09109230149133414, "grad_norm": 2.5282750129699707, "learning_rate": 4.606427890507967e-05, "loss": 4.9412, "step": 3390 }, { "epoch": 0.09122665591831251, "grad_norm": 2.568573236465454, "learning_rate": 4.6057469699033095e-05, "loss": 4.949, "step": 3395 }, { "epoch": 0.09136101034529087, "grad_norm": 2.4815893173217773, "learning_rate": 4.605066049298652e-05, "loss": 5.0554, "step": 3400 }, { "epoch": 0.09149536477226924, "grad_norm": 2.4829440116882324, "learning_rate": 4.604385128693994e-05, "loss": 4.8589, "step": 3405 }, { "epoch": 0.09162971919924762, "grad_norm": 2.2588183879852295, "learning_rate": 4.603704208089337e-05, "loss": 4.9571, "step": 3410 }, { "epoch": 0.09176407362622599, "grad_norm": 2.1706416606903076, "learning_rate": 4.60302328748468e-05, "loss": 4.8513, "step": 3415 }, { "epoch": 0.09189842805320435, "grad_norm": 2.259800910949707, "learning_rate": 4.602342366880022e-05, "loss": 4.9065, "step": 3420 }, { "epoch": 0.09203278248018272, "grad_norm": 2.5378379821777344, "learning_rate": 4.601661446275364e-05, "loss": 5.0089, "step": 3425 }, { "epoch": 0.09216713690716109, "grad_norm": 2.327890634536743, "learning_rate": 4.600980525670707e-05, "loss": 4.9966, "step": 3430 }, { "epoch": 0.09230149133413947, "grad_norm": 2.3996288776397705, "learning_rate": 4.6002996050660494e-05, "loss": 4.9848, "step": 3435 }, { "epoch": 0.09243584576111782, "grad_norm": 2.450749158859253, "learning_rate": 4.599618684461392e-05, "loss": 4.8816, "step": 3440 }, { "epoch": 0.0925702001880962, "grad_norm": 2.3224682807922363, "learning_rate": 4.5989377638567345e-05, "loss": 5.0016, "step": 3445 }, { "epoch": 0.09270455461507457, "grad_norm": 2.397367238998413, "learning_rate": 4.5982568432520774e-05, "loss": 4.8428, "step": 3450 }, { "epoch": 0.09283890904205294, "grad_norm": 2.6210317611694336, "learning_rate": 4.5975759226474197e-05, "loss": 4.9057, "step": 3455 }, { "epoch": 0.0929732634690313, "grad_norm": 2.44633150100708, "learning_rate": 4.596895002042762e-05, "loss": 4.8284, "step": 3460 }, { "epoch": 0.09310761789600967, "grad_norm": 2.477090358734131, "learning_rate": 4.596214081438104e-05, "loss": 4.8567, "step": 3465 }, { "epoch": 0.09324197232298805, "grad_norm": 2.488525390625, "learning_rate": 4.595533160833447e-05, "loss": 4.9228, "step": 3470 }, { "epoch": 0.09337632674996642, "grad_norm": 2.200896978378296, "learning_rate": 4.59485224022879e-05, "loss": 4.9107, "step": 3475 }, { "epoch": 0.09351068117694478, "grad_norm": 2.488555669784546, "learning_rate": 4.594171319624132e-05, "loss": 4.9039, "step": 3480 }, { "epoch": 0.09364503560392315, "grad_norm": 2.311699390411377, "learning_rate": 4.5934903990194744e-05, "loss": 4.8956, "step": 3485 }, { "epoch": 0.09377939003090152, "grad_norm": 2.2264766693115234, "learning_rate": 4.5928094784148166e-05, "loss": 4.9072, "step": 3490 }, { "epoch": 0.09391374445787988, "grad_norm": 2.348525047302246, "learning_rate": 4.5921285578101595e-05, "loss": 5.0134, "step": 3495 }, { "epoch": 0.09404809888485825, "grad_norm": 2.2881531715393066, "learning_rate": 4.5914476372055024e-05, "loss": 4.9881, "step": 3500 }, { "epoch": 0.09418245331183663, "grad_norm": 2.463690757751465, "learning_rate": 4.5907667166008446e-05, "loss": 4.9656, "step": 3505 }, { "epoch": 0.094316807738815, "grad_norm": 2.177170991897583, "learning_rate": 4.590085795996187e-05, "loss": 4.8708, "step": 3510 }, { "epoch": 0.09445116216579336, "grad_norm": 2.52181339263916, "learning_rate": 4.58940487539153e-05, "loss": 4.7624, "step": 3515 }, { "epoch": 0.09458551659277173, "grad_norm": 2.4385335445404053, "learning_rate": 4.588723954786872e-05, "loss": 4.8535, "step": 3520 }, { "epoch": 0.0947198710197501, "grad_norm": 2.4057161808013916, "learning_rate": 4.588043034182214e-05, "loss": 4.9165, "step": 3525 }, { "epoch": 0.09485422544672847, "grad_norm": 2.464326858520508, "learning_rate": 4.587362113577557e-05, "loss": 4.7491, "step": 3530 }, { "epoch": 0.09498857987370683, "grad_norm": 2.37044620513916, "learning_rate": 4.5866811929729e-05, "loss": 4.9403, "step": 3535 }, { "epoch": 0.0951229343006852, "grad_norm": 2.5464742183685303, "learning_rate": 4.586000272368242e-05, "loss": 4.8521, "step": 3540 }, { "epoch": 0.09525728872766358, "grad_norm": 2.3816168308258057, "learning_rate": 4.5853193517635845e-05, "loss": 5.0172, "step": 3545 }, { "epoch": 0.09539164315464195, "grad_norm": 2.3183231353759766, "learning_rate": 4.584638431158927e-05, "loss": 4.8062, "step": 3550 }, { "epoch": 0.09552599758162031, "grad_norm": 2.490726947784424, "learning_rate": 4.5839575105542696e-05, "loss": 5.0705, "step": 3555 }, { "epoch": 0.09566035200859868, "grad_norm": 2.7080209255218506, "learning_rate": 4.583276589949612e-05, "loss": 5.0083, "step": 3560 }, { "epoch": 0.09579470643557705, "grad_norm": 2.382826328277588, "learning_rate": 4.582595669344955e-05, "loss": 4.7448, "step": 3565 }, { "epoch": 0.09592906086255543, "grad_norm": 2.6137123107910156, "learning_rate": 4.581914748740297e-05, "loss": 4.8083, "step": 3570 }, { "epoch": 0.09606341528953379, "grad_norm": 2.500060796737671, "learning_rate": 4.58123382813564e-05, "loss": 4.9944, "step": 3575 }, { "epoch": 0.09619776971651216, "grad_norm": 2.218621015548706, "learning_rate": 4.580552907530982e-05, "loss": 4.9006, "step": 3580 }, { "epoch": 0.09633212414349053, "grad_norm": 2.533972978591919, "learning_rate": 4.579871986926324e-05, "loss": 4.808, "step": 3585 }, { "epoch": 0.0964664785704689, "grad_norm": 2.1555912494659424, "learning_rate": 4.579191066321667e-05, "loss": 4.8323, "step": 3590 }, { "epoch": 0.09660083299744726, "grad_norm": 2.311100721359253, "learning_rate": 4.57851014571701e-05, "loss": 4.8312, "step": 3595 }, { "epoch": 0.09673518742442563, "grad_norm": 2.2688076496124268, "learning_rate": 4.5778292251123524e-05, "loss": 4.8179, "step": 3600 }, { "epoch": 0.096869541851404, "grad_norm": 2.2784695625305176, "learning_rate": 4.5771483045076946e-05, "loss": 4.9883, "step": 3605 }, { "epoch": 0.09700389627838238, "grad_norm": 2.4393088817596436, "learning_rate": 4.576467383903037e-05, "loss": 4.8732, "step": 3610 }, { "epoch": 0.09713825070536074, "grad_norm": 2.2697343826293945, "learning_rate": 4.57578646329838e-05, "loss": 4.9243, "step": 3615 }, { "epoch": 0.09727260513233911, "grad_norm": 2.426361322402954, "learning_rate": 4.575105542693722e-05, "loss": 4.8421, "step": 3620 }, { "epoch": 0.09740695955931748, "grad_norm": 2.343950033187866, "learning_rate": 4.574424622089065e-05, "loss": 4.8981, "step": 3625 }, { "epoch": 0.09754131398629586, "grad_norm": 2.503652811050415, "learning_rate": 4.573743701484407e-05, "loss": 4.9175, "step": 3630 }, { "epoch": 0.09767566841327421, "grad_norm": 2.2795350551605225, "learning_rate": 4.573062780879749e-05, "loss": 4.8883, "step": 3635 }, { "epoch": 0.09781002284025259, "grad_norm": 2.2998039722442627, "learning_rate": 4.572381860275092e-05, "loss": 4.918, "step": 3640 }, { "epoch": 0.09794437726723096, "grad_norm": 2.231745481491089, "learning_rate": 4.5717009396704344e-05, "loss": 4.9396, "step": 3645 }, { "epoch": 0.09807873169420932, "grad_norm": 2.2835354804992676, "learning_rate": 4.5710200190657774e-05, "loss": 5.0009, "step": 3650 }, { "epoch": 0.09821308612118769, "grad_norm": 2.394118309020996, "learning_rate": 4.5703390984611196e-05, "loss": 4.9711, "step": 3655 }, { "epoch": 0.09834744054816606, "grad_norm": 2.459158182144165, "learning_rate": 4.5696581778564625e-05, "loss": 4.9808, "step": 3660 }, { "epoch": 0.09848179497514443, "grad_norm": 2.3846614360809326, "learning_rate": 4.568977257251805e-05, "loss": 4.8306, "step": 3665 }, { "epoch": 0.0986161494021228, "grad_norm": 2.3341453075408936, "learning_rate": 4.568296336647147e-05, "loss": 4.9436, "step": 3670 }, { "epoch": 0.09875050382910117, "grad_norm": 2.4033708572387695, "learning_rate": 4.567615416042489e-05, "loss": 4.9908, "step": 3675 }, { "epoch": 0.09888485825607954, "grad_norm": 2.154600143432617, "learning_rate": 4.566934495437832e-05, "loss": 4.6785, "step": 3680 }, { "epoch": 0.09901921268305791, "grad_norm": 2.5777153968811035, "learning_rate": 4.566253574833175e-05, "loss": 4.8747, "step": 3685 }, { "epoch": 0.09915356711003627, "grad_norm": 2.3845765590667725, "learning_rate": 4.565572654228517e-05, "loss": 4.9459, "step": 3690 }, { "epoch": 0.09928792153701464, "grad_norm": 2.464461326599121, "learning_rate": 4.5648917336238594e-05, "loss": 4.7985, "step": 3695 }, { "epoch": 0.09942227596399301, "grad_norm": 2.0905392169952393, "learning_rate": 4.564210813019202e-05, "loss": 4.9871, "step": 3700 }, { "epoch": 0.09955663039097139, "grad_norm": 2.4455697536468506, "learning_rate": 4.5635298924145446e-05, "loss": 4.9849, "step": 3705 }, { "epoch": 0.09969098481794975, "grad_norm": 2.5106849670410156, "learning_rate": 4.5628489718098875e-05, "loss": 4.8517, "step": 3710 }, { "epoch": 0.09982533924492812, "grad_norm": 2.6265969276428223, "learning_rate": 4.56216805120523e-05, "loss": 4.81, "step": 3715 }, { "epoch": 0.09995969367190649, "grad_norm": 2.0911262035369873, "learning_rate": 4.5614871306005726e-05, "loss": 4.8222, "step": 3720 }, { "epoch": 0.10009404809888486, "grad_norm": 2.6245334148406982, "learning_rate": 4.560806209995915e-05, "loss": 4.8013, "step": 3725 }, { "epoch": 0.10022840252586322, "grad_norm": 2.5066633224487305, "learning_rate": 4.560125289391257e-05, "loss": 4.8526, "step": 3730 }, { "epoch": 0.1003627569528416, "grad_norm": 2.2056989669799805, "learning_rate": 4.559444368786599e-05, "loss": 4.9282, "step": 3735 }, { "epoch": 0.10049711137981997, "grad_norm": 2.328695297241211, "learning_rate": 4.558763448181942e-05, "loss": 4.8001, "step": 3740 }, { "epoch": 0.10063146580679834, "grad_norm": 2.262921094894409, "learning_rate": 4.558082527577285e-05, "loss": 4.7917, "step": 3745 }, { "epoch": 0.1007658202337767, "grad_norm": 2.4201860427856445, "learning_rate": 4.557401606972627e-05, "loss": 4.7839, "step": 3750 }, { "epoch": 0.10090017466075507, "grad_norm": 2.142925977706909, "learning_rate": 4.5567206863679695e-05, "loss": 4.7718, "step": 3755 }, { "epoch": 0.10103452908773344, "grad_norm": 2.5661582946777344, "learning_rate": 4.5560397657633124e-05, "loss": 4.8784, "step": 3760 }, { "epoch": 0.10116888351471182, "grad_norm": 2.518620491027832, "learning_rate": 4.555358845158655e-05, "loss": 5.0041, "step": 3765 }, { "epoch": 0.10130323794169017, "grad_norm": 2.145602226257324, "learning_rate": 4.554677924553997e-05, "loss": 4.8942, "step": 3770 }, { "epoch": 0.10143759236866855, "grad_norm": 2.5080015659332275, "learning_rate": 4.55399700394934e-05, "loss": 4.9117, "step": 3775 }, { "epoch": 0.10157194679564692, "grad_norm": 2.5845413208007812, "learning_rate": 4.553316083344682e-05, "loss": 4.8026, "step": 3780 }, { "epoch": 0.10170630122262529, "grad_norm": 2.4090616703033447, "learning_rate": 4.552635162740025e-05, "loss": 4.8893, "step": 3785 }, { "epoch": 0.10184065564960365, "grad_norm": 2.2573182582855225, "learning_rate": 4.551954242135367e-05, "loss": 4.9695, "step": 3790 }, { "epoch": 0.10197501007658202, "grad_norm": 2.3865768909454346, "learning_rate": 4.5512733215307094e-05, "loss": 4.868, "step": 3795 }, { "epoch": 0.1021093645035604, "grad_norm": 2.2999753952026367, "learning_rate": 4.550592400926052e-05, "loss": 4.8374, "step": 3800 }, { "epoch": 0.10224371893053875, "grad_norm": 2.3399412631988525, "learning_rate": 4.549911480321395e-05, "loss": 4.8755, "step": 3805 }, { "epoch": 0.10237807335751713, "grad_norm": 2.1934211254119873, "learning_rate": 4.5492305597167374e-05, "loss": 4.8114, "step": 3810 }, { "epoch": 0.1025124277844955, "grad_norm": 2.4351773262023926, "learning_rate": 4.5485496391120796e-05, "loss": 4.8878, "step": 3815 }, { "epoch": 0.10264678221147387, "grad_norm": 2.1701009273529053, "learning_rate": 4.547868718507422e-05, "loss": 4.8687, "step": 3820 }, { "epoch": 0.10278113663845223, "grad_norm": 2.4852359294891357, "learning_rate": 4.547187797902765e-05, "loss": 4.9959, "step": 3825 }, { "epoch": 0.1029154910654306, "grad_norm": 2.4492363929748535, "learning_rate": 4.546506877298107e-05, "loss": 4.9215, "step": 3830 }, { "epoch": 0.10304984549240898, "grad_norm": 2.2819671630859375, "learning_rate": 4.54582595669345e-05, "loss": 4.9463, "step": 3835 }, { "epoch": 0.10318419991938735, "grad_norm": 2.2214770317077637, "learning_rate": 4.545145036088792e-05, "loss": 4.8956, "step": 3840 }, { "epoch": 0.1033185543463657, "grad_norm": 2.302542209625244, "learning_rate": 4.544464115484135e-05, "loss": 4.7953, "step": 3845 }, { "epoch": 0.10345290877334408, "grad_norm": 2.407921075820923, "learning_rate": 4.543783194879477e-05, "loss": 4.806, "step": 3850 }, { "epoch": 0.10358726320032245, "grad_norm": 2.5172712802886963, "learning_rate": 4.5431022742748195e-05, "loss": 4.8893, "step": 3855 }, { "epoch": 0.10372161762730082, "grad_norm": 2.548431158065796, "learning_rate": 4.5424213536701624e-05, "loss": 4.6952, "step": 3860 }, { "epoch": 0.10385597205427918, "grad_norm": 2.088240146636963, "learning_rate": 4.541740433065505e-05, "loss": 4.7694, "step": 3865 }, { "epoch": 0.10399032648125756, "grad_norm": 2.1839518547058105, "learning_rate": 4.5410595124608475e-05, "loss": 4.8916, "step": 3870 }, { "epoch": 0.10412468090823593, "grad_norm": 2.1736936569213867, "learning_rate": 4.54037859185619e-05, "loss": 4.73, "step": 3875 }, { "epoch": 0.1042590353352143, "grad_norm": 2.3348512649536133, "learning_rate": 4.539697671251532e-05, "loss": 4.8301, "step": 3880 }, { "epoch": 0.10439338976219266, "grad_norm": 2.2691328525543213, "learning_rate": 4.539016750646875e-05, "loss": 4.8908, "step": 3885 }, { "epoch": 0.10452774418917103, "grad_norm": 2.3222343921661377, "learning_rate": 4.538335830042217e-05, "loss": 4.7588, "step": 3890 }, { "epoch": 0.1046620986161494, "grad_norm": 2.2783796787261963, "learning_rate": 4.53765490943756e-05, "loss": 4.8291, "step": 3895 }, { "epoch": 0.10479645304312778, "grad_norm": 2.2894301414489746, "learning_rate": 4.536973988832902e-05, "loss": 4.9146, "step": 3900 }, { "epoch": 0.10493080747010614, "grad_norm": 2.2368619441986084, "learning_rate": 4.536293068228245e-05, "loss": 4.7488, "step": 3905 }, { "epoch": 0.10506516189708451, "grad_norm": 2.4413812160491943, "learning_rate": 4.5356121476235874e-05, "loss": 4.9104, "step": 3910 }, { "epoch": 0.10519951632406288, "grad_norm": 2.445951223373413, "learning_rate": 4.5349312270189296e-05, "loss": 4.7909, "step": 3915 }, { "epoch": 0.10533387075104125, "grad_norm": 2.5580689907073975, "learning_rate": 4.5342503064142725e-05, "loss": 4.9032, "step": 3920 }, { "epoch": 0.10546822517801961, "grad_norm": 2.387916326522827, "learning_rate": 4.533569385809615e-05, "loss": 4.992, "step": 3925 }, { "epoch": 0.10560257960499798, "grad_norm": 2.3785805702209473, "learning_rate": 4.5328884652049576e-05, "loss": 4.8345, "step": 3930 }, { "epoch": 0.10573693403197636, "grad_norm": 2.3815479278564453, "learning_rate": 4.5322075446003e-05, "loss": 4.8454, "step": 3935 }, { "epoch": 0.10587128845895473, "grad_norm": 2.535463333129883, "learning_rate": 4.531526623995642e-05, "loss": 4.9197, "step": 3940 }, { "epoch": 0.10600564288593309, "grad_norm": 2.25718092918396, "learning_rate": 4.530845703390984e-05, "loss": 4.7668, "step": 3945 }, { "epoch": 0.10613999731291146, "grad_norm": 2.453803777694702, "learning_rate": 4.530164782786327e-05, "loss": 4.9305, "step": 3950 }, { "epoch": 0.10627435173988983, "grad_norm": 2.783095598220825, "learning_rate": 4.52948386218167e-05, "loss": 4.8045, "step": 3955 }, { "epoch": 0.10640870616686819, "grad_norm": 2.3370118141174316, "learning_rate": 4.5288029415770124e-05, "loss": 4.7908, "step": 3960 }, { "epoch": 0.10654306059384656, "grad_norm": 2.4311699867248535, "learning_rate": 4.5281220209723546e-05, "loss": 5.0291, "step": 3965 }, { "epoch": 0.10667741502082494, "grad_norm": 2.5788986682891846, "learning_rate": 4.5274411003676975e-05, "loss": 4.8295, "step": 3970 }, { "epoch": 0.10681176944780331, "grad_norm": 2.515810251235962, "learning_rate": 4.52676017976304e-05, "loss": 4.9363, "step": 3975 }, { "epoch": 0.10694612387478167, "grad_norm": 2.2663097381591797, "learning_rate": 4.526079259158382e-05, "loss": 4.7726, "step": 3980 }, { "epoch": 0.10708047830176004, "grad_norm": 2.4607527256011963, "learning_rate": 4.525398338553725e-05, "loss": 4.7594, "step": 3985 }, { "epoch": 0.10721483272873841, "grad_norm": 2.100602865219116, "learning_rate": 4.524717417949068e-05, "loss": 4.8253, "step": 3990 }, { "epoch": 0.10734918715571679, "grad_norm": 2.069812536239624, "learning_rate": 4.52403649734441e-05, "loss": 4.9251, "step": 3995 }, { "epoch": 0.10748354158269514, "grad_norm": 2.5128419399261475, "learning_rate": 4.523355576739752e-05, "loss": 4.8984, "step": 4000 }, { "epoch": 0.10761789600967352, "grad_norm": 2.25811767578125, "learning_rate": 4.5226746561350944e-05, "loss": 4.7767, "step": 4005 }, { "epoch": 0.10775225043665189, "grad_norm": 2.6393041610717773, "learning_rate": 4.5219937355304373e-05, "loss": 4.8818, "step": 4010 }, { "epoch": 0.10788660486363026, "grad_norm": 2.3550100326538086, "learning_rate": 4.52131281492578e-05, "loss": 4.8485, "step": 4015 }, { "epoch": 0.10802095929060862, "grad_norm": 2.305943250656128, "learning_rate": 4.5206318943211225e-05, "loss": 4.9345, "step": 4020 }, { "epoch": 0.10815531371758699, "grad_norm": 2.2776548862457275, "learning_rate": 4.519950973716465e-05, "loss": 4.7306, "step": 4025 }, { "epoch": 0.10828966814456537, "grad_norm": 2.351945161819458, "learning_rate": 4.5192700531118076e-05, "loss": 4.7719, "step": 4030 }, { "epoch": 0.10842402257154374, "grad_norm": 2.304865837097168, "learning_rate": 4.51858913250715e-05, "loss": 4.7545, "step": 4035 }, { "epoch": 0.1085583769985221, "grad_norm": 2.3425676822662354, "learning_rate": 4.517908211902492e-05, "loss": 4.7759, "step": 4040 }, { "epoch": 0.10869273142550047, "grad_norm": 2.3676183223724365, "learning_rate": 4.517227291297835e-05, "loss": 4.9045, "step": 4045 }, { "epoch": 0.10882708585247884, "grad_norm": 2.3830666542053223, "learning_rate": 4.516546370693178e-05, "loss": 4.9098, "step": 4050 }, { "epoch": 0.10896144027945721, "grad_norm": 2.481192111968994, "learning_rate": 4.51586545008852e-05, "loss": 4.7721, "step": 4055 }, { "epoch": 0.10909579470643557, "grad_norm": 2.399505138397217, "learning_rate": 4.515184529483862e-05, "loss": 4.7054, "step": 4060 }, { "epoch": 0.10923014913341395, "grad_norm": 2.340904951095581, "learning_rate": 4.5145036088792045e-05, "loss": 4.9621, "step": 4065 }, { "epoch": 0.10936450356039232, "grad_norm": 2.3511340618133545, "learning_rate": 4.5138226882745475e-05, "loss": 4.771, "step": 4070 }, { "epoch": 0.10949885798737069, "grad_norm": 2.4745922088623047, "learning_rate": 4.5131417676698904e-05, "loss": 4.7935, "step": 4075 }, { "epoch": 0.10963321241434905, "grad_norm": 2.42933988571167, "learning_rate": 4.5124608470652326e-05, "loss": 4.9054, "step": 4080 }, { "epoch": 0.10976756684132742, "grad_norm": 2.2317187786102295, "learning_rate": 4.511779926460575e-05, "loss": 4.7962, "step": 4085 }, { "epoch": 0.1099019212683058, "grad_norm": 2.3949458599090576, "learning_rate": 4.511099005855917e-05, "loss": 4.7295, "step": 4090 }, { "epoch": 0.11003627569528417, "grad_norm": 2.3402559757232666, "learning_rate": 4.51041808525126e-05, "loss": 4.831, "step": 4095 }, { "epoch": 0.11017063012226253, "grad_norm": 2.512261390686035, "learning_rate": 4.509737164646602e-05, "loss": 4.847, "step": 4100 }, { "epoch": 0.1103049845492409, "grad_norm": 2.3977127075195312, "learning_rate": 4.509056244041945e-05, "loss": 4.9364, "step": 4105 }, { "epoch": 0.11043933897621927, "grad_norm": 2.0969760417938232, "learning_rate": 4.508375323437287e-05, "loss": 4.7632, "step": 4110 }, { "epoch": 0.11057369340319763, "grad_norm": 2.3379251956939697, "learning_rate": 4.50769440283263e-05, "loss": 4.8934, "step": 4115 }, { "epoch": 0.110708047830176, "grad_norm": 2.295227289199829, "learning_rate": 4.5070134822279724e-05, "loss": 4.8004, "step": 4120 }, { "epoch": 0.11084240225715437, "grad_norm": 2.161325454711914, "learning_rate": 4.5063325616233147e-05, "loss": 4.8489, "step": 4125 }, { "epoch": 0.11097675668413275, "grad_norm": 2.4868967533111572, "learning_rate": 4.505651641018657e-05, "loss": 4.6796, "step": 4130 }, { "epoch": 0.1111111111111111, "grad_norm": 2.30232310295105, "learning_rate": 4.5049707204140005e-05, "loss": 4.7677, "step": 4135 }, { "epoch": 0.11124546553808948, "grad_norm": 2.3230373859405518, "learning_rate": 4.504289799809343e-05, "loss": 4.8115, "step": 4140 }, { "epoch": 0.11137981996506785, "grad_norm": 2.38686466217041, "learning_rate": 4.503608879204685e-05, "loss": 4.7916, "step": 4145 }, { "epoch": 0.11151417439204622, "grad_norm": 2.492284059524536, "learning_rate": 4.502927958600027e-05, "loss": 4.7954, "step": 4150 }, { "epoch": 0.11164852881902458, "grad_norm": 2.453956127166748, "learning_rate": 4.50224703799537e-05, "loss": 4.7882, "step": 4155 }, { "epoch": 0.11178288324600295, "grad_norm": 2.505645275115967, "learning_rate": 4.501566117390712e-05, "loss": 4.8118, "step": 4160 }, { "epoch": 0.11191723767298133, "grad_norm": 2.40903639793396, "learning_rate": 4.500885196786055e-05, "loss": 4.8932, "step": 4165 }, { "epoch": 0.1120515920999597, "grad_norm": 2.4800972938537598, "learning_rate": 4.5002042761813974e-05, "loss": 4.7828, "step": 4170 }, { "epoch": 0.11218594652693806, "grad_norm": 2.289245843887329, "learning_rate": 4.49952335557674e-05, "loss": 4.9556, "step": 4175 }, { "epoch": 0.11232030095391643, "grad_norm": 2.3668859004974365, "learning_rate": 4.4988424349720825e-05, "loss": 4.8433, "step": 4180 }, { "epoch": 0.1124546553808948, "grad_norm": 2.5421972274780273, "learning_rate": 4.498161514367425e-05, "loss": 4.7374, "step": 4185 }, { "epoch": 0.11258900980787317, "grad_norm": 2.3949403762817383, "learning_rate": 4.497480593762767e-05, "loss": 4.8768, "step": 4190 }, { "epoch": 0.11272336423485153, "grad_norm": 2.4453353881835938, "learning_rate": 4.49679967315811e-05, "loss": 4.7807, "step": 4195 }, { "epoch": 0.1128577186618299, "grad_norm": 2.5284366607666016, "learning_rate": 4.496118752553453e-05, "loss": 4.8374, "step": 4200 }, { "epoch": 0.11299207308880828, "grad_norm": 2.2510130405426025, "learning_rate": 4.495437831948795e-05, "loss": 4.8367, "step": 4205 }, { "epoch": 0.11312642751578665, "grad_norm": 2.456310749053955, "learning_rate": 4.494756911344137e-05, "loss": 4.9486, "step": 4210 }, { "epoch": 0.11326078194276501, "grad_norm": 2.4718942642211914, "learning_rate": 4.49407599073948e-05, "loss": 4.7642, "step": 4215 }, { "epoch": 0.11339513636974338, "grad_norm": 2.5681488513946533, "learning_rate": 4.4933950701348224e-05, "loss": 4.7475, "step": 4220 }, { "epoch": 0.11352949079672175, "grad_norm": 2.713550090789795, "learning_rate": 4.492714149530165e-05, "loss": 4.9712, "step": 4225 }, { "epoch": 0.11366384522370013, "grad_norm": 2.3059887886047363, "learning_rate": 4.4920332289255075e-05, "loss": 4.8545, "step": 4230 }, { "epoch": 0.11379819965067849, "grad_norm": 2.271151304244995, "learning_rate": 4.49135230832085e-05, "loss": 4.9656, "step": 4235 }, { "epoch": 0.11393255407765686, "grad_norm": 2.380279064178467, "learning_rate": 4.4906713877161927e-05, "loss": 4.8414, "step": 4240 }, { "epoch": 0.11406690850463523, "grad_norm": 2.6072587966918945, "learning_rate": 4.489990467111535e-05, "loss": 4.7965, "step": 4245 }, { "epoch": 0.1142012629316136, "grad_norm": 2.4508564472198486, "learning_rate": 4.489309546506877e-05, "loss": 4.8604, "step": 4250 }, { "epoch": 0.11433561735859196, "grad_norm": 2.3962242603302, "learning_rate": 4.48862862590222e-05, "loss": 4.7706, "step": 4255 }, { "epoch": 0.11446997178557033, "grad_norm": 2.6643543243408203, "learning_rate": 4.487947705297563e-05, "loss": 4.8796, "step": 4260 }, { "epoch": 0.11460432621254871, "grad_norm": 2.2862443923950195, "learning_rate": 4.487266784692905e-05, "loss": 4.8573, "step": 4265 }, { "epoch": 0.11473868063952707, "grad_norm": 2.2548675537109375, "learning_rate": 4.4865858640882474e-05, "loss": 4.7709, "step": 4270 }, { "epoch": 0.11487303506650544, "grad_norm": 2.393411874771118, "learning_rate": 4.4859049434835896e-05, "loss": 4.8663, "step": 4275 }, { "epoch": 0.11500738949348381, "grad_norm": 2.3601062297821045, "learning_rate": 4.4852240228789325e-05, "loss": 4.8592, "step": 4280 }, { "epoch": 0.11514174392046218, "grad_norm": 2.2856903076171875, "learning_rate": 4.4845431022742754e-05, "loss": 4.8743, "step": 4285 }, { "epoch": 0.11527609834744054, "grad_norm": 2.2420032024383545, "learning_rate": 4.4838621816696176e-05, "loss": 4.8173, "step": 4290 }, { "epoch": 0.11541045277441891, "grad_norm": 2.34596848487854, "learning_rate": 4.48318126106496e-05, "loss": 4.9407, "step": 4295 }, { "epoch": 0.11554480720139729, "grad_norm": 2.249833583831787, "learning_rate": 4.482500340460303e-05, "loss": 4.7955, "step": 4300 }, { "epoch": 0.11567916162837566, "grad_norm": 2.2568256855010986, "learning_rate": 4.481819419855645e-05, "loss": 4.7546, "step": 4305 }, { "epoch": 0.11581351605535402, "grad_norm": 2.5375142097473145, "learning_rate": 4.481138499250987e-05, "loss": 4.8389, "step": 4310 }, { "epoch": 0.11594787048233239, "grad_norm": 2.4763433933258057, "learning_rate": 4.48045757864633e-05, "loss": 4.8254, "step": 4315 }, { "epoch": 0.11608222490931076, "grad_norm": 2.442808151245117, "learning_rate": 4.479776658041673e-05, "loss": 4.7394, "step": 4320 }, { "epoch": 0.11621657933628914, "grad_norm": 2.290862798690796, "learning_rate": 4.479095737437015e-05, "loss": 4.819, "step": 4325 }, { "epoch": 0.1163509337632675, "grad_norm": 2.581564426422119, "learning_rate": 4.4784148168323575e-05, "loss": 4.7278, "step": 4330 }, { "epoch": 0.11648528819024587, "grad_norm": 2.193546772003174, "learning_rate": 4.4777338962277e-05, "loss": 4.7554, "step": 4335 }, { "epoch": 0.11661964261722424, "grad_norm": 2.334792375564575, "learning_rate": 4.4770529756230426e-05, "loss": 4.8878, "step": 4340 }, { "epoch": 0.11675399704420261, "grad_norm": 2.1534948348999023, "learning_rate": 4.4763720550183855e-05, "loss": 4.8333, "step": 4345 }, { "epoch": 0.11688835147118097, "grad_norm": 2.372164011001587, "learning_rate": 4.475691134413728e-05, "loss": 4.8058, "step": 4350 }, { "epoch": 0.11702270589815934, "grad_norm": 2.6233463287353516, "learning_rate": 4.47501021380907e-05, "loss": 4.8134, "step": 4355 }, { "epoch": 0.11715706032513772, "grad_norm": 2.5761067867279053, "learning_rate": 4.474329293204413e-05, "loss": 4.7723, "step": 4360 }, { "epoch": 0.11729141475211609, "grad_norm": 2.2746036052703857, "learning_rate": 4.473648372599755e-05, "loss": 4.8873, "step": 4365 }, { "epoch": 0.11742576917909445, "grad_norm": 2.214812994003296, "learning_rate": 4.472967451995097e-05, "loss": 4.8907, "step": 4370 }, { "epoch": 0.11756012360607282, "grad_norm": 2.3953659534454346, "learning_rate": 4.47228653139044e-05, "loss": 4.6522, "step": 4375 }, { "epoch": 0.11769447803305119, "grad_norm": 2.6663784980773926, "learning_rate": 4.4716056107857825e-05, "loss": 4.8405, "step": 4380 }, { "epoch": 0.11782883246002956, "grad_norm": 2.4421095848083496, "learning_rate": 4.4709246901811254e-05, "loss": 4.7374, "step": 4385 }, { "epoch": 0.11796318688700792, "grad_norm": 2.476816415786743, "learning_rate": 4.4702437695764676e-05, "loss": 4.6978, "step": 4390 }, { "epoch": 0.1180975413139863, "grad_norm": 2.278705358505249, "learning_rate": 4.46956284897181e-05, "loss": 4.8287, "step": 4395 }, { "epoch": 0.11823189574096467, "grad_norm": 2.3131260871887207, "learning_rate": 4.468881928367152e-05, "loss": 4.6929, "step": 4400 }, { "epoch": 0.11836625016794304, "grad_norm": 2.562412977218628, "learning_rate": 4.468201007762495e-05, "loss": 4.7091, "step": 4405 }, { "epoch": 0.1185006045949214, "grad_norm": 2.254849910736084, "learning_rate": 4.467520087157838e-05, "loss": 4.7875, "step": 4410 }, { "epoch": 0.11863495902189977, "grad_norm": 2.204624891281128, "learning_rate": 4.46683916655318e-05, "loss": 4.692, "step": 4415 }, { "epoch": 0.11876931344887814, "grad_norm": 2.2432289123535156, "learning_rate": 4.466158245948522e-05, "loss": 4.8274, "step": 4420 }, { "epoch": 0.1189036678758565, "grad_norm": 2.5432636737823486, "learning_rate": 4.465477325343865e-05, "loss": 4.9673, "step": 4425 }, { "epoch": 0.11903802230283488, "grad_norm": 2.230104923248291, "learning_rate": 4.4647964047392074e-05, "loss": 4.7719, "step": 4430 }, { "epoch": 0.11917237672981325, "grad_norm": 2.272420644760132, "learning_rate": 4.4641154841345503e-05, "loss": 4.7614, "step": 4435 }, { "epoch": 0.11930673115679162, "grad_norm": 2.0710220336914062, "learning_rate": 4.4634345635298926e-05, "loss": 4.7047, "step": 4440 }, { "epoch": 0.11944108558376998, "grad_norm": 2.415905237197876, "learning_rate": 4.4627536429252355e-05, "loss": 4.9128, "step": 4445 }, { "epoch": 0.11957544001074835, "grad_norm": 2.3551747798919678, "learning_rate": 4.462072722320578e-05, "loss": 4.7327, "step": 4450 }, { "epoch": 0.11970979443772672, "grad_norm": 2.4013113975524902, "learning_rate": 4.46139180171592e-05, "loss": 4.7552, "step": 4455 }, { "epoch": 0.1198441488647051, "grad_norm": 2.282001495361328, "learning_rate": 4.460710881111262e-05, "loss": 4.7584, "step": 4460 }, { "epoch": 0.11997850329168346, "grad_norm": 2.4643735885620117, "learning_rate": 4.460029960506605e-05, "loss": 4.6315, "step": 4465 }, { "epoch": 0.12011285771866183, "grad_norm": 2.58450984954834, "learning_rate": 4.459349039901948e-05, "loss": 4.7834, "step": 4470 }, { "epoch": 0.1202472121456402, "grad_norm": 2.2559621334075928, "learning_rate": 4.45866811929729e-05, "loss": 4.8369, "step": 4475 }, { "epoch": 0.12038156657261857, "grad_norm": 2.3033199310302734, "learning_rate": 4.4579871986926324e-05, "loss": 4.6834, "step": 4480 }, { "epoch": 0.12051592099959693, "grad_norm": 2.2106590270996094, "learning_rate": 4.457306278087975e-05, "loss": 4.9238, "step": 4485 }, { "epoch": 0.1206502754265753, "grad_norm": 2.456451892852783, "learning_rate": 4.4566253574833176e-05, "loss": 4.832, "step": 4490 }, { "epoch": 0.12078462985355368, "grad_norm": 2.2797577381134033, "learning_rate": 4.4559444368786605e-05, "loss": 4.7856, "step": 4495 }, { "epoch": 0.12091898428053205, "grad_norm": 2.378916025161743, "learning_rate": 4.455263516274003e-05, "loss": 4.8582, "step": 4500 }, { "epoch": 0.12105333870751041, "grad_norm": 2.538640260696411, "learning_rate": 4.4545825956693456e-05, "loss": 4.7919, "step": 4505 }, { "epoch": 0.12118769313448878, "grad_norm": 2.3927602767944336, "learning_rate": 4.453901675064688e-05, "loss": 4.8186, "step": 4510 }, { "epoch": 0.12132204756146715, "grad_norm": 2.429544448852539, "learning_rate": 4.45322075446003e-05, "loss": 4.8529, "step": 4515 }, { "epoch": 0.12145640198844553, "grad_norm": 2.1288082599639893, "learning_rate": 4.452539833855372e-05, "loss": 4.7527, "step": 4520 }, { "epoch": 0.12159075641542388, "grad_norm": 2.492410659790039, "learning_rate": 4.451858913250715e-05, "loss": 4.8259, "step": 4525 }, { "epoch": 0.12172511084240226, "grad_norm": 2.3878021240234375, "learning_rate": 4.451177992646058e-05, "loss": 4.7131, "step": 4530 }, { "epoch": 0.12185946526938063, "grad_norm": 2.3997464179992676, "learning_rate": 4.4504970720414e-05, "loss": 4.8381, "step": 4535 }, { "epoch": 0.121993819696359, "grad_norm": 2.2738037109375, "learning_rate": 4.4498161514367425e-05, "loss": 4.6911, "step": 4540 }, { "epoch": 0.12212817412333736, "grad_norm": 2.4462459087371826, "learning_rate": 4.449135230832085e-05, "loss": 4.6052, "step": 4545 }, { "epoch": 0.12226252855031573, "grad_norm": 2.23532772064209, "learning_rate": 4.448454310227428e-05, "loss": 4.7455, "step": 4550 }, { "epoch": 0.1223968829772941, "grad_norm": 2.6852872371673584, "learning_rate": 4.4477733896227706e-05, "loss": 4.7792, "step": 4555 }, { "epoch": 0.12253123740427246, "grad_norm": 2.169888734817505, "learning_rate": 4.447092469018113e-05, "loss": 4.7512, "step": 4560 }, { "epoch": 0.12266559183125084, "grad_norm": 2.096015214920044, "learning_rate": 4.446411548413455e-05, "loss": 4.7795, "step": 4565 }, { "epoch": 0.12279994625822921, "grad_norm": 2.611541509628296, "learning_rate": 4.445730627808798e-05, "loss": 4.6357, "step": 4570 }, { "epoch": 0.12293430068520758, "grad_norm": 2.2759056091308594, "learning_rate": 4.44504970720414e-05, "loss": 4.6561, "step": 4575 }, { "epoch": 0.12306865511218594, "grad_norm": 2.3145864009857178, "learning_rate": 4.4443687865994824e-05, "loss": 4.8076, "step": 4580 }, { "epoch": 0.12320300953916431, "grad_norm": 2.4229812622070312, "learning_rate": 4.443687865994825e-05, "loss": 4.8465, "step": 4585 }, { "epoch": 0.12333736396614269, "grad_norm": 2.2861647605895996, "learning_rate": 4.443006945390168e-05, "loss": 4.8535, "step": 4590 }, { "epoch": 0.12347171839312106, "grad_norm": 2.146202564239502, "learning_rate": 4.4423260247855104e-05, "loss": 4.749, "step": 4595 }, { "epoch": 0.12360607282009942, "grad_norm": 2.3462462425231934, "learning_rate": 4.4416451041808526e-05, "loss": 4.784, "step": 4600 }, { "epoch": 0.12374042724707779, "grad_norm": 2.518326997756958, "learning_rate": 4.440964183576195e-05, "loss": 4.8874, "step": 4605 }, { "epoch": 0.12387478167405616, "grad_norm": 2.1689605712890625, "learning_rate": 4.440283262971538e-05, "loss": 4.7882, "step": 4610 }, { "epoch": 0.12400913610103453, "grad_norm": 2.4828147888183594, "learning_rate": 4.43960234236688e-05, "loss": 4.6951, "step": 4615 }, { "epoch": 0.12414349052801289, "grad_norm": 2.474313497543335, "learning_rate": 4.438921421762223e-05, "loss": 4.7948, "step": 4620 }, { "epoch": 0.12427784495499127, "grad_norm": 2.473074436187744, "learning_rate": 4.438240501157565e-05, "loss": 4.8089, "step": 4625 }, { "epoch": 0.12441219938196964, "grad_norm": 2.515138864517212, "learning_rate": 4.437559580552908e-05, "loss": 4.6175, "step": 4630 }, { "epoch": 0.12454655380894801, "grad_norm": 2.4840569496154785, "learning_rate": 4.43687865994825e-05, "loss": 4.7728, "step": 4635 }, { "epoch": 0.12468090823592637, "grad_norm": 2.4376375675201416, "learning_rate": 4.4361977393435925e-05, "loss": 4.7834, "step": 4640 }, { "epoch": 0.12481526266290474, "grad_norm": 2.5587005615234375, "learning_rate": 4.4355168187389354e-05, "loss": 4.7969, "step": 4645 }, { "epoch": 0.12494961708988311, "grad_norm": 2.200929880142212, "learning_rate": 4.434835898134278e-05, "loss": 4.7567, "step": 4650 }, { "epoch": 0.1250839715168615, "grad_norm": 2.485139846801758, "learning_rate": 4.4341549775296205e-05, "loss": 4.718, "step": 4655 }, { "epoch": 0.12521832594383986, "grad_norm": 2.618278980255127, "learning_rate": 4.433474056924963e-05, "loss": 4.7871, "step": 4660 }, { "epoch": 0.12535268037081823, "grad_norm": 2.3308727741241455, "learning_rate": 4.432793136320305e-05, "loss": 4.7209, "step": 4665 }, { "epoch": 0.12548703479779658, "grad_norm": 2.2121143341064453, "learning_rate": 4.432112215715647e-05, "loss": 4.7732, "step": 4670 }, { "epoch": 0.12562138922477495, "grad_norm": 2.4613654613494873, "learning_rate": 4.43143129511099e-05, "loss": 4.653, "step": 4675 }, { "epoch": 0.12575574365175332, "grad_norm": 2.215388298034668, "learning_rate": 4.430750374506333e-05, "loss": 4.7284, "step": 4680 }, { "epoch": 0.1258900980787317, "grad_norm": 2.3992061614990234, "learning_rate": 4.430069453901675e-05, "loss": 4.7304, "step": 4685 }, { "epoch": 0.12602445250571007, "grad_norm": 2.317227363586426, "learning_rate": 4.4293885332970175e-05, "loss": 4.7673, "step": 4690 }, { "epoch": 0.12615880693268844, "grad_norm": 2.5580008029937744, "learning_rate": 4.4287076126923604e-05, "loss": 4.7657, "step": 4695 }, { "epoch": 0.1262931613596668, "grad_norm": 2.4706919193267822, "learning_rate": 4.4280266920877026e-05, "loss": 4.8699, "step": 4700 }, { "epoch": 0.12642751578664518, "grad_norm": 2.168724775314331, "learning_rate": 4.4273457714830455e-05, "loss": 4.6182, "step": 4705 }, { "epoch": 0.12656187021362353, "grad_norm": 2.3658969402313232, "learning_rate": 4.426664850878388e-05, "loss": 4.7864, "step": 4710 }, { "epoch": 0.1266962246406019, "grad_norm": 2.393610715866089, "learning_rate": 4.4259839302737306e-05, "loss": 4.7453, "step": 4715 }, { "epoch": 0.12683057906758027, "grad_norm": 2.362715244293213, "learning_rate": 4.425303009669073e-05, "loss": 4.737, "step": 4720 }, { "epoch": 0.12696493349455865, "grad_norm": 2.5027658939361572, "learning_rate": 4.424622089064415e-05, "loss": 4.7266, "step": 4725 }, { "epoch": 0.12709928792153702, "grad_norm": 2.51792573928833, "learning_rate": 4.423941168459757e-05, "loss": 4.8414, "step": 4730 }, { "epoch": 0.1272336423485154, "grad_norm": 2.5710716247558594, "learning_rate": 4.4232602478551e-05, "loss": 4.6183, "step": 4735 }, { "epoch": 0.12736799677549376, "grad_norm": 2.19071888923645, "learning_rate": 4.422579327250443e-05, "loss": 4.7927, "step": 4740 }, { "epoch": 0.1275023512024721, "grad_norm": 2.284208297729492, "learning_rate": 4.4218984066457854e-05, "loss": 4.6194, "step": 4745 }, { "epoch": 0.12763670562945048, "grad_norm": 2.2364134788513184, "learning_rate": 4.4212174860411276e-05, "loss": 4.7763, "step": 4750 }, { "epoch": 0.12777106005642885, "grad_norm": 2.4097795486450195, "learning_rate": 4.4205365654364705e-05, "loss": 4.6742, "step": 4755 }, { "epoch": 0.12790541448340723, "grad_norm": 2.063389778137207, "learning_rate": 4.419855644831813e-05, "loss": 4.6319, "step": 4760 }, { "epoch": 0.1280397689103856, "grad_norm": 2.4098453521728516, "learning_rate": 4.4191747242271556e-05, "loss": 4.7207, "step": 4765 }, { "epoch": 0.12817412333736397, "grad_norm": 2.6554555892944336, "learning_rate": 4.418493803622498e-05, "loss": 4.7537, "step": 4770 }, { "epoch": 0.12830847776434234, "grad_norm": 2.4152040481567383, "learning_rate": 4.417812883017841e-05, "loss": 4.8198, "step": 4775 }, { "epoch": 0.12844283219132072, "grad_norm": 2.3129639625549316, "learning_rate": 4.417131962413183e-05, "loss": 4.7426, "step": 4780 }, { "epoch": 0.12857718661829906, "grad_norm": 2.2476649284362793, "learning_rate": 4.416451041808525e-05, "loss": 4.7686, "step": 4785 }, { "epoch": 0.12871154104527743, "grad_norm": 2.3596677780151367, "learning_rate": 4.4157701212038674e-05, "loss": 4.7868, "step": 4790 }, { "epoch": 0.1288458954722558, "grad_norm": 2.683835506439209, "learning_rate": 4.41508920059921e-05, "loss": 4.6513, "step": 4795 }, { "epoch": 0.12898024989923418, "grad_norm": 2.1998159885406494, "learning_rate": 4.414408279994553e-05, "loss": 4.6741, "step": 4800 }, { "epoch": 0.12911460432621255, "grad_norm": 2.3731305599212646, "learning_rate": 4.4137273593898955e-05, "loss": 4.694, "step": 4805 }, { "epoch": 0.12924895875319092, "grad_norm": 2.496506929397583, "learning_rate": 4.413046438785238e-05, "loss": 4.7821, "step": 4810 }, { "epoch": 0.1293833131801693, "grad_norm": 2.4163739681243896, "learning_rate": 4.41236551818058e-05, "loss": 4.6394, "step": 4815 }, { "epoch": 0.12951766760714767, "grad_norm": 2.572873592376709, "learning_rate": 4.411684597575923e-05, "loss": 4.6573, "step": 4820 }, { "epoch": 0.129652022034126, "grad_norm": 2.257214307785034, "learning_rate": 4.411003676971265e-05, "loss": 4.7253, "step": 4825 }, { "epoch": 0.12978637646110439, "grad_norm": 2.7237305641174316, "learning_rate": 4.410322756366608e-05, "loss": 4.4476, "step": 4830 }, { "epoch": 0.12992073088808276, "grad_norm": 2.396909475326538, "learning_rate": 4.40964183576195e-05, "loss": 4.7351, "step": 4835 }, { "epoch": 0.13005508531506113, "grad_norm": 2.5134711265563965, "learning_rate": 4.408960915157293e-05, "loss": 4.8653, "step": 4840 }, { "epoch": 0.1301894397420395, "grad_norm": 2.438359498977661, "learning_rate": 4.408279994552635e-05, "loss": 4.7611, "step": 4845 }, { "epoch": 0.13032379416901788, "grad_norm": 2.2780253887176514, "learning_rate": 4.4075990739479775e-05, "loss": 4.9026, "step": 4850 }, { "epoch": 0.13045814859599625, "grad_norm": 2.368100643157959, "learning_rate": 4.4069181533433204e-05, "loss": 4.7376, "step": 4855 }, { "epoch": 0.13059250302297462, "grad_norm": 2.1737263202667236, "learning_rate": 4.4062372327386634e-05, "loss": 4.8204, "step": 4860 }, { "epoch": 0.13072685744995297, "grad_norm": 2.2839977741241455, "learning_rate": 4.4055563121340056e-05, "loss": 4.7728, "step": 4865 }, { "epoch": 0.13086121187693134, "grad_norm": 2.3215525150299072, "learning_rate": 4.404875391529348e-05, "loss": 4.7073, "step": 4870 }, { "epoch": 0.1309955663039097, "grad_norm": 2.2425525188446045, "learning_rate": 4.40419447092469e-05, "loss": 4.7774, "step": 4875 }, { "epoch": 0.13112992073088808, "grad_norm": 2.4383742809295654, "learning_rate": 4.403513550320033e-05, "loss": 4.7138, "step": 4880 }, { "epoch": 0.13126427515786646, "grad_norm": 2.2931289672851562, "learning_rate": 4.402832629715375e-05, "loss": 4.7834, "step": 4885 }, { "epoch": 0.13139862958484483, "grad_norm": 2.4571781158447266, "learning_rate": 4.402151709110718e-05, "loss": 4.6783, "step": 4890 }, { "epoch": 0.1315329840118232, "grad_norm": 2.2795464992523193, "learning_rate": 4.40147078850606e-05, "loss": 4.6735, "step": 4895 }, { "epoch": 0.13166733843880155, "grad_norm": 2.322563648223877, "learning_rate": 4.400789867901403e-05, "loss": 4.7541, "step": 4900 }, { "epoch": 0.13180169286577992, "grad_norm": 2.354914665222168, "learning_rate": 4.4001089472967454e-05, "loss": 4.7263, "step": 4905 }, { "epoch": 0.1319360472927583, "grad_norm": 2.393778085708618, "learning_rate": 4.3994280266920877e-05, "loss": 4.6778, "step": 4910 }, { "epoch": 0.13207040171973666, "grad_norm": 2.352766752243042, "learning_rate": 4.3987471060874306e-05, "loss": 4.7806, "step": 4915 }, { "epoch": 0.13220475614671504, "grad_norm": 2.304657459259033, "learning_rate": 4.3980661854827735e-05, "loss": 4.6589, "step": 4920 }, { "epoch": 0.1323391105736934, "grad_norm": 2.1979665756225586, "learning_rate": 4.397385264878116e-05, "loss": 4.8172, "step": 4925 }, { "epoch": 0.13247346500067178, "grad_norm": 2.532045602798462, "learning_rate": 4.396704344273458e-05, "loss": 4.7536, "step": 4930 }, { "epoch": 0.13260781942765015, "grad_norm": 2.3392767906188965, "learning_rate": 4.3960234236688e-05, "loss": 4.8324, "step": 4935 }, { "epoch": 0.1327421738546285, "grad_norm": 2.3727712631225586, "learning_rate": 4.395342503064143e-05, "loss": 4.7825, "step": 4940 }, { "epoch": 0.13287652828160687, "grad_norm": 2.3137311935424805, "learning_rate": 4.394661582459485e-05, "loss": 4.7353, "step": 4945 }, { "epoch": 0.13301088270858524, "grad_norm": 2.325050115585327, "learning_rate": 4.393980661854828e-05, "loss": 4.6556, "step": 4950 }, { "epoch": 0.13314523713556362, "grad_norm": 2.3303592205047607, "learning_rate": 4.3932997412501704e-05, "loss": 4.7295, "step": 4955 }, { "epoch": 0.133279591562542, "grad_norm": 2.3345165252685547, "learning_rate": 4.3926188206455126e-05, "loss": 4.665, "step": 4960 }, { "epoch": 0.13341394598952036, "grad_norm": 2.2794764041900635, "learning_rate": 4.3919379000408555e-05, "loss": 4.7288, "step": 4965 }, { "epoch": 0.13354830041649873, "grad_norm": 2.4537861347198486, "learning_rate": 4.391256979436198e-05, "loss": 4.7142, "step": 4970 }, { "epoch": 0.1336826548434771, "grad_norm": 2.697866916656494, "learning_rate": 4.39057605883154e-05, "loss": 4.8093, "step": 4975 }, { "epoch": 0.13381700927045545, "grad_norm": 2.7227869033813477, "learning_rate": 4.389895138226883e-05, "loss": 4.7776, "step": 4980 }, { "epoch": 0.13395136369743382, "grad_norm": 2.3009612560272217, "learning_rate": 4.389214217622226e-05, "loss": 4.708, "step": 4985 }, { "epoch": 0.1340857181244122, "grad_norm": 2.4681789875030518, "learning_rate": 4.388533297017568e-05, "loss": 4.8216, "step": 4990 }, { "epoch": 0.13422007255139057, "grad_norm": 2.1975107192993164, "learning_rate": 4.38785237641291e-05, "loss": 4.799, "step": 4995 }, { "epoch": 0.13435442697836894, "grad_norm": 2.4353811740875244, "learning_rate": 4.3871714558082525e-05, "loss": 4.7116, "step": 5000 }, { "epoch": 0.1344887814053473, "grad_norm": 2.7893145084381104, "learning_rate": 4.3864905352035954e-05, "loss": 4.7318, "step": 5005 }, { "epoch": 0.13462313583232569, "grad_norm": 2.3651912212371826, "learning_rate": 4.385809614598938e-05, "loss": 4.881, "step": 5010 }, { "epoch": 0.13475749025930403, "grad_norm": 2.261274814605713, "learning_rate": 4.3851286939942805e-05, "loss": 4.8236, "step": 5015 }, { "epoch": 0.1348918446862824, "grad_norm": 2.3519933223724365, "learning_rate": 4.384447773389623e-05, "loss": 4.761, "step": 5020 }, { "epoch": 0.13502619911326078, "grad_norm": 2.2974727153778076, "learning_rate": 4.3837668527849656e-05, "loss": 4.738, "step": 5025 }, { "epoch": 0.13516055354023915, "grad_norm": 2.246166467666626, "learning_rate": 4.383085932180308e-05, "loss": 4.858, "step": 5030 }, { "epoch": 0.13529490796721752, "grad_norm": 2.214803457260132, "learning_rate": 4.38240501157565e-05, "loss": 4.7445, "step": 5035 }, { "epoch": 0.1354292623941959, "grad_norm": 2.1694109439849854, "learning_rate": 4.381724090970993e-05, "loss": 4.7438, "step": 5040 }, { "epoch": 0.13556361682117427, "grad_norm": 2.5251026153564453, "learning_rate": 4.381043170366336e-05, "loss": 4.6166, "step": 5045 }, { "epoch": 0.13569797124815264, "grad_norm": 2.5384957790374756, "learning_rate": 4.380362249761678e-05, "loss": 4.6988, "step": 5050 }, { "epoch": 0.13583232567513098, "grad_norm": 2.5966992378234863, "learning_rate": 4.3796813291570204e-05, "loss": 4.8781, "step": 5055 }, { "epoch": 0.13596668010210936, "grad_norm": 2.3633997440338135, "learning_rate": 4.3790004085523626e-05, "loss": 4.7447, "step": 5060 }, { "epoch": 0.13610103452908773, "grad_norm": 2.391948699951172, "learning_rate": 4.3783194879477055e-05, "loss": 4.7047, "step": 5065 }, { "epoch": 0.1362353889560661, "grad_norm": 2.0203118324279785, "learning_rate": 4.3776385673430484e-05, "loss": 4.7053, "step": 5070 }, { "epoch": 0.13636974338304447, "grad_norm": 2.174410820007324, "learning_rate": 4.3769576467383906e-05, "loss": 4.7354, "step": 5075 }, { "epoch": 0.13650409781002285, "grad_norm": 2.3941686153411865, "learning_rate": 4.376276726133733e-05, "loss": 4.7285, "step": 5080 }, { "epoch": 0.13663845223700122, "grad_norm": 2.2955944538116455, "learning_rate": 4.375595805529076e-05, "loss": 4.6808, "step": 5085 }, { "epoch": 0.1367728066639796, "grad_norm": 2.482304573059082, "learning_rate": 4.374914884924418e-05, "loss": 4.7423, "step": 5090 }, { "epoch": 0.13690716109095794, "grad_norm": 2.3817191123962402, "learning_rate": 4.37423396431976e-05, "loss": 4.6605, "step": 5095 }, { "epoch": 0.1370415155179363, "grad_norm": 2.444042205810547, "learning_rate": 4.373553043715103e-05, "loss": 4.7439, "step": 5100 }, { "epoch": 0.13717586994491468, "grad_norm": 2.229215145111084, "learning_rate": 4.372872123110446e-05, "loss": 4.7792, "step": 5105 }, { "epoch": 0.13731022437189305, "grad_norm": 2.2401981353759766, "learning_rate": 4.372191202505788e-05, "loss": 4.7043, "step": 5110 }, { "epoch": 0.13744457879887143, "grad_norm": 2.2581069469451904, "learning_rate": 4.3715102819011305e-05, "loss": 4.7434, "step": 5115 }, { "epoch": 0.1375789332258498, "grad_norm": 2.096940755844116, "learning_rate": 4.370829361296473e-05, "loss": 4.6921, "step": 5120 }, { "epoch": 0.13771328765282817, "grad_norm": 2.375547409057617, "learning_rate": 4.3701484406918156e-05, "loss": 4.8029, "step": 5125 }, { "epoch": 0.13784764207980654, "grad_norm": 2.2480034828186035, "learning_rate": 4.3694675200871585e-05, "loss": 4.5713, "step": 5130 }, { "epoch": 0.1379819965067849, "grad_norm": 2.2785820960998535, "learning_rate": 4.368786599482501e-05, "loss": 4.7943, "step": 5135 }, { "epoch": 0.13811635093376326, "grad_norm": 2.2028582096099854, "learning_rate": 4.368105678877843e-05, "loss": 4.7145, "step": 5140 }, { "epoch": 0.13825070536074163, "grad_norm": 2.2140886783599854, "learning_rate": 4.367424758273185e-05, "loss": 4.8231, "step": 5145 }, { "epoch": 0.13838505978772, "grad_norm": 2.371229648590088, "learning_rate": 4.366743837668528e-05, "loss": 4.6638, "step": 5150 }, { "epoch": 0.13851941421469838, "grad_norm": 2.5016679763793945, "learning_rate": 4.36606291706387e-05, "loss": 4.7156, "step": 5155 }, { "epoch": 0.13865376864167675, "grad_norm": 2.2718706130981445, "learning_rate": 4.365381996459213e-05, "loss": 4.645, "step": 5160 }, { "epoch": 0.13878812306865512, "grad_norm": 2.4279985427856445, "learning_rate": 4.3647010758545555e-05, "loss": 4.8226, "step": 5165 }, { "epoch": 0.13892247749563347, "grad_norm": 2.6278507709503174, "learning_rate": 4.3640201552498984e-05, "loss": 4.8118, "step": 5170 }, { "epoch": 0.13905683192261184, "grad_norm": 2.070530414581299, "learning_rate": 4.3633392346452406e-05, "loss": 4.6142, "step": 5175 }, { "epoch": 0.1391911863495902, "grad_norm": 2.2454535961151123, "learning_rate": 4.362658314040583e-05, "loss": 4.6687, "step": 5180 }, { "epoch": 0.13932554077656858, "grad_norm": 2.251532554626465, "learning_rate": 4.361977393435925e-05, "loss": 4.6862, "step": 5185 }, { "epoch": 0.13945989520354696, "grad_norm": 2.5029964447021484, "learning_rate": 4.3612964728312686e-05, "loss": 4.6623, "step": 5190 }, { "epoch": 0.13959424963052533, "grad_norm": 2.2702221870422363, "learning_rate": 4.360615552226611e-05, "loss": 4.619, "step": 5195 }, { "epoch": 0.1397286040575037, "grad_norm": 2.638491630554199, "learning_rate": 4.359934631621953e-05, "loss": 4.7212, "step": 5200 }, { "epoch": 0.13986295848448208, "grad_norm": 2.2260608673095703, "learning_rate": 4.359253711017295e-05, "loss": 4.6825, "step": 5205 }, { "epoch": 0.13999731291146042, "grad_norm": 2.407391309738159, "learning_rate": 4.358572790412638e-05, "loss": 4.6911, "step": 5210 }, { "epoch": 0.1401316673384388, "grad_norm": 2.4948818683624268, "learning_rate": 4.3578918698079804e-05, "loss": 4.7699, "step": 5215 }, { "epoch": 0.14026602176541716, "grad_norm": 2.351940155029297, "learning_rate": 4.3572109492033233e-05, "loss": 4.6927, "step": 5220 }, { "epoch": 0.14040037619239554, "grad_norm": 2.388455390930176, "learning_rate": 4.3565300285986656e-05, "loss": 4.8632, "step": 5225 }, { "epoch": 0.1405347306193739, "grad_norm": 2.1543734073638916, "learning_rate": 4.3558491079940085e-05, "loss": 4.7038, "step": 5230 }, { "epoch": 0.14066908504635228, "grad_norm": 2.450618267059326, "learning_rate": 4.355168187389351e-05, "loss": 4.7647, "step": 5235 }, { "epoch": 0.14080343947333065, "grad_norm": 2.4088172912597656, "learning_rate": 4.354487266784693e-05, "loss": 4.7614, "step": 5240 }, { "epoch": 0.14093779390030903, "grad_norm": 2.2573354244232178, "learning_rate": 4.353806346180035e-05, "loss": 4.71, "step": 5245 }, { "epoch": 0.14107214832728737, "grad_norm": 2.1550099849700928, "learning_rate": 4.353125425575378e-05, "loss": 4.6734, "step": 5250 }, { "epoch": 0.14120650275426574, "grad_norm": 2.2590415477752686, "learning_rate": 4.352444504970721e-05, "loss": 4.6657, "step": 5255 }, { "epoch": 0.14134085718124412, "grad_norm": 2.579711675643921, "learning_rate": 4.351763584366063e-05, "loss": 4.8048, "step": 5260 }, { "epoch": 0.1414752116082225, "grad_norm": 2.5554847717285156, "learning_rate": 4.3510826637614054e-05, "loss": 4.6923, "step": 5265 }, { "epoch": 0.14160956603520086, "grad_norm": 2.264326810836792, "learning_rate": 4.3504017431567476e-05, "loss": 4.6755, "step": 5270 }, { "epoch": 0.14174392046217923, "grad_norm": 2.633161783218384, "learning_rate": 4.3497208225520905e-05, "loss": 4.6751, "step": 5275 }, { "epoch": 0.1418782748891576, "grad_norm": 3.0556602478027344, "learning_rate": 4.3490399019474335e-05, "loss": 4.6202, "step": 5280 }, { "epoch": 0.14201262931613598, "grad_norm": 2.360241174697876, "learning_rate": 4.348358981342776e-05, "loss": 4.7366, "step": 5285 }, { "epoch": 0.14214698374311432, "grad_norm": 2.3771448135375977, "learning_rate": 4.347678060738118e-05, "loss": 4.6491, "step": 5290 }, { "epoch": 0.1422813381700927, "grad_norm": 2.277683734893799, "learning_rate": 4.346997140133461e-05, "loss": 4.6599, "step": 5295 }, { "epoch": 0.14241569259707107, "grad_norm": 2.3080601692199707, "learning_rate": 4.346316219528803e-05, "loss": 4.7805, "step": 5300 }, { "epoch": 0.14255004702404944, "grad_norm": 2.375811815261841, "learning_rate": 4.345635298924145e-05, "loss": 4.6954, "step": 5305 }, { "epoch": 0.14268440145102781, "grad_norm": 2.5283961296081543, "learning_rate": 4.344954378319488e-05, "loss": 4.5886, "step": 5310 }, { "epoch": 0.1428187558780062, "grad_norm": 2.4433255195617676, "learning_rate": 4.344273457714831e-05, "loss": 4.7019, "step": 5315 }, { "epoch": 0.14295311030498456, "grad_norm": 2.1628475189208984, "learning_rate": 4.343592537110173e-05, "loss": 4.6834, "step": 5320 }, { "epoch": 0.1430874647319629, "grad_norm": 2.1188840866088867, "learning_rate": 4.3429116165055155e-05, "loss": 4.7292, "step": 5325 }, { "epoch": 0.14322181915894128, "grad_norm": 2.0829310417175293, "learning_rate": 4.342230695900858e-05, "loss": 4.7359, "step": 5330 }, { "epoch": 0.14335617358591965, "grad_norm": 2.472219228744507, "learning_rate": 4.3415497752962007e-05, "loss": 4.7155, "step": 5335 }, { "epoch": 0.14349052801289802, "grad_norm": 2.3763554096221924, "learning_rate": 4.3408688546915436e-05, "loss": 4.704, "step": 5340 }, { "epoch": 0.1436248824398764, "grad_norm": 2.222320318222046, "learning_rate": 4.340187934086886e-05, "loss": 4.7033, "step": 5345 }, { "epoch": 0.14375923686685477, "grad_norm": 2.209571599960327, "learning_rate": 4.339507013482228e-05, "loss": 4.6626, "step": 5350 }, { "epoch": 0.14389359129383314, "grad_norm": 2.4574036598205566, "learning_rate": 4.338826092877571e-05, "loss": 4.6549, "step": 5355 }, { "epoch": 0.1440279457208115, "grad_norm": 2.506190538406372, "learning_rate": 4.338145172272913e-05, "loss": 4.6334, "step": 5360 }, { "epoch": 0.14416230014778986, "grad_norm": 2.230236291885376, "learning_rate": 4.3374642516682554e-05, "loss": 4.7253, "step": 5365 }, { "epoch": 0.14429665457476823, "grad_norm": 2.390226364135742, "learning_rate": 4.336783331063598e-05, "loss": 4.6153, "step": 5370 }, { "epoch": 0.1444310090017466, "grad_norm": 2.410153388977051, "learning_rate": 4.336102410458941e-05, "loss": 4.6611, "step": 5375 }, { "epoch": 0.14456536342872497, "grad_norm": 2.4580087661743164, "learning_rate": 4.3354214898542834e-05, "loss": 4.7531, "step": 5380 }, { "epoch": 0.14469971785570335, "grad_norm": 2.527559757232666, "learning_rate": 4.3347405692496256e-05, "loss": 4.5698, "step": 5385 }, { "epoch": 0.14483407228268172, "grad_norm": 2.5995399951934814, "learning_rate": 4.334059648644968e-05, "loss": 4.5992, "step": 5390 }, { "epoch": 0.1449684267096601, "grad_norm": 2.2917354106903076, "learning_rate": 4.333378728040311e-05, "loss": 4.7568, "step": 5395 }, { "epoch": 0.14510278113663846, "grad_norm": 2.3101868629455566, "learning_rate": 4.332697807435654e-05, "loss": 4.6168, "step": 5400 }, { "epoch": 0.1452371355636168, "grad_norm": 2.313093900680542, "learning_rate": 4.332016886830996e-05, "loss": 4.8072, "step": 5405 }, { "epoch": 0.14537148999059518, "grad_norm": 2.5541889667510986, "learning_rate": 4.331335966226338e-05, "loss": 4.7087, "step": 5410 }, { "epoch": 0.14550584441757355, "grad_norm": 2.508989095687866, "learning_rate": 4.3306550456216804e-05, "loss": 4.6275, "step": 5415 }, { "epoch": 0.14564019884455193, "grad_norm": 2.3997724056243896, "learning_rate": 4.329974125017023e-05, "loss": 4.6216, "step": 5420 }, { "epoch": 0.1457745532715303, "grad_norm": 2.179547071456909, "learning_rate": 4.3292932044123655e-05, "loss": 4.6877, "step": 5425 }, { "epoch": 0.14590890769850867, "grad_norm": 2.3510310649871826, "learning_rate": 4.3286122838077084e-05, "loss": 4.7389, "step": 5430 }, { "epoch": 0.14604326212548704, "grad_norm": 2.5767464637756348, "learning_rate": 4.3279313632030506e-05, "loss": 4.556, "step": 5435 }, { "epoch": 0.14617761655246542, "grad_norm": 2.4311118125915527, "learning_rate": 4.3272504425983935e-05, "loss": 4.7749, "step": 5440 }, { "epoch": 0.14631197097944376, "grad_norm": 2.3742549419403076, "learning_rate": 4.326569521993736e-05, "loss": 4.6938, "step": 5445 }, { "epoch": 0.14644632540642213, "grad_norm": 2.472548484802246, "learning_rate": 4.325888601389078e-05, "loss": 4.75, "step": 5450 }, { "epoch": 0.1465806798334005, "grad_norm": 2.5138728618621826, "learning_rate": 4.32520768078442e-05, "loss": 4.6166, "step": 5455 }, { "epoch": 0.14671503426037888, "grad_norm": 2.5273678302764893, "learning_rate": 4.324526760179763e-05, "loss": 4.7135, "step": 5460 }, { "epoch": 0.14684938868735725, "grad_norm": 2.2863588333129883, "learning_rate": 4.323845839575106e-05, "loss": 4.8035, "step": 5465 }, { "epoch": 0.14698374311433562, "grad_norm": 2.5088889598846436, "learning_rate": 4.323164918970448e-05, "loss": 4.5936, "step": 5470 }, { "epoch": 0.147118097541314, "grad_norm": 2.1304173469543457, "learning_rate": 4.3224839983657905e-05, "loss": 4.748, "step": 5475 }, { "epoch": 0.14725245196829234, "grad_norm": 2.287482261657715, "learning_rate": 4.3218030777611334e-05, "loss": 4.7375, "step": 5480 }, { "epoch": 0.14738680639527071, "grad_norm": 2.2737138271331787, "learning_rate": 4.3211221571564756e-05, "loss": 4.7367, "step": 5485 }, { "epoch": 0.1475211608222491, "grad_norm": 2.5050153732299805, "learning_rate": 4.3204412365518185e-05, "loss": 4.7159, "step": 5490 }, { "epoch": 0.14765551524922746, "grad_norm": 2.3213109970092773, "learning_rate": 4.319760315947161e-05, "loss": 4.5497, "step": 5495 }, { "epoch": 0.14778986967620583, "grad_norm": 2.4361581802368164, "learning_rate": 4.3190793953425036e-05, "loss": 4.7025, "step": 5500 }, { "epoch": 0.1479242241031842, "grad_norm": 2.4409122467041016, "learning_rate": 4.318398474737846e-05, "loss": 4.693, "step": 5505 }, { "epoch": 0.14805857853016258, "grad_norm": 2.3179500102996826, "learning_rate": 4.317717554133188e-05, "loss": 4.6942, "step": 5510 }, { "epoch": 0.14819293295714095, "grad_norm": 2.516125440597534, "learning_rate": 4.31703663352853e-05, "loss": 4.6148, "step": 5515 }, { "epoch": 0.1483272873841193, "grad_norm": 2.203394889831543, "learning_rate": 4.316355712923873e-05, "loss": 4.7775, "step": 5520 }, { "epoch": 0.14846164181109767, "grad_norm": 2.5464253425598145, "learning_rate": 4.315674792319216e-05, "loss": 4.6528, "step": 5525 }, { "epoch": 0.14859599623807604, "grad_norm": 2.224968671798706, "learning_rate": 4.3149938717145584e-05, "loss": 4.5648, "step": 5530 }, { "epoch": 0.1487303506650544, "grad_norm": 2.447449207305908, "learning_rate": 4.3143129511099006e-05, "loss": 4.6464, "step": 5535 }, { "epoch": 0.14886470509203278, "grad_norm": 2.341639280319214, "learning_rate": 4.3136320305052435e-05, "loss": 4.6618, "step": 5540 }, { "epoch": 0.14899905951901116, "grad_norm": 2.156888961791992, "learning_rate": 4.312951109900586e-05, "loss": 4.7622, "step": 5545 }, { "epoch": 0.14913341394598953, "grad_norm": 2.1902127265930176, "learning_rate": 4.3122701892959286e-05, "loss": 4.6198, "step": 5550 }, { "epoch": 0.1492677683729679, "grad_norm": 2.3121960163116455, "learning_rate": 4.311589268691271e-05, "loss": 4.6535, "step": 5555 }, { "epoch": 0.14940212279994625, "grad_norm": 2.141181468963623, "learning_rate": 4.310908348086613e-05, "loss": 4.7069, "step": 5560 }, { "epoch": 0.14953647722692462, "grad_norm": 2.5161216259002686, "learning_rate": 4.310227427481956e-05, "loss": 4.8493, "step": 5565 }, { "epoch": 0.149670831653903, "grad_norm": 2.417941093444824, "learning_rate": 4.309546506877298e-05, "loss": 4.5883, "step": 5570 }, { "epoch": 0.14980518608088136, "grad_norm": 2.4670495986938477, "learning_rate": 4.3088655862726404e-05, "loss": 4.7349, "step": 5575 }, { "epoch": 0.14993954050785974, "grad_norm": 2.4075560569763184, "learning_rate": 4.308184665667983e-05, "loss": 4.686, "step": 5580 }, { "epoch": 0.1500738949348381, "grad_norm": 2.2981793880462646, "learning_rate": 4.307503745063326e-05, "loss": 4.5618, "step": 5585 }, { "epoch": 0.15020824936181648, "grad_norm": 2.3917644023895264, "learning_rate": 4.3068228244586685e-05, "loss": 4.6336, "step": 5590 }, { "epoch": 0.15034260378879485, "grad_norm": 2.308039903640747, "learning_rate": 4.306141903854011e-05, "loss": 4.6664, "step": 5595 }, { "epoch": 0.1504769582157732, "grad_norm": 2.4636881351470947, "learning_rate": 4.305460983249353e-05, "loss": 4.6192, "step": 5600 }, { "epoch": 0.15061131264275157, "grad_norm": 2.4126856327056885, "learning_rate": 4.304780062644696e-05, "loss": 4.7288, "step": 5605 }, { "epoch": 0.15074566706972994, "grad_norm": 2.3596444129943848, "learning_rate": 4.304099142040039e-05, "loss": 4.7448, "step": 5610 }, { "epoch": 0.15088002149670832, "grad_norm": 2.4230477809906006, "learning_rate": 4.303418221435381e-05, "loss": 4.5681, "step": 5615 }, { "epoch": 0.1510143759236867, "grad_norm": 2.239949941635132, "learning_rate": 4.302737300830723e-05, "loss": 4.6868, "step": 5620 }, { "epoch": 0.15114873035066506, "grad_norm": 2.2889106273651123, "learning_rate": 4.302056380226066e-05, "loss": 4.7044, "step": 5625 }, { "epoch": 0.15128308477764343, "grad_norm": 2.311051368713379, "learning_rate": 4.301375459621408e-05, "loss": 4.631, "step": 5630 }, { "epoch": 0.15141743920462178, "grad_norm": 2.24658465385437, "learning_rate": 4.3006945390167505e-05, "loss": 4.6437, "step": 5635 }, { "epoch": 0.15155179363160015, "grad_norm": 2.239083766937256, "learning_rate": 4.3000136184120934e-05, "loss": 4.7385, "step": 5640 }, { "epoch": 0.15168614805857852, "grad_norm": 2.26863694190979, "learning_rate": 4.2993326978074363e-05, "loss": 4.6555, "step": 5645 }, { "epoch": 0.1518205024855569, "grad_norm": 2.4318182468414307, "learning_rate": 4.2986517772027786e-05, "loss": 4.645, "step": 5650 }, { "epoch": 0.15195485691253527, "grad_norm": 2.3421406745910645, "learning_rate": 4.297970856598121e-05, "loss": 4.6459, "step": 5655 }, { "epoch": 0.15208921133951364, "grad_norm": 2.5247678756713867, "learning_rate": 4.297289935993463e-05, "loss": 4.7227, "step": 5660 }, { "epoch": 0.152223565766492, "grad_norm": 2.3438174724578857, "learning_rate": 4.296609015388806e-05, "loss": 4.7852, "step": 5665 }, { "epoch": 0.1523579201934704, "grad_norm": 2.39847731590271, "learning_rate": 4.295928094784148e-05, "loss": 4.6858, "step": 5670 }, { "epoch": 0.15249227462044873, "grad_norm": 2.4760515689849854, "learning_rate": 4.295247174179491e-05, "loss": 4.6744, "step": 5675 }, { "epoch": 0.1526266290474271, "grad_norm": 2.105417490005493, "learning_rate": 4.294566253574833e-05, "loss": 4.6975, "step": 5680 }, { "epoch": 0.15276098347440548, "grad_norm": 2.1435978412628174, "learning_rate": 4.293885332970176e-05, "loss": 4.6832, "step": 5685 }, { "epoch": 0.15289533790138385, "grad_norm": 2.5127382278442383, "learning_rate": 4.2932044123655184e-05, "loss": 4.6844, "step": 5690 }, { "epoch": 0.15302969232836222, "grad_norm": 2.443847417831421, "learning_rate": 4.2925234917608606e-05, "loss": 4.5977, "step": 5695 }, { "epoch": 0.1531640467553406, "grad_norm": 2.358274221420288, "learning_rate": 4.2918425711562036e-05, "loss": 4.7252, "step": 5700 }, { "epoch": 0.15329840118231897, "grad_norm": 2.2438042163848877, "learning_rate": 4.291161650551546e-05, "loss": 4.7179, "step": 5705 }, { "epoch": 0.15343275560929734, "grad_norm": 2.232947826385498, "learning_rate": 4.290480729946889e-05, "loss": 4.6442, "step": 5710 }, { "epoch": 0.15356711003627568, "grad_norm": 2.2285115718841553, "learning_rate": 4.289799809342231e-05, "loss": 4.7839, "step": 5715 }, { "epoch": 0.15370146446325406, "grad_norm": 2.3586935997009277, "learning_rate": 4.289118888737573e-05, "loss": 4.7326, "step": 5720 }, { "epoch": 0.15383581889023243, "grad_norm": 2.3652799129486084, "learning_rate": 4.2884379681329154e-05, "loss": 4.6875, "step": 5725 }, { "epoch": 0.1539701733172108, "grad_norm": 2.1931145191192627, "learning_rate": 4.287757047528258e-05, "loss": 4.6205, "step": 5730 }, { "epoch": 0.15410452774418917, "grad_norm": 2.5673062801361084, "learning_rate": 4.287076126923601e-05, "loss": 4.7581, "step": 5735 }, { "epoch": 0.15423888217116755, "grad_norm": 2.4709861278533936, "learning_rate": 4.2863952063189434e-05, "loss": 4.6863, "step": 5740 }, { "epoch": 0.15437323659814592, "grad_norm": 2.0905582904815674, "learning_rate": 4.2857142857142856e-05, "loss": 4.7595, "step": 5745 }, { "epoch": 0.1545075910251243, "grad_norm": 2.549795150756836, "learning_rate": 4.2850333651096285e-05, "loss": 4.8004, "step": 5750 }, { "epoch": 0.15464194545210264, "grad_norm": 2.912210464477539, "learning_rate": 4.284352444504971e-05, "loss": 4.4553, "step": 5755 }, { "epoch": 0.154776299879081, "grad_norm": 2.2438201904296875, "learning_rate": 4.283671523900314e-05, "loss": 4.6412, "step": 5760 }, { "epoch": 0.15491065430605938, "grad_norm": 2.1749048233032227, "learning_rate": 4.282990603295656e-05, "loss": 4.6736, "step": 5765 }, { "epoch": 0.15504500873303775, "grad_norm": 2.099368095397949, "learning_rate": 4.282309682690999e-05, "loss": 4.6474, "step": 5770 }, { "epoch": 0.15517936316001613, "grad_norm": 2.3514087200164795, "learning_rate": 4.281628762086341e-05, "loss": 4.5752, "step": 5775 }, { "epoch": 0.1553137175869945, "grad_norm": 2.193413734436035, "learning_rate": 4.280947841481683e-05, "loss": 4.5385, "step": 5780 }, { "epoch": 0.15544807201397287, "grad_norm": 2.358552932739258, "learning_rate": 4.2802669208770255e-05, "loss": 4.6124, "step": 5785 }, { "epoch": 0.15558242644095122, "grad_norm": 2.308378219604492, "learning_rate": 4.2795860002723684e-05, "loss": 4.6325, "step": 5790 }, { "epoch": 0.1557167808679296, "grad_norm": 2.1170716285705566, "learning_rate": 4.278905079667711e-05, "loss": 4.6792, "step": 5795 }, { "epoch": 0.15585113529490796, "grad_norm": 2.5537612438201904, "learning_rate": 4.2782241590630535e-05, "loss": 4.7151, "step": 5800 }, { "epoch": 0.15598548972188633, "grad_norm": 2.303457736968994, "learning_rate": 4.277543238458396e-05, "loss": 4.5988, "step": 5805 }, { "epoch": 0.1561198441488647, "grad_norm": 2.3445920944213867, "learning_rate": 4.2768623178537386e-05, "loss": 4.5099, "step": 5810 }, { "epoch": 0.15625419857584308, "grad_norm": 2.38425874710083, "learning_rate": 4.276181397249081e-05, "loss": 4.7772, "step": 5815 }, { "epoch": 0.15638855300282145, "grad_norm": 2.7910056114196777, "learning_rate": 4.275500476644423e-05, "loss": 4.6721, "step": 5820 }, { "epoch": 0.15652290742979982, "grad_norm": 2.4527087211608887, "learning_rate": 4.274819556039766e-05, "loss": 4.6279, "step": 5825 }, { "epoch": 0.15665726185677817, "grad_norm": 2.4822309017181396, "learning_rate": 4.274138635435109e-05, "loss": 4.6326, "step": 5830 }, { "epoch": 0.15679161628375654, "grad_norm": 2.3668267726898193, "learning_rate": 4.273457714830451e-05, "loss": 4.6652, "step": 5835 }, { "epoch": 0.1569259707107349, "grad_norm": 2.3012495040893555, "learning_rate": 4.2727767942257934e-05, "loss": 4.5839, "step": 5840 }, { "epoch": 0.15706032513771329, "grad_norm": 2.3303887844085693, "learning_rate": 4.2720958736211356e-05, "loss": 4.7545, "step": 5845 }, { "epoch": 0.15719467956469166, "grad_norm": 2.26655650138855, "learning_rate": 4.2714149530164785e-05, "loss": 4.5606, "step": 5850 }, { "epoch": 0.15732903399167003, "grad_norm": 2.1791794300079346, "learning_rate": 4.2707340324118214e-05, "loss": 4.6482, "step": 5855 }, { "epoch": 0.1574633884186484, "grad_norm": 2.3366024494171143, "learning_rate": 4.2700531118071636e-05, "loss": 4.6179, "step": 5860 }, { "epoch": 0.15759774284562678, "grad_norm": 2.2457427978515625, "learning_rate": 4.269372191202506e-05, "loss": 4.5906, "step": 5865 }, { "epoch": 0.15773209727260512, "grad_norm": 2.2892916202545166, "learning_rate": 4.268691270597848e-05, "loss": 4.6369, "step": 5870 }, { "epoch": 0.1578664516995835, "grad_norm": 2.3485372066497803, "learning_rate": 4.268010349993191e-05, "loss": 4.7087, "step": 5875 }, { "epoch": 0.15800080612656187, "grad_norm": 2.357516288757324, "learning_rate": 4.267329429388533e-05, "loss": 4.7383, "step": 5880 }, { "epoch": 0.15813516055354024, "grad_norm": 2.543384313583374, "learning_rate": 4.266648508783876e-05, "loss": 4.5478, "step": 5885 }, { "epoch": 0.1582695149805186, "grad_norm": 2.70294189453125, "learning_rate": 4.2659675881792183e-05, "loss": 4.6505, "step": 5890 }, { "epoch": 0.15840386940749698, "grad_norm": 2.223876714706421, "learning_rate": 4.265286667574561e-05, "loss": 4.6407, "step": 5895 }, { "epoch": 0.15853822383447536, "grad_norm": 2.4264795780181885, "learning_rate": 4.2646057469699035e-05, "loss": 4.8008, "step": 5900 }, { "epoch": 0.15867257826145373, "grad_norm": 2.2152278423309326, "learning_rate": 4.263924826365246e-05, "loss": 4.7099, "step": 5905 }, { "epoch": 0.15880693268843207, "grad_norm": 2.322824239730835, "learning_rate": 4.2632439057605886e-05, "loss": 4.7685, "step": 5910 }, { "epoch": 0.15894128711541045, "grad_norm": 2.4484431743621826, "learning_rate": 4.2625629851559315e-05, "loss": 4.6356, "step": 5915 }, { "epoch": 0.15907564154238882, "grad_norm": 2.285881280899048, "learning_rate": 4.261882064551274e-05, "loss": 4.6492, "step": 5920 }, { "epoch": 0.1592099959693672, "grad_norm": 2.434908151626587, "learning_rate": 4.261201143946616e-05, "loss": 4.6614, "step": 5925 }, { "epoch": 0.15934435039634556, "grad_norm": 2.502322196960449, "learning_rate": 4.260520223341958e-05, "loss": 4.7753, "step": 5930 }, { "epoch": 0.15947870482332394, "grad_norm": 2.623091697692871, "learning_rate": 4.259839302737301e-05, "loss": 4.8025, "step": 5935 }, { "epoch": 0.1596130592503023, "grad_norm": 2.3283679485321045, "learning_rate": 4.259158382132643e-05, "loss": 4.5877, "step": 5940 }, { "epoch": 0.15974741367728065, "grad_norm": 2.4143717288970947, "learning_rate": 4.258477461527986e-05, "loss": 4.6031, "step": 5945 }, { "epoch": 0.15988176810425903, "grad_norm": 2.351607084274292, "learning_rate": 4.2577965409233285e-05, "loss": 4.5914, "step": 5950 }, { "epoch": 0.1600161225312374, "grad_norm": 2.4132354259490967, "learning_rate": 4.2571156203186714e-05, "loss": 4.5631, "step": 5955 }, { "epoch": 0.16015047695821577, "grad_norm": 2.398253917694092, "learning_rate": 4.2564346997140136e-05, "loss": 4.6229, "step": 5960 }, { "epoch": 0.16028483138519414, "grad_norm": 2.437215566635132, "learning_rate": 4.255753779109356e-05, "loss": 4.7297, "step": 5965 }, { "epoch": 0.16041918581217252, "grad_norm": 2.2300703525543213, "learning_rate": 4.255072858504699e-05, "loss": 4.5491, "step": 5970 }, { "epoch": 0.1605535402391509, "grad_norm": 2.309344530105591, "learning_rate": 4.2543919379000416e-05, "loss": 4.7037, "step": 5975 }, { "epoch": 0.16068789466612926, "grad_norm": 2.2457475662231445, "learning_rate": 4.253711017295384e-05, "loss": 4.7216, "step": 5980 }, { "epoch": 0.1608222490931076, "grad_norm": 2.2409584522247314, "learning_rate": 4.253030096690726e-05, "loss": 4.6473, "step": 5985 }, { "epoch": 0.16095660352008598, "grad_norm": 2.3405046463012695, "learning_rate": 4.252349176086068e-05, "loss": 4.6641, "step": 5990 }, { "epoch": 0.16109095794706435, "grad_norm": 2.4117116928100586, "learning_rate": 4.251668255481411e-05, "loss": 4.5869, "step": 5995 }, { "epoch": 0.16122531237404272, "grad_norm": 2.4327499866485596, "learning_rate": 4.2509873348767534e-05, "loss": 4.6004, "step": 6000 }, { "epoch": 0.1613596668010211, "grad_norm": 2.4647257328033447, "learning_rate": 4.250306414272096e-05, "loss": 4.6908, "step": 6005 }, { "epoch": 0.16149402122799947, "grad_norm": 2.4930577278137207, "learning_rate": 4.2496254936674386e-05, "loss": 4.7004, "step": 6010 }, { "epoch": 0.16162837565497784, "grad_norm": 2.3261513710021973, "learning_rate": 4.248944573062781e-05, "loss": 4.6225, "step": 6015 }, { "epoch": 0.1617627300819562, "grad_norm": 2.5002598762512207, "learning_rate": 4.248263652458124e-05, "loss": 4.6243, "step": 6020 }, { "epoch": 0.16189708450893456, "grad_norm": 2.171928882598877, "learning_rate": 4.247582731853466e-05, "loss": 4.7203, "step": 6025 }, { "epoch": 0.16203143893591293, "grad_norm": 2.506356716156006, "learning_rate": 4.246901811248808e-05, "loss": 4.6799, "step": 6030 }, { "epoch": 0.1621657933628913, "grad_norm": 2.3867156505584717, "learning_rate": 4.246220890644151e-05, "loss": 4.6373, "step": 6035 }, { "epoch": 0.16230014778986968, "grad_norm": 2.478376626968384, "learning_rate": 4.245539970039494e-05, "loss": 4.7229, "step": 6040 }, { "epoch": 0.16243450221684805, "grad_norm": 2.2173712253570557, "learning_rate": 4.244859049434836e-05, "loss": 4.6154, "step": 6045 }, { "epoch": 0.16256885664382642, "grad_norm": 2.357283592224121, "learning_rate": 4.2441781288301784e-05, "loss": 4.5737, "step": 6050 }, { "epoch": 0.1627032110708048, "grad_norm": 2.4080758094787598, "learning_rate": 4.2434972082255206e-05, "loss": 4.5552, "step": 6055 }, { "epoch": 0.16283756549778317, "grad_norm": 2.4244191646575928, "learning_rate": 4.2428162876208635e-05, "loss": 4.7059, "step": 6060 }, { "epoch": 0.1629719199247615, "grad_norm": 2.342439889907837, "learning_rate": 4.2421353670162064e-05, "loss": 4.5859, "step": 6065 }, { "epoch": 0.16310627435173988, "grad_norm": 2.381415605545044, "learning_rate": 4.241454446411549e-05, "loss": 4.715, "step": 6070 }, { "epoch": 0.16324062877871826, "grad_norm": 2.187832832336426, "learning_rate": 4.240773525806891e-05, "loss": 4.6729, "step": 6075 }, { "epoch": 0.16337498320569663, "grad_norm": 2.2844033241271973, "learning_rate": 4.240092605202234e-05, "loss": 4.6235, "step": 6080 }, { "epoch": 0.163509337632675, "grad_norm": 2.1520915031433105, "learning_rate": 4.239411684597576e-05, "loss": 4.6282, "step": 6085 }, { "epoch": 0.16364369205965337, "grad_norm": 2.274156332015991, "learning_rate": 4.238730763992918e-05, "loss": 4.5433, "step": 6090 }, { "epoch": 0.16377804648663175, "grad_norm": 2.2229771614074707, "learning_rate": 4.238049843388261e-05, "loss": 4.7427, "step": 6095 }, { "epoch": 0.1639124009136101, "grad_norm": 2.12465238571167, "learning_rate": 4.237368922783604e-05, "loss": 4.6112, "step": 6100 }, { "epoch": 0.16404675534058846, "grad_norm": 2.288067579269409, "learning_rate": 4.236688002178946e-05, "loss": 4.5895, "step": 6105 }, { "epoch": 0.16418110976756684, "grad_norm": 2.296560525894165, "learning_rate": 4.2360070815742885e-05, "loss": 4.5356, "step": 6110 }, { "epoch": 0.1643154641945452, "grad_norm": 2.351196050643921, "learning_rate": 4.235326160969631e-05, "loss": 4.6556, "step": 6115 }, { "epoch": 0.16444981862152358, "grad_norm": 2.287306785583496, "learning_rate": 4.2346452403649737e-05, "loss": 4.5939, "step": 6120 }, { "epoch": 0.16458417304850195, "grad_norm": 2.4288675785064697, "learning_rate": 4.2339643197603166e-05, "loss": 4.5915, "step": 6125 }, { "epoch": 0.16471852747548033, "grad_norm": 2.171098470687866, "learning_rate": 4.233283399155659e-05, "loss": 4.5222, "step": 6130 }, { "epoch": 0.1648528819024587, "grad_norm": 2.141940116882324, "learning_rate": 4.232602478551001e-05, "loss": 4.6666, "step": 6135 }, { "epoch": 0.16498723632943704, "grad_norm": 2.3677592277526855, "learning_rate": 4.231921557946344e-05, "loss": 4.6978, "step": 6140 }, { "epoch": 0.16512159075641542, "grad_norm": 2.283712863922119, "learning_rate": 4.231240637341686e-05, "loss": 4.7622, "step": 6145 }, { "epoch": 0.1652559451833938, "grad_norm": 2.208111047744751, "learning_rate": 4.2305597167370284e-05, "loss": 4.697, "step": 6150 }, { "epoch": 0.16539029961037216, "grad_norm": 2.327491521835327, "learning_rate": 4.229878796132371e-05, "loss": 4.5452, "step": 6155 }, { "epoch": 0.16552465403735053, "grad_norm": 2.6018028259277344, "learning_rate": 4.2291978755277135e-05, "loss": 4.655, "step": 6160 }, { "epoch": 0.1656590084643289, "grad_norm": 2.3526546955108643, "learning_rate": 4.2285169549230564e-05, "loss": 4.5395, "step": 6165 }, { "epoch": 0.16579336289130728, "grad_norm": 2.5430386066436768, "learning_rate": 4.2278360343183986e-05, "loss": 4.5327, "step": 6170 }, { "epoch": 0.16592771731828565, "grad_norm": 2.2715277671813965, "learning_rate": 4.227155113713741e-05, "loss": 4.5757, "step": 6175 }, { "epoch": 0.166062071745264, "grad_norm": 2.0955557823181152, "learning_rate": 4.226474193109084e-05, "loss": 4.6361, "step": 6180 }, { "epoch": 0.16619642617224237, "grad_norm": 2.400583267211914, "learning_rate": 4.225793272504427e-05, "loss": 4.6402, "step": 6185 }, { "epoch": 0.16633078059922074, "grad_norm": 2.170935869216919, "learning_rate": 4.225112351899769e-05, "loss": 4.757, "step": 6190 }, { "epoch": 0.1664651350261991, "grad_norm": 2.524139404296875, "learning_rate": 4.224431431295111e-05, "loss": 4.621, "step": 6195 }, { "epoch": 0.16659948945317749, "grad_norm": 2.2586677074432373, "learning_rate": 4.2237505106904534e-05, "loss": 4.5916, "step": 6200 }, { "epoch": 0.16673384388015586, "grad_norm": 2.3482825756073, "learning_rate": 4.223069590085796e-05, "loss": 4.6336, "step": 6205 }, { "epoch": 0.16686819830713423, "grad_norm": 2.035543918609619, "learning_rate": 4.2223886694811385e-05, "loss": 4.6313, "step": 6210 }, { "epoch": 0.1670025527341126, "grad_norm": 2.2824630737304688, "learning_rate": 4.2217077488764814e-05, "loss": 4.5794, "step": 6215 }, { "epoch": 0.16713690716109095, "grad_norm": 2.4770102500915527, "learning_rate": 4.2210268282718236e-05, "loss": 4.7118, "step": 6220 }, { "epoch": 0.16727126158806932, "grad_norm": 2.48583722114563, "learning_rate": 4.2203459076671665e-05, "loss": 4.6572, "step": 6225 }, { "epoch": 0.1674056160150477, "grad_norm": 2.1995291709899902, "learning_rate": 4.219664987062509e-05, "loss": 4.6766, "step": 6230 }, { "epoch": 0.16753997044202606, "grad_norm": 2.4620847702026367, "learning_rate": 4.218984066457851e-05, "loss": 4.6487, "step": 6235 }, { "epoch": 0.16767432486900444, "grad_norm": 2.292832374572754, "learning_rate": 4.218303145853193e-05, "loss": 4.7578, "step": 6240 }, { "epoch": 0.1678086792959828, "grad_norm": 2.2099673748016357, "learning_rate": 4.217622225248537e-05, "loss": 4.6665, "step": 6245 }, { "epoch": 0.16794303372296118, "grad_norm": 2.0643885135650635, "learning_rate": 4.216941304643879e-05, "loss": 4.7198, "step": 6250 }, { "epoch": 0.16807738814993953, "grad_norm": 2.1985578536987305, "learning_rate": 4.216260384039221e-05, "loss": 4.6381, "step": 6255 }, { "epoch": 0.1682117425769179, "grad_norm": 2.3217663764953613, "learning_rate": 4.2155794634345635e-05, "loss": 4.6219, "step": 6260 }, { "epoch": 0.16834609700389627, "grad_norm": 2.3149125576019287, "learning_rate": 4.2148985428299064e-05, "loss": 4.6082, "step": 6265 }, { "epoch": 0.16848045143087464, "grad_norm": 2.496859073638916, "learning_rate": 4.2142176222252486e-05, "loss": 4.7181, "step": 6270 }, { "epoch": 0.16861480585785302, "grad_norm": 2.0511131286621094, "learning_rate": 4.2135367016205915e-05, "loss": 4.7112, "step": 6275 }, { "epoch": 0.1687491602848314, "grad_norm": 2.260448455810547, "learning_rate": 4.212855781015934e-05, "loss": 4.5699, "step": 6280 }, { "epoch": 0.16888351471180976, "grad_norm": 2.306929349899292, "learning_rate": 4.2121748604112766e-05, "loss": 4.6419, "step": 6285 }, { "epoch": 0.16901786913878813, "grad_norm": 2.1921920776367188, "learning_rate": 4.211493939806619e-05, "loss": 4.7025, "step": 6290 }, { "epoch": 0.16915222356576648, "grad_norm": 2.4268269538879395, "learning_rate": 4.210813019201961e-05, "loss": 4.5908, "step": 6295 }, { "epoch": 0.16928657799274485, "grad_norm": 2.3178722858428955, "learning_rate": 4.210132098597303e-05, "loss": 4.6871, "step": 6300 }, { "epoch": 0.16942093241972322, "grad_norm": 2.2124123573303223, "learning_rate": 4.209451177992646e-05, "loss": 4.7646, "step": 6305 }, { "epoch": 0.1695552868467016, "grad_norm": 2.371185779571533, "learning_rate": 4.208770257387989e-05, "loss": 4.5945, "step": 6310 }, { "epoch": 0.16968964127367997, "grad_norm": 2.21028208732605, "learning_rate": 4.2080893367833313e-05, "loss": 4.6358, "step": 6315 }, { "epoch": 0.16982399570065834, "grad_norm": 2.199416160583496, "learning_rate": 4.2074084161786736e-05, "loss": 4.7773, "step": 6320 }, { "epoch": 0.16995835012763671, "grad_norm": 2.350001096725464, "learning_rate": 4.206727495574016e-05, "loss": 4.5694, "step": 6325 }, { "epoch": 0.1700927045546151, "grad_norm": 2.2529332637786865, "learning_rate": 4.206046574969359e-05, "loss": 4.7092, "step": 6330 }, { "epoch": 0.17022705898159343, "grad_norm": 2.588395357131958, "learning_rate": 4.2053656543647016e-05, "loss": 4.6163, "step": 6335 }, { "epoch": 0.1703614134085718, "grad_norm": 2.208899974822998, "learning_rate": 4.204684733760044e-05, "loss": 4.5044, "step": 6340 }, { "epoch": 0.17049576783555018, "grad_norm": 2.4447290897369385, "learning_rate": 4.204003813155386e-05, "loss": 4.6403, "step": 6345 }, { "epoch": 0.17063012226252855, "grad_norm": 2.3725602626800537, "learning_rate": 4.203322892550729e-05, "loss": 4.6863, "step": 6350 }, { "epoch": 0.17076447668950692, "grad_norm": 2.3884928226470947, "learning_rate": 4.202641971946071e-05, "loss": 4.6021, "step": 6355 }, { "epoch": 0.1708988311164853, "grad_norm": 2.3537652492523193, "learning_rate": 4.2019610513414134e-05, "loss": 4.6261, "step": 6360 }, { "epoch": 0.17103318554346367, "grad_norm": 2.136301040649414, "learning_rate": 4.201280130736756e-05, "loss": 4.649, "step": 6365 }, { "epoch": 0.17116753997044204, "grad_norm": 2.3084094524383545, "learning_rate": 4.200599210132099e-05, "loss": 4.5735, "step": 6370 }, { "epoch": 0.17130189439742038, "grad_norm": 2.2184677124023438, "learning_rate": 4.1999182895274415e-05, "loss": 4.4994, "step": 6375 }, { "epoch": 0.17143624882439876, "grad_norm": 2.2734274864196777, "learning_rate": 4.199237368922784e-05, "loss": 4.4762, "step": 6380 }, { "epoch": 0.17157060325137713, "grad_norm": 2.3786418437957764, "learning_rate": 4.198556448318126e-05, "loss": 4.5552, "step": 6385 }, { "epoch": 0.1717049576783555, "grad_norm": 2.3438937664031982, "learning_rate": 4.197875527713469e-05, "loss": 4.782, "step": 6390 }, { "epoch": 0.17183931210533387, "grad_norm": 2.4480435848236084, "learning_rate": 4.197194607108812e-05, "loss": 4.7028, "step": 6395 }, { "epoch": 0.17197366653231225, "grad_norm": 2.3457813262939453, "learning_rate": 4.196513686504154e-05, "loss": 4.6757, "step": 6400 }, { "epoch": 0.17210802095929062, "grad_norm": 2.4116549491882324, "learning_rate": 4.195832765899496e-05, "loss": 4.5225, "step": 6405 }, { "epoch": 0.17224237538626896, "grad_norm": 2.4665629863739014, "learning_rate": 4.195151845294839e-05, "loss": 4.6547, "step": 6410 }, { "epoch": 0.17237672981324734, "grad_norm": 2.513230562210083, "learning_rate": 4.194470924690181e-05, "loss": 4.5844, "step": 6415 }, { "epoch": 0.1725110842402257, "grad_norm": 2.404723644256592, "learning_rate": 4.1937900040855235e-05, "loss": 4.6189, "step": 6420 }, { "epoch": 0.17264543866720408, "grad_norm": 2.268045425415039, "learning_rate": 4.1931090834808664e-05, "loss": 4.5955, "step": 6425 }, { "epoch": 0.17277979309418245, "grad_norm": 2.355091094970703, "learning_rate": 4.1924281628762093e-05, "loss": 4.634, "step": 6430 }, { "epoch": 0.17291414752116083, "grad_norm": 2.2507715225219727, "learning_rate": 4.1917472422715516e-05, "loss": 4.6452, "step": 6435 }, { "epoch": 0.1730485019481392, "grad_norm": 2.3553433418273926, "learning_rate": 4.191066321666894e-05, "loss": 4.7423, "step": 6440 }, { "epoch": 0.17318285637511757, "grad_norm": 2.272806406021118, "learning_rate": 4.190385401062236e-05, "loss": 4.567, "step": 6445 }, { "epoch": 0.17331721080209592, "grad_norm": 2.4044992923736572, "learning_rate": 4.189704480457578e-05, "loss": 4.5237, "step": 6450 }, { "epoch": 0.1734515652290743, "grad_norm": 2.1048424243927, "learning_rate": 4.189023559852922e-05, "loss": 4.6141, "step": 6455 }, { "epoch": 0.17358591965605266, "grad_norm": 2.1680407524108887, "learning_rate": 4.188342639248264e-05, "loss": 4.6585, "step": 6460 }, { "epoch": 0.17372027408303103, "grad_norm": 2.3715052604675293, "learning_rate": 4.187661718643606e-05, "loss": 4.7898, "step": 6465 }, { "epoch": 0.1738546285100094, "grad_norm": 2.68088960647583, "learning_rate": 4.1869807980389485e-05, "loss": 4.493, "step": 6470 }, { "epoch": 0.17398898293698778, "grad_norm": 2.558058023452759, "learning_rate": 4.1862998774342914e-05, "loss": 4.6277, "step": 6475 }, { "epoch": 0.17412333736396615, "grad_norm": 2.171090841293335, "learning_rate": 4.1856189568296336e-05, "loss": 4.6525, "step": 6480 }, { "epoch": 0.17425769179094452, "grad_norm": 2.1461739540100098, "learning_rate": 4.1849380362249765e-05, "loss": 4.5093, "step": 6485 }, { "epoch": 0.17439204621792287, "grad_norm": 2.1991193294525146, "learning_rate": 4.184257115620319e-05, "loss": 4.7081, "step": 6490 }, { "epoch": 0.17452640064490124, "grad_norm": 2.47517728805542, "learning_rate": 4.183576195015662e-05, "loss": 4.5958, "step": 6495 }, { "epoch": 0.17466075507187961, "grad_norm": 2.183870792388916, "learning_rate": 4.182895274411004e-05, "loss": 4.6201, "step": 6500 }, { "epoch": 0.174795109498858, "grad_norm": 2.359966993331909, "learning_rate": 4.182214353806346e-05, "loss": 4.6841, "step": 6505 }, { "epoch": 0.17492946392583636, "grad_norm": 2.2989532947540283, "learning_rate": 4.1815334332016884e-05, "loss": 4.6966, "step": 6510 }, { "epoch": 0.17506381835281473, "grad_norm": 2.254702091217041, "learning_rate": 4.180852512597031e-05, "loss": 4.6757, "step": 6515 }, { "epoch": 0.1751981727797931, "grad_norm": 2.3578250408172607, "learning_rate": 4.180171591992374e-05, "loss": 4.6516, "step": 6520 }, { "epoch": 0.17533252720677148, "grad_norm": 2.4930083751678467, "learning_rate": 4.1794906713877164e-05, "loss": 4.731, "step": 6525 }, { "epoch": 0.17546688163374982, "grad_norm": 2.198848009109497, "learning_rate": 4.1788097507830586e-05, "loss": 4.7227, "step": 6530 }, { "epoch": 0.1756012360607282, "grad_norm": 2.4210662841796875, "learning_rate": 4.1781288301784015e-05, "loss": 4.5918, "step": 6535 }, { "epoch": 0.17573559048770657, "grad_norm": 2.58354115486145, "learning_rate": 4.177447909573744e-05, "loss": 4.6029, "step": 6540 }, { "epoch": 0.17586994491468494, "grad_norm": 2.2262752056121826, "learning_rate": 4.1767669889690867e-05, "loss": 4.485, "step": 6545 }, { "epoch": 0.1760042993416633, "grad_norm": 2.6898539066314697, "learning_rate": 4.176086068364429e-05, "loss": 4.6133, "step": 6550 }, { "epoch": 0.17613865376864168, "grad_norm": 2.4759645462036133, "learning_rate": 4.175405147759772e-05, "loss": 4.7214, "step": 6555 }, { "epoch": 0.17627300819562006, "grad_norm": 2.338465929031372, "learning_rate": 4.174724227155114e-05, "loss": 4.6771, "step": 6560 }, { "epoch": 0.1764073626225984, "grad_norm": 2.3396828174591064, "learning_rate": 4.174043306550456e-05, "loss": 4.5749, "step": 6565 }, { "epoch": 0.17654171704957677, "grad_norm": 2.555844783782959, "learning_rate": 4.1733623859457985e-05, "loss": 4.4251, "step": 6570 }, { "epoch": 0.17667607147655515, "grad_norm": 2.3667538166046143, "learning_rate": 4.1726814653411414e-05, "loss": 4.6276, "step": 6575 }, { "epoch": 0.17681042590353352, "grad_norm": 2.3648812770843506, "learning_rate": 4.172000544736484e-05, "loss": 4.5702, "step": 6580 }, { "epoch": 0.1769447803305119, "grad_norm": 2.0452256202697754, "learning_rate": 4.1713196241318265e-05, "loss": 4.5153, "step": 6585 }, { "epoch": 0.17707913475749026, "grad_norm": 2.5574960708618164, "learning_rate": 4.170638703527169e-05, "loss": 4.5806, "step": 6590 }, { "epoch": 0.17721348918446864, "grad_norm": 2.6324303150177, "learning_rate": 4.169957782922511e-05, "loss": 4.5472, "step": 6595 }, { "epoch": 0.177347843611447, "grad_norm": 2.534785747528076, "learning_rate": 4.169276862317854e-05, "loss": 4.594, "step": 6600 }, { "epoch": 0.17748219803842535, "grad_norm": 2.409205913543701, "learning_rate": 4.168595941713197e-05, "loss": 4.6887, "step": 6605 }, { "epoch": 0.17761655246540373, "grad_norm": 2.389843463897705, "learning_rate": 4.167915021108539e-05, "loss": 4.5704, "step": 6610 }, { "epoch": 0.1777509068923821, "grad_norm": 2.202401638031006, "learning_rate": 4.167234100503881e-05, "loss": 4.5835, "step": 6615 }, { "epoch": 0.17788526131936047, "grad_norm": 2.4570469856262207, "learning_rate": 4.166553179899224e-05, "loss": 4.6597, "step": 6620 }, { "epoch": 0.17801961574633884, "grad_norm": 2.570322036743164, "learning_rate": 4.1658722592945664e-05, "loss": 4.4766, "step": 6625 }, { "epoch": 0.17815397017331722, "grad_norm": 2.451951026916504, "learning_rate": 4.1651913386899086e-05, "loss": 4.6803, "step": 6630 }, { "epoch": 0.1782883246002956, "grad_norm": 2.2731688022613525, "learning_rate": 4.1645104180852515e-05, "loss": 4.61, "step": 6635 }, { "epoch": 0.17842267902727396, "grad_norm": 2.4533755779266357, "learning_rate": 4.1638294974805944e-05, "loss": 4.6184, "step": 6640 }, { "epoch": 0.1785570334542523, "grad_norm": 2.3204565048217773, "learning_rate": 4.1631485768759366e-05, "loss": 4.5813, "step": 6645 }, { "epoch": 0.17869138788123068, "grad_norm": 2.2887136936187744, "learning_rate": 4.162467656271279e-05, "loss": 4.5791, "step": 6650 }, { "epoch": 0.17882574230820905, "grad_norm": 2.4397172927856445, "learning_rate": 4.161786735666621e-05, "loss": 4.43, "step": 6655 }, { "epoch": 0.17896009673518742, "grad_norm": 2.2695908546447754, "learning_rate": 4.161105815061964e-05, "loss": 4.4514, "step": 6660 }, { "epoch": 0.1790944511621658, "grad_norm": 2.322587013244629, "learning_rate": 4.160424894457306e-05, "loss": 4.6821, "step": 6665 }, { "epoch": 0.17922880558914417, "grad_norm": 2.3344407081604004, "learning_rate": 4.159743973852649e-05, "loss": 4.7318, "step": 6670 }, { "epoch": 0.17936316001612254, "grad_norm": 2.229515790939331, "learning_rate": 4.159063053247991e-05, "loss": 4.6057, "step": 6675 }, { "epoch": 0.17949751444310091, "grad_norm": 2.249580144882202, "learning_rate": 4.158382132643334e-05, "loss": 4.5167, "step": 6680 }, { "epoch": 0.17963186887007926, "grad_norm": 2.192352056503296, "learning_rate": 4.1577012120386765e-05, "loss": 4.6519, "step": 6685 }, { "epoch": 0.17976622329705763, "grad_norm": 2.2247118949890137, "learning_rate": 4.157020291434019e-05, "loss": 4.5879, "step": 6690 }, { "epoch": 0.179900577724036, "grad_norm": 2.4266550540924072, "learning_rate": 4.1563393708293616e-05, "loss": 4.776, "step": 6695 }, { "epoch": 0.18003493215101438, "grad_norm": 2.397467613220215, "learning_rate": 4.1556584502247045e-05, "loss": 4.6221, "step": 6700 }, { "epoch": 0.18016928657799275, "grad_norm": 2.3627591133117676, "learning_rate": 4.154977529620047e-05, "loss": 4.6666, "step": 6705 }, { "epoch": 0.18030364100497112, "grad_norm": 2.3746418952941895, "learning_rate": 4.154296609015389e-05, "loss": 4.5638, "step": 6710 }, { "epoch": 0.1804379954319495, "grad_norm": 2.334683895111084, "learning_rate": 4.153615688410731e-05, "loss": 4.4996, "step": 6715 }, { "epoch": 0.18057234985892784, "grad_norm": 2.2431259155273438, "learning_rate": 4.152934767806074e-05, "loss": 4.6149, "step": 6720 }, { "epoch": 0.1807067042859062, "grad_norm": 2.3584394454956055, "learning_rate": 4.152253847201416e-05, "loss": 4.5275, "step": 6725 }, { "epoch": 0.18084105871288458, "grad_norm": 2.269402265548706, "learning_rate": 4.151572926596759e-05, "loss": 4.4714, "step": 6730 }, { "epoch": 0.18097541313986296, "grad_norm": 2.4162747859954834, "learning_rate": 4.1508920059921014e-05, "loss": 4.6992, "step": 6735 }, { "epoch": 0.18110976756684133, "grad_norm": 2.2333920001983643, "learning_rate": 4.1502110853874444e-05, "loss": 4.69, "step": 6740 }, { "epoch": 0.1812441219938197, "grad_norm": 2.289665699005127, "learning_rate": 4.1495301647827866e-05, "loss": 4.6365, "step": 6745 }, { "epoch": 0.18137847642079807, "grad_norm": 2.664175033569336, "learning_rate": 4.148849244178129e-05, "loss": 4.5822, "step": 6750 }, { "epoch": 0.18151283084777645, "grad_norm": 2.292074680328369, "learning_rate": 4.148168323573472e-05, "loss": 4.4943, "step": 6755 }, { "epoch": 0.1816471852747548, "grad_norm": 2.5145933628082275, "learning_rate": 4.147487402968814e-05, "loss": 4.6949, "step": 6760 }, { "epoch": 0.18178153970173316, "grad_norm": 2.436025619506836, "learning_rate": 4.146806482364157e-05, "loss": 4.5713, "step": 6765 }, { "epoch": 0.18191589412871154, "grad_norm": 2.1950080394744873, "learning_rate": 4.146125561759499e-05, "loss": 4.577, "step": 6770 }, { "epoch": 0.1820502485556899, "grad_norm": 2.2889373302459717, "learning_rate": 4.145444641154841e-05, "loss": 4.5313, "step": 6775 }, { "epoch": 0.18218460298266828, "grad_norm": 2.360623836517334, "learning_rate": 4.1447637205501835e-05, "loss": 4.5529, "step": 6780 }, { "epoch": 0.18231895740964665, "grad_norm": 2.2756640911102295, "learning_rate": 4.1440827999455264e-05, "loss": 4.6463, "step": 6785 }, { "epoch": 0.18245331183662503, "grad_norm": 2.3278796672821045, "learning_rate": 4.143401879340869e-05, "loss": 4.6218, "step": 6790 }, { "epoch": 0.1825876662636034, "grad_norm": 2.2654950618743896, "learning_rate": 4.1427209587362116e-05, "loss": 4.6451, "step": 6795 }, { "epoch": 0.18272202069058174, "grad_norm": 2.2796127796173096, "learning_rate": 4.142040038131554e-05, "loss": 4.5603, "step": 6800 }, { "epoch": 0.18285637511756012, "grad_norm": 2.37497878074646, "learning_rate": 4.141359117526897e-05, "loss": 4.5774, "step": 6805 }, { "epoch": 0.1829907295445385, "grad_norm": 2.328749895095825, "learning_rate": 4.140678196922239e-05, "loss": 4.5676, "step": 6810 }, { "epoch": 0.18312508397151686, "grad_norm": 2.256537437438965, "learning_rate": 4.139997276317582e-05, "loss": 4.6365, "step": 6815 }, { "epoch": 0.18325943839849523, "grad_norm": 2.4028425216674805, "learning_rate": 4.139316355712924e-05, "loss": 4.4725, "step": 6820 }, { "epoch": 0.1833937928254736, "grad_norm": 2.1181318759918213, "learning_rate": 4.138635435108267e-05, "loss": 4.6774, "step": 6825 }, { "epoch": 0.18352814725245198, "grad_norm": 2.459195852279663, "learning_rate": 4.137954514503609e-05, "loss": 4.7415, "step": 6830 }, { "epoch": 0.18366250167943035, "grad_norm": 2.4519217014312744, "learning_rate": 4.1372735938989514e-05, "loss": 4.5747, "step": 6835 }, { "epoch": 0.1837968561064087, "grad_norm": 2.4589717388153076, "learning_rate": 4.1365926732942936e-05, "loss": 4.5586, "step": 6840 }, { "epoch": 0.18393121053338707, "grad_norm": 2.2202322483062744, "learning_rate": 4.1359117526896365e-05, "loss": 4.5939, "step": 6845 }, { "epoch": 0.18406556496036544, "grad_norm": 2.31744647026062, "learning_rate": 4.1352308320849794e-05, "loss": 4.5411, "step": 6850 }, { "epoch": 0.1841999193873438, "grad_norm": 2.469985008239746, "learning_rate": 4.134549911480322e-05, "loss": 4.686, "step": 6855 }, { "epoch": 0.18433427381432219, "grad_norm": 2.34891414642334, "learning_rate": 4.133868990875664e-05, "loss": 4.5964, "step": 6860 }, { "epoch": 0.18446862824130056, "grad_norm": 2.565234661102295, "learning_rate": 4.133188070271007e-05, "loss": 4.354, "step": 6865 }, { "epoch": 0.18460298266827893, "grad_norm": 2.820161819458008, "learning_rate": 4.132507149666349e-05, "loss": 4.5853, "step": 6870 }, { "epoch": 0.18473733709525728, "grad_norm": 2.431040048599243, "learning_rate": 4.131826229061691e-05, "loss": 4.5148, "step": 6875 }, { "epoch": 0.18487169152223565, "grad_norm": 2.17570161819458, "learning_rate": 4.131145308457034e-05, "loss": 4.5879, "step": 6880 }, { "epoch": 0.18500604594921402, "grad_norm": 2.269704580307007, "learning_rate": 4.130464387852377e-05, "loss": 4.6893, "step": 6885 }, { "epoch": 0.1851404003761924, "grad_norm": 2.3647964000701904, "learning_rate": 4.129783467247719e-05, "loss": 4.5912, "step": 6890 }, { "epoch": 0.18527475480317077, "grad_norm": 2.4728124141693115, "learning_rate": 4.1291025466430615e-05, "loss": 4.5582, "step": 6895 }, { "epoch": 0.18540910923014914, "grad_norm": 2.4384915828704834, "learning_rate": 4.128421626038404e-05, "loss": 4.6313, "step": 6900 }, { "epoch": 0.1855434636571275, "grad_norm": 2.3080649375915527, "learning_rate": 4.1277407054337467e-05, "loss": 4.51, "step": 6905 }, { "epoch": 0.18567781808410588, "grad_norm": 2.2000319957733154, "learning_rate": 4.1270597848290896e-05, "loss": 4.5785, "step": 6910 }, { "epoch": 0.18581217251108423, "grad_norm": 2.1962084770202637, "learning_rate": 4.126378864224432e-05, "loss": 4.4365, "step": 6915 }, { "epoch": 0.1859465269380626, "grad_norm": 2.268049478530884, "learning_rate": 4.125697943619774e-05, "loss": 4.6267, "step": 6920 }, { "epoch": 0.18608088136504097, "grad_norm": 2.5571939945220947, "learning_rate": 4.125017023015116e-05, "loss": 4.7028, "step": 6925 }, { "epoch": 0.18621523579201935, "grad_norm": 2.339184284210205, "learning_rate": 4.124336102410459e-05, "loss": 4.5349, "step": 6930 }, { "epoch": 0.18634959021899772, "grad_norm": 2.3793368339538574, "learning_rate": 4.1236551818058014e-05, "loss": 4.6581, "step": 6935 }, { "epoch": 0.1864839446459761, "grad_norm": 2.4376611709594727, "learning_rate": 4.122974261201144e-05, "loss": 4.653, "step": 6940 }, { "epoch": 0.18661829907295446, "grad_norm": 2.4305341243743896, "learning_rate": 4.1222933405964865e-05, "loss": 4.6353, "step": 6945 }, { "epoch": 0.18675265349993284, "grad_norm": 2.409497022628784, "learning_rate": 4.1216124199918294e-05, "loss": 4.5536, "step": 6950 }, { "epoch": 0.18688700792691118, "grad_norm": 2.2604665756225586, "learning_rate": 4.1209314993871716e-05, "loss": 4.6089, "step": 6955 }, { "epoch": 0.18702136235388955, "grad_norm": 2.374154806137085, "learning_rate": 4.120250578782514e-05, "loss": 4.635, "step": 6960 }, { "epoch": 0.18715571678086793, "grad_norm": 2.243194341659546, "learning_rate": 4.119569658177857e-05, "loss": 4.726, "step": 6965 }, { "epoch": 0.1872900712078463, "grad_norm": 2.1959033012390137, "learning_rate": 4.1188887375732e-05, "loss": 4.6781, "step": 6970 }, { "epoch": 0.18742442563482467, "grad_norm": 2.6171998977661133, "learning_rate": 4.118207816968542e-05, "loss": 4.6393, "step": 6975 }, { "epoch": 0.18755878006180304, "grad_norm": 2.568899393081665, "learning_rate": 4.117526896363884e-05, "loss": 4.6268, "step": 6980 }, { "epoch": 0.18769313448878142, "grad_norm": 2.537750244140625, "learning_rate": 4.1168459757592263e-05, "loss": 4.6418, "step": 6985 }, { "epoch": 0.18782748891575976, "grad_norm": 2.4498329162597656, "learning_rate": 4.116165055154569e-05, "loss": 4.527, "step": 6990 }, { "epoch": 0.18796184334273813, "grad_norm": 2.211693286895752, "learning_rate": 4.1154841345499115e-05, "loss": 4.4841, "step": 6995 }, { "epoch": 0.1880961977697165, "grad_norm": 2.204399347305298, "learning_rate": 4.1148032139452544e-05, "loss": 4.5687, "step": 7000 }, { "epoch": 0.18823055219669488, "grad_norm": 2.229062795639038, "learning_rate": 4.1141222933405966e-05, "loss": 4.6291, "step": 7005 }, { "epoch": 0.18836490662367325, "grad_norm": 2.236020565032959, "learning_rate": 4.1134413727359395e-05, "loss": 4.5222, "step": 7010 }, { "epoch": 0.18849926105065162, "grad_norm": 2.2904069423675537, "learning_rate": 4.112760452131282e-05, "loss": 4.7043, "step": 7015 }, { "epoch": 0.18863361547763, "grad_norm": 2.5037553310394287, "learning_rate": 4.112079531526624e-05, "loss": 4.4712, "step": 7020 }, { "epoch": 0.18876796990460837, "grad_norm": 2.514737606048584, "learning_rate": 4.111398610921967e-05, "loss": 4.561, "step": 7025 }, { "epoch": 0.1889023243315867, "grad_norm": 2.462205171585083, "learning_rate": 4.11071769031731e-05, "loss": 4.4063, "step": 7030 }, { "epoch": 0.18903667875856509, "grad_norm": 2.276282548904419, "learning_rate": 4.110036769712652e-05, "loss": 4.5442, "step": 7035 }, { "epoch": 0.18917103318554346, "grad_norm": 2.3674912452697754, "learning_rate": 4.109355849107994e-05, "loss": 4.5329, "step": 7040 }, { "epoch": 0.18930538761252183, "grad_norm": 2.249842643737793, "learning_rate": 4.1086749285033365e-05, "loss": 4.5903, "step": 7045 }, { "epoch": 0.1894397420395002, "grad_norm": 2.415100574493408, "learning_rate": 4.107994007898679e-05, "loss": 4.5262, "step": 7050 }, { "epoch": 0.18957409646647858, "grad_norm": 2.2215616703033447, "learning_rate": 4.1073130872940216e-05, "loss": 4.5712, "step": 7055 }, { "epoch": 0.18970845089345695, "grad_norm": 2.207418918609619, "learning_rate": 4.1066321666893645e-05, "loss": 4.7247, "step": 7060 }, { "epoch": 0.18984280532043532, "grad_norm": 2.1361162662506104, "learning_rate": 4.105951246084707e-05, "loss": 4.6041, "step": 7065 }, { "epoch": 0.18997715974741367, "grad_norm": 2.2110280990600586, "learning_rate": 4.105270325480049e-05, "loss": 4.4982, "step": 7070 }, { "epoch": 0.19011151417439204, "grad_norm": 2.2744858264923096, "learning_rate": 4.104589404875392e-05, "loss": 4.5524, "step": 7075 }, { "epoch": 0.1902458686013704, "grad_norm": 2.5295419692993164, "learning_rate": 4.103908484270734e-05, "loss": 4.4965, "step": 7080 }, { "epoch": 0.19038022302834878, "grad_norm": 2.436302423477173, "learning_rate": 4.103227563666076e-05, "loss": 4.5, "step": 7085 }, { "epoch": 0.19051457745532716, "grad_norm": 2.296764373779297, "learning_rate": 4.102546643061419e-05, "loss": 4.5614, "step": 7090 }, { "epoch": 0.19064893188230553, "grad_norm": 2.4771785736083984, "learning_rate": 4.101865722456762e-05, "loss": 4.5643, "step": 7095 }, { "epoch": 0.1907832863092839, "grad_norm": 2.495366334915161, "learning_rate": 4.1011848018521043e-05, "loss": 4.4974, "step": 7100 }, { "epoch": 0.19091764073626227, "grad_norm": 2.2070071697235107, "learning_rate": 4.1005038812474466e-05, "loss": 4.6395, "step": 7105 }, { "epoch": 0.19105199516324062, "grad_norm": 2.448603630065918, "learning_rate": 4.099822960642789e-05, "loss": 4.5929, "step": 7110 }, { "epoch": 0.191186349590219, "grad_norm": 2.2084875106811523, "learning_rate": 4.099142040038132e-05, "loss": 4.6274, "step": 7115 }, { "epoch": 0.19132070401719736, "grad_norm": 2.280116319656372, "learning_rate": 4.0984611194334746e-05, "loss": 4.5419, "step": 7120 }, { "epoch": 0.19145505844417574, "grad_norm": 2.4503848552703857, "learning_rate": 4.097780198828817e-05, "loss": 4.6398, "step": 7125 }, { "epoch": 0.1915894128711541, "grad_norm": 2.2648165225982666, "learning_rate": 4.097099278224159e-05, "loss": 4.5188, "step": 7130 }, { "epoch": 0.19172376729813248, "grad_norm": 2.187908411026001, "learning_rate": 4.096418357619502e-05, "loss": 4.3556, "step": 7135 }, { "epoch": 0.19185812172511085, "grad_norm": 2.270920753479004, "learning_rate": 4.095737437014844e-05, "loss": 4.5919, "step": 7140 }, { "epoch": 0.1919924761520892, "grad_norm": 2.424598217010498, "learning_rate": 4.0950565164101864e-05, "loss": 4.5214, "step": 7145 }, { "epoch": 0.19212683057906757, "grad_norm": 2.10286021232605, "learning_rate": 4.094375595805529e-05, "loss": 4.5211, "step": 7150 }, { "epoch": 0.19226118500604594, "grad_norm": 2.268779754638672, "learning_rate": 4.093694675200872e-05, "loss": 4.5586, "step": 7155 }, { "epoch": 0.19239553943302432, "grad_norm": 2.3719546794891357, "learning_rate": 4.0930137545962145e-05, "loss": 4.5612, "step": 7160 }, { "epoch": 0.1925298938600027, "grad_norm": 2.2428412437438965, "learning_rate": 4.092332833991557e-05, "loss": 4.5811, "step": 7165 }, { "epoch": 0.19266424828698106, "grad_norm": 2.2108829021453857, "learning_rate": 4.091651913386899e-05, "loss": 4.4952, "step": 7170 }, { "epoch": 0.19279860271395943, "grad_norm": 2.4711620807647705, "learning_rate": 4.090970992782242e-05, "loss": 4.4856, "step": 7175 }, { "epoch": 0.1929329571409378, "grad_norm": 2.248721122741699, "learning_rate": 4.090290072177585e-05, "loss": 4.4079, "step": 7180 }, { "epoch": 0.19306731156791615, "grad_norm": 2.2443575859069824, "learning_rate": 4.089609151572927e-05, "loss": 4.5448, "step": 7185 }, { "epoch": 0.19320166599489452, "grad_norm": 2.6840131282806396, "learning_rate": 4.088928230968269e-05, "loss": 4.5768, "step": 7190 }, { "epoch": 0.1933360204218729, "grad_norm": 2.3117644786834717, "learning_rate": 4.0882473103636114e-05, "loss": 4.5582, "step": 7195 }, { "epoch": 0.19347037484885127, "grad_norm": 2.3625028133392334, "learning_rate": 4.087566389758954e-05, "loss": 4.5589, "step": 7200 }, { "epoch": 0.19360472927582964, "grad_norm": 2.1942670345306396, "learning_rate": 4.0868854691542965e-05, "loss": 4.5218, "step": 7205 }, { "epoch": 0.193739083702808, "grad_norm": 2.1714208126068115, "learning_rate": 4.0862045485496394e-05, "loss": 4.5744, "step": 7210 }, { "epoch": 0.19387343812978639, "grad_norm": 2.3121001720428467, "learning_rate": 4.0855236279449817e-05, "loss": 4.4298, "step": 7215 }, { "epoch": 0.19400779255676476, "grad_norm": 2.083580493927002, "learning_rate": 4.0848427073403246e-05, "loss": 4.5238, "step": 7220 }, { "epoch": 0.1941421469837431, "grad_norm": 2.2846081256866455, "learning_rate": 4.084161786735667e-05, "loss": 4.6132, "step": 7225 }, { "epoch": 0.19427650141072147, "grad_norm": 2.436431884765625, "learning_rate": 4.083480866131009e-05, "loss": 4.6542, "step": 7230 }, { "epoch": 0.19441085583769985, "grad_norm": 2.298214912414551, "learning_rate": 4.082799945526352e-05, "loss": 4.4369, "step": 7235 }, { "epoch": 0.19454521026467822, "grad_norm": 2.5868639945983887, "learning_rate": 4.082119024921695e-05, "loss": 4.5831, "step": 7240 }, { "epoch": 0.1946795646916566, "grad_norm": 2.4647202491760254, "learning_rate": 4.081438104317037e-05, "loss": 4.6139, "step": 7245 }, { "epoch": 0.19481391911863497, "grad_norm": 2.2689716815948486, "learning_rate": 4.080757183712379e-05, "loss": 4.4812, "step": 7250 }, { "epoch": 0.19494827354561334, "grad_norm": 2.316291570663452, "learning_rate": 4.0800762631077215e-05, "loss": 4.5507, "step": 7255 }, { "epoch": 0.1950826279725917, "grad_norm": 2.335848569869995, "learning_rate": 4.0793953425030644e-05, "loss": 4.502, "step": 7260 }, { "epoch": 0.19521698239957005, "grad_norm": 2.3392393589019775, "learning_rate": 4.0787144218984066e-05, "loss": 4.524, "step": 7265 }, { "epoch": 0.19535133682654843, "grad_norm": 2.3226287364959717, "learning_rate": 4.0780335012937495e-05, "loss": 4.6766, "step": 7270 }, { "epoch": 0.1954856912535268, "grad_norm": 2.3067502975463867, "learning_rate": 4.077352580689092e-05, "loss": 4.5135, "step": 7275 }, { "epoch": 0.19562004568050517, "grad_norm": 2.4793460369110107, "learning_rate": 4.076671660084435e-05, "loss": 4.5692, "step": 7280 }, { "epoch": 0.19575440010748354, "grad_norm": 2.058846950531006, "learning_rate": 4.075990739479777e-05, "loss": 4.503, "step": 7285 }, { "epoch": 0.19588875453446192, "grad_norm": 2.3449316024780273, "learning_rate": 4.075309818875119e-05, "loss": 4.6599, "step": 7290 }, { "epoch": 0.1960231089614403, "grad_norm": 2.3653762340545654, "learning_rate": 4.0746288982704614e-05, "loss": 4.4842, "step": 7295 }, { "epoch": 0.19615746338841863, "grad_norm": 2.206763744354248, "learning_rate": 4.073947977665805e-05, "loss": 4.5657, "step": 7300 }, { "epoch": 0.196291817815397, "grad_norm": 2.232729196548462, "learning_rate": 4.073267057061147e-05, "loss": 4.4914, "step": 7305 }, { "epoch": 0.19642617224237538, "grad_norm": 2.45845103263855, "learning_rate": 4.0725861364564894e-05, "loss": 4.4808, "step": 7310 }, { "epoch": 0.19656052666935375, "grad_norm": 2.508060932159424, "learning_rate": 4.0719052158518316e-05, "loss": 4.5533, "step": 7315 }, { "epoch": 0.19669488109633212, "grad_norm": 2.391829013824463, "learning_rate": 4.0712242952471745e-05, "loss": 4.4748, "step": 7320 }, { "epoch": 0.1968292355233105, "grad_norm": 2.2996342182159424, "learning_rate": 4.070543374642517e-05, "loss": 4.5077, "step": 7325 }, { "epoch": 0.19696358995028887, "grad_norm": 2.506173610687256, "learning_rate": 4.0698624540378597e-05, "loss": 4.4025, "step": 7330 }, { "epoch": 0.19709794437726724, "grad_norm": 2.3830065727233887, "learning_rate": 4.069181533433202e-05, "loss": 4.5319, "step": 7335 }, { "epoch": 0.1972322988042456, "grad_norm": 2.4049127101898193, "learning_rate": 4.068500612828544e-05, "loss": 4.5387, "step": 7340 }, { "epoch": 0.19736665323122396, "grad_norm": 2.340664863586426, "learning_rate": 4.067819692223887e-05, "loss": 4.6736, "step": 7345 }, { "epoch": 0.19750100765820233, "grad_norm": 2.5061004161834717, "learning_rate": 4.067138771619229e-05, "loss": 4.6041, "step": 7350 }, { "epoch": 0.1976353620851807, "grad_norm": 2.234525680541992, "learning_rate": 4.0664578510145715e-05, "loss": 4.5771, "step": 7355 }, { "epoch": 0.19776971651215908, "grad_norm": 2.337937116622925, "learning_rate": 4.0657769304099144e-05, "loss": 4.52, "step": 7360 }, { "epoch": 0.19790407093913745, "grad_norm": 2.4057869911193848, "learning_rate": 4.065096009805257e-05, "loss": 4.5733, "step": 7365 }, { "epoch": 0.19803842536611582, "grad_norm": 2.2221574783325195, "learning_rate": 4.0644150892005995e-05, "loss": 4.5917, "step": 7370 }, { "epoch": 0.1981727797930942, "grad_norm": 2.2738208770751953, "learning_rate": 4.063734168595942e-05, "loss": 4.5989, "step": 7375 }, { "epoch": 0.19830713422007254, "grad_norm": 2.4379289150238037, "learning_rate": 4.063053247991284e-05, "loss": 4.4874, "step": 7380 }, { "epoch": 0.1984414886470509, "grad_norm": 2.1695709228515625, "learning_rate": 4.062372327386627e-05, "loss": 4.5976, "step": 7385 }, { "epoch": 0.19857584307402928, "grad_norm": 2.3560855388641357, "learning_rate": 4.06169140678197e-05, "loss": 4.5486, "step": 7390 }, { "epoch": 0.19871019750100766, "grad_norm": 2.196437358856201, "learning_rate": 4.061010486177312e-05, "loss": 4.5423, "step": 7395 }, { "epoch": 0.19884455192798603, "grad_norm": 2.367628812789917, "learning_rate": 4.060329565572654e-05, "loss": 4.5537, "step": 7400 }, { "epoch": 0.1989789063549644, "grad_norm": 2.389923572540283, "learning_rate": 4.059648644967997e-05, "loss": 4.5587, "step": 7405 }, { "epoch": 0.19911326078194277, "grad_norm": 2.2588648796081543, "learning_rate": 4.0589677243633394e-05, "loss": 4.5655, "step": 7410 }, { "epoch": 0.19924761520892115, "grad_norm": 2.048673629760742, "learning_rate": 4.0582868037586816e-05, "loss": 4.4984, "step": 7415 }, { "epoch": 0.1993819696358995, "grad_norm": 2.3108294010162354, "learning_rate": 4.0576058831540245e-05, "loss": 4.5398, "step": 7420 }, { "epoch": 0.19951632406287786, "grad_norm": 2.3519372940063477, "learning_rate": 4.0569249625493674e-05, "loss": 4.5037, "step": 7425 }, { "epoch": 0.19965067848985624, "grad_norm": 2.3422770500183105, "learning_rate": 4.0562440419447096e-05, "loss": 4.5414, "step": 7430 }, { "epoch": 0.1997850329168346, "grad_norm": 3.085261344909668, "learning_rate": 4.055563121340052e-05, "loss": 4.5873, "step": 7435 }, { "epoch": 0.19991938734381298, "grad_norm": 2.5949223041534424, "learning_rate": 4.054882200735394e-05, "loss": 4.6393, "step": 7440 }, { "epoch": 0.20005374177079135, "grad_norm": 2.243067502975464, "learning_rate": 4.054201280130737e-05, "loss": 4.5188, "step": 7445 }, { "epoch": 0.20018809619776973, "grad_norm": 2.3928871154785156, "learning_rate": 4.05352035952608e-05, "loss": 4.4724, "step": 7450 }, { "epoch": 0.20032245062474807, "grad_norm": 2.176637887954712, "learning_rate": 4.052839438921422e-05, "loss": 4.504, "step": 7455 }, { "epoch": 0.20045680505172644, "grad_norm": 2.3238308429718018, "learning_rate": 4.052158518316764e-05, "loss": 4.6474, "step": 7460 }, { "epoch": 0.20059115947870482, "grad_norm": 2.492518186569214, "learning_rate": 4.051477597712107e-05, "loss": 4.6531, "step": 7465 }, { "epoch": 0.2007255139056832, "grad_norm": 2.3349030017852783, "learning_rate": 4.0507966771074495e-05, "loss": 4.5371, "step": 7470 }, { "epoch": 0.20085986833266156, "grad_norm": 2.2879385948181152, "learning_rate": 4.050115756502792e-05, "loss": 4.566, "step": 7475 }, { "epoch": 0.20099422275963993, "grad_norm": 2.3234596252441406, "learning_rate": 4.0494348358981346e-05, "loss": 4.4785, "step": 7480 }, { "epoch": 0.2011285771866183, "grad_norm": 2.399777412414551, "learning_rate": 4.0487539152934775e-05, "loss": 4.6115, "step": 7485 }, { "epoch": 0.20126293161359668, "grad_norm": 2.2911088466644287, "learning_rate": 4.04807299468882e-05, "loss": 4.5406, "step": 7490 }, { "epoch": 0.20139728604057502, "grad_norm": 2.217829704284668, "learning_rate": 4.047392074084162e-05, "loss": 4.6243, "step": 7495 }, { "epoch": 0.2015316404675534, "grad_norm": 2.7082183361053467, "learning_rate": 4.046711153479504e-05, "loss": 4.5976, "step": 7500 }, { "epoch": 0.20166599489453177, "grad_norm": 2.781736373901367, "learning_rate": 4.0460302328748464e-05, "loss": 4.4752, "step": 7505 }, { "epoch": 0.20180034932151014, "grad_norm": 2.045954704284668, "learning_rate": 4.04534931227019e-05, "loss": 4.5119, "step": 7510 }, { "epoch": 0.20193470374848851, "grad_norm": 2.3848822116851807, "learning_rate": 4.044668391665532e-05, "loss": 4.5257, "step": 7515 }, { "epoch": 0.2020690581754669, "grad_norm": 2.1805412769317627, "learning_rate": 4.0439874710608744e-05, "loss": 4.5548, "step": 7520 }, { "epoch": 0.20220341260244526, "grad_norm": 2.234635829925537, "learning_rate": 4.043306550456217e-05, "loss": 4.6258, "step": 7525 }, { "epoch": 0.20233776702942363, "grad_norm": 2.2407567501068115, "learning_rate": 4.0426256298515596e-05, "loss": 4.4839, "step": 7530 }, { "epoch": 0.20247212145640198, "grad_norm": 2.2742607593536377, "learning_rate": 4.041944709246902e-05, "loss": 4.534, "step": 7535 }, { "epoch": 0.20260647588338035, "grad_norm": 2.6407322883605957, "learning_rate": 4.041263788642245e-05, "loss": 4.4093, "step": 7540 }, { "epoch": 0.20274083031035872, "grad_norm": 2.2201759815216064, "learning_rate": 4.040582868037587e-05, "loss": 4.3755, "step": 7545 }, { "epoch": 0.2028751847373371, "grad_norm": 2.5236337184906006, "learning_rate": 4.03990194743293e-05, "loss": 4.5846, "step": 7550 }, { "epoch": 0.20300953916431547, "grad_norm": 2.612666368484497, "learning_rate": 4.039221026828272e-05, "loss": 4.4706, "step": 7555 }, { "epoch": 0.20314389359129384, "grad_norm": 2.3540444374084473, "learning_rate": 4.038540106223614e-05, "loss": 4.462, "step": 7560 }, { "epoch": 0.2032782480182722, "grad_norm": 2.1694231033325195, "learning_rate": 4.0378591856189565e-05, "loss": 4.5415, "step": 7565 }, { "epoch": 0.20341260244525058, "grad_norm": 2.3195013999938965, "learning_rate": 4.0371782650142994e-05, "loss": 4.6151, "step": 7570 }, { "epoch": 0.20354695687222893, "grad_norm": 2.4916484355926514, "learning_rate": 4.036497344409642e-05, "loss": 4.4776, "step": 7575 }, { "epoch": 0.2036813112992073, "grad_norm": 2.20165753364563, "learning_rate": 4.0358164238049846e-05, "loss": 4.6301, "step": 7580 }, { "epoch": 0.20381566572618567, "grad_norm": 2.3772010803222656, "learning_rate": 4.035135503200327e-05, "loss": 4.4669, "step": 7585 }, { "epoch": 0.20395002015316405, "grad_norm": 2.5744669437408447, "learning_rate": 4.03445458259567e-05, "loss": 4.4857, "step": 7590 }, { "epoch": 0.20408437458014242, "grad_norm": 2.0861997604370117, "learning_rate": 4.033773661991012e-05, "loss": 4.5762, "step": 7595 }, { "epoch": 0.2042187290071208, "grad_norm": 2.240452527999878, "learning_rate": 4.033092741386355e-05, "loss": 4.4403, "step": 7600 }, { "epoch": 0.20435308343409916, "grad_norm": 2.383476495742798, "learning_rate": 4.032411820781697e-05, "loss": 4.483, "step": 7605 }, { "epoch": 0.2044874378610775, "grad_norm": 2.3453216552734375, "learning_rate": 4.03173090017704e-05, "loss": 4.5329, "step": 7610 }, { "epoch": 0.20462179228805588, "grad_norm": 2.2961390018463135, "learning_rate": 4.031049979572382e-05, "loss": 4.4499, "step": 7615 }, { "epoch": 0.20475614671503425, "grad_norm": 2.4231982231140137, "learning_rate": 4.0303690589677244e-05, "loss": 4.4969, "step": 7620 }, { "epoch": 0.20489050114201263, "grad_norm": 2.271404266357422, "learning_rate": 4.0296881383630666e-05, "loss": 4.5771, "step": 7625 }, { "epoch": 0.205024855568991, "grad_norm": 2.2238411903381348, "learning_rate": 4.0290072177584095e-05, "loss": 4.6415, "step": 7630 }, { "epoch": 0.20515920999596937, "grad_norm": 2.163578987121582, "learning_rate": 4.0283262971537524e-05, "loss": 4.6844, "step": 7635 }, { "epoch": 0.20529356442294774, "grad_norm": 2.4209017753601074, "learning_rate": 4.027645376549095e-05, "loss": 4.5357, "step": 7640 }, { "epoch": 0.20542791884992612, "grad_norm": 2.2471983432769775, "learning_rate": 4.026964455944437e-05, "loss": 4.664, "step": 7645 }, { "epoch": 0.20556227327690446, "grad_norm": 2.440857410430908, "learning_rate": 4.026283535339779e-05, "loss": 4.4413, "step": 7650 }, { "epoch": 0.20569662770388283, "grad_norm": 2.307403564453125, "learning_rate": 4.025602614735122e-05, "loss": 4.5154, "step": 7655 }, { "epoch": 0.2058309821308612, "grad_norm": 2.431759834289551, "learning_rate": 4.024921694130465e-05, "loss": 4.4362, "step": 7660 }, { "epoch": 0.20596533655783958, "grad_norm": 2.3965609073638916, "learning_rate": 4.024240773525807e-05, "loss": 4.4735, "step": 7665 }, { "epoch": 0.20609969098481795, "grad_norm": 2.2584922313690186, "learning_rate": 4.0235598529211494e-05, "loss": 4.5538, "step": 7670 }, { "epoch": 0.20623404541179632, "grad_norm": 2.4941208362579346, "learning_rate": 4.022878932316492e-05, "loss": 4.5343, "step": 7675 }, { "epoch": 0.2063683998387747, "grad_norm": 2.255657196044922, "learning_rate": 4.0221980117118345e-05, "loss": 4.431, "step": 7680 }, { "epoch": 0.20650275426575307, "grad_norm": 2.3047399520874023, "learning_rate": 4.021517091107177e-05, "loss": 4.6374, "step": 7685 }, { "epoch": 0.2066371086927314, "grad_norm": 2.51051664352417, "learning_rate": 4.0208361705025196e-05, "loss": 4.6606, "step": 7690 }, { "epoch": 0.2067714631197098, "grad_norm": 2.3574345111846924, "learning_rate": 4.0201552498978625e-05, "loss": 4.4689, "step": 7695 }, { "epoch": 0.20690581754668816, "grad_norm": 2.5600733757019043, "learning_rate": 4.019474329293205e-05, "loss": 4.3813, "step": 7700 }, { "epoch": 0.20704017197366653, "grad_norm": 2.2496025562286377, "learning_rate": 4.018793408688547e-05, "loss": 4.514, "step": 7705 }, { "epoch": 0.2071745264006449, "grad_norm": 2.3794093132019043, "learning_rate": 4.018112488083889e-05, "loss": 4.5071, "step": 7710 }, { "epoch": 0.20730888082762328, "grad_norm": 2.2429542541503906, "learning_rate": 4.017431567479232e-05, "loss": 4.5975, "step": 7715 }, { "epoch": 0.20744323525460165, "grad_norm": 2.5032238960266113, "learning_rate": 4.0167506468745744e-05, "loss": 4.6123, "step": 7720 }, { "epoch": 0.20757758968158002, "grad_norm": 2.3977210521698, "learning_rate": 4.016069726269917e-05, "loss": 4.4546, "step": 7725 }, { "epoch": 0.20771194410855837, "grad_norm": 2.219390869140625, "learning_rate": 4.0153888056652595e-05, "loss": 4.4911, "step": 7730 }, { "epoch": 0.20784629853553674, "grad_norm": 2.246828317642212, "learning_rate": 4.0147078850606024e-05, "loss": 4.5641, "step": 7735 }, { "epoch": 0.2079806529625151, "grad_norm": 2.7244341373443604, "learning_rate": 4.0140269644559446e-05, "loss": 4.6314, "step": 7740 }, { "epoch": 0.20811500738949348, "grad_norm": 2.2800581455230713, "learning_rate": 4.013346043851287e-05, "loss": 4.6105, "step": 7745 }, { "epoch": 0.20824936181647186, "grad_norm": 2.5239195823669434, "learning_rate": 4.01266512324663e-05, "loss": 4.4694, "step": 7750 }, { "epoch": 0.20838371624345023, "grad_norm": 2.491713523864746, "learning_rate": 4.0119842026419727e-05, "loss": 4.6261, "step": 7755 }, { "epoch": 0.2085180706704286, "grad_norm": 2.5509541034698486, "learning_rate": 4.011303282037315e-05, "loss": 4.6265, "step": 7760 }, { "epoch": 0.20865242509740695, "grad_norm": 2.385878324508667, "learning_rate": 4.010622361432657e-05, "loss": 4.5261, "step": 7765 }, { "epoch": 0.20878677952438532, "grad_norm": 2.5552380084991455, "learning_rate": 4.0099414408279993e-05, "loss": 4.6553, "step": 7770 }, { "epoch": 0.2089211339513637, "grad_norm": 2.1308703422546387, "learning_rate": 4.009260520223342e-05, "loss": 4.4779, "step": 7775 }, { "epoch": 0.20905548837834206, "grad_norm": 2.249119997024536, "learning_rate": 4.0085795996186845e-05, "loss": 4.5108, "step": 7780 }, { "epoch": 0.20918984280532044, "grad_norm": 2.231544017791748, "learning_rate": 4.0078986790140274e-05, "loss": 4.4367, "step": 7785 }, { "epoch": 0.2093241972322988, "grad_norm": 2.6178367137908936, "learning_rate": 4.0072177584093696e-05, "loss": 4.6185, "step": 7790 }, { "epoch": 0.20945855165927718, "grad_norm": 2.042090892791748, "learning_rate": 4.006536837804712e-05, "loss": 4.6021, "step": 7795 }, { "epoch": 0.20959290608625555, "grad_norm": 2.411900520324707, "learning_rate": 4.005855917200055e-05, "loss": 4.3821, "step": 7800 }, { "epoch": 0.2097272605132339, "grad_norm": 2.383136749267578, "learning_rate": 4.005174996595397e-05, "loss": 4.6023, "step": 7805 }, { "epoch": 0.20986161494021227, "grad_norm": 2.4700050354003906, "learning_rate": 4.00449407599074e-05, "loss": 4.5928, "step": 7810 }, { "epoch": 0.20999596936719064, "grad_norm": 2.20003342628479, "learning_rate": 4.003813155386082e-05, "loss": 4.5535, "step": 7815 }, { "epoch": 0.21013032379416902, "grad_norm": 2.229978322982788, "learning_rate": 4.003132234781425e-05, "loss": 4.6606, "step": 7820 }, { "epoch": 0.2102646782211474, "grad_norm": 2.4298512935638428, "learning_rate": 4.002451314176767e-05, "loss": 4.4771, "step": 7825 }, { "epoch": 0.21039903264812576, "grad_norm": 2.2240586280822754, "learning_rate": 4.0017703935721095e-05, "loss": 4.636, "step": 7830 }, { "epoch": 0.21053338707510413, "grad_norm": 2.243889808654785, "learning_rate": 4.001089472967452e-05, "loss": 4.4971, "step": 7835 }, { "epoch": 0.2106677415020825, "grad_norm": 2.212472915649414, "learning_rate": 4.0004085523627946e-05, "loss": 4.5702, "step": 7840 }, { "epoch": 0.21080209592906085, "grad_norm": 2.100231170654297, "learning_rate": 3.9997276317581375e-05, "loss": 4.4953, "step": 7845 }, { "epoch": 0.21093645035603922, "grad_norm": 2.4913434982299805, "learning_rate": 3.99904671115348e-05, "loss": 4.6602, "step": 7850 }, { "epoch": 0.2110708047830176, "grad_norm": 2.481914520263672, "learning_rate": 3.998365790548822e-05, "loss": 4.5769, "step": 7855 }, { "epoch": 0.21120515920999597, "grad_norm": 2.200019359588623, "learning_rate": 3.997684869944165e-05, "loss": 4.5763, "step": 7860 }, { "epoch": 0.21133951363697434, "grad_norm": 2.381456136703491, "learning_rate": 3.997003949339507e-05, "loss": 4.5627, "step": 7865 }, { "epoch": 0.2114738680639527, "grad_norm": 2.30440616607666, "learning_rate": 3.99632302873485e-05, "loss": 4.4959, "step": 7870 }, { "epoch": 0.21160822249093109, "grad_norm": 2.580388307571411, "learning_rate": 3.995642108130192e-05, "loss": 4.5989, "step": 7875 }, { "epoch": 0.21174257691790946, "grad_norm": 2.316093683242798, "learning_rate": 3.994961187525535e-05, "loss": 4.5402, "step": 7880 }, { "epoch": 0.2118769313448878, "grad_norm": 2.3373405933380127, "learning_rate": 3.994280266920877e-05, "loss": 4.4452, "step": 7885 }, { "epoch": 0.21201128577186618, "grad_norm": 2.2177529335021973, "learning_rate": 3.9935993463162196e-05, "loss": 4.6338, "step": 7890 }, { "epoch": 0.21214564019884455, "grad_norm": 2.2226641178131104, "learning_rate": 3.992918425711562e-05, "loss": 4.3612, "step": 7895 }, { "epoch": 0.21227999462582292, "grad_norm": 2.4098312854766846, "learning_rate": 3.992237505106905e-05, "loss": 4.5884, "step": 7900 }, { "epoch": 0.2124143490528013, "grad_norm": 2.2080001831054688, "learning_rate": 3.9915565845022476e-05, "loss": 4.465, "step": 7905 }, { "epoch": 0.21254870347977967, "grad_norm": 2.432154893875122, "learning_rate": 3.99087566389759e-05, "loss": 4.5543, "step": 7910 }, { "epoch": 0.21268305790675804, "grad_norm": 2.4245781898498535, "learning_rate": 3.990194743292932e-05, "loss": 4.5096, "step": 7915 }, { "epoch": 0.21281741233373638, "grad_norm": 2.126307249069214, "learning_rate": 3.989513822688275e-05, "loss": 4.5626, "step": 7920 }, { "epoch": 0.21295176676071476, "grad_norm": 2.131974697113037, "learning_rate": 3.988832902083617e-05, "loss": 4.4761, "step": 7925 }, { "epoch": 0.21308612118769313, "grad_norm": 2.3548102378845215, "learning_rate": 3.9881519814789594e-05, "loss": 4.4925, "step": 7930 }, { "epoch": 0.2132204756146715, "grad_norm": 2.6243896484375, "learning_rate": 3.987471060874302e-05, "loss": 4.5733, "step": 7935 }, { "epoch": 0.21335483004164987, "grad_norm": 2.3429207801818848, "learning_rate": 3.9867901402696445e-05, "loss": 4.5669, "step": 7940 }, { "epoch": 0.21348918446862825, "grad_norm": 2.5600531101226807, "learning_rate": 3.9861092196649874e-05, "loss": 4.5028, "step": 7945 }, { "epoch": 0.21362353889560662, "grad_norm": 2.3005564212799072, "learning_rate": 3.98542829906033e-05, "loss": 4.7203, "step": 7950 }, { "epoch": 0.213757893322585, "grad_norm": 2.3356740474700928, "learning_rate": 3.984747378455672e-05, "loss": 4.433, "step": 7955 }, { "epoch": 0.21389224774956334, "grad_norm": 2.6492056846618652, "learning_rate": 3.984066457851015e-05, "loss": 4.4739, "step": 7960 }, { "epoch": 0.2140266021765417, "grad_norm": 2.2943952083587646, "learning_rate": 3.983385537246358e-05, "loss": 4.5779, "step": 7965 }, { "epoch": 0.21416095660352008, "grad_norm": 2.26895809173584, "learning_rate": 3.9827046166417e-05, "loss": 4.6247, "step": 7970 }, { "epoch": 0.21429531103049845, "grad_norm": 2.3796446323394775, "learning_rate": 3.982023696037042e-05, "loss": 4.4667, "step": 7975 }, { "epoch": 0.21442966545747683, "grad_norm": 2.086341381072998, "learning_rate": 3.9813427754323844e-05, "loss": 4.5953, "step": 7980 }, { "epoch": 0.2145640198844552, "grad_norm": 2.2315192222595215, "learning_rate": 3.980661854827727e-05, "loss": 4.411, "step": 7985 }, { "epoch": 0.21469837431143357, "grad_norm": 2.3698182106018066, "learning_rate": 3.9799809342230695e-05, "loss": 4.476, "step": 7990 }, { "epoch": 0.21483272873841194, "grad_norm": 2.4338014125823975, "learning_rate": 3.9793000136184124e-05, "loss": 4.573, "step": 7995 }, { "epoch": 0.2149670831653903, "grad_norm": 2.3947324752807617, "learning_rate": 3.9786190930137547e-05, "loss": 4.5989, "step": 8000 }, { "epoch": 0.21510143759236866, "grad_norm": 2.3885648250579834, "learning_rate": 3.9779381724090976e-05, "loss": 4.623, "step": 8005 }, { "epoch": 0.21523579201934703, "grad_norm": 2.234739303588867, "learning_rate": 3.97725725180444e-05, "loss": 4.6016, "step": 8010 }, { "epoch": 0.2153701464463254, "grad_norm": 2.397773265838623, "learning_rate": 3.976576331199782e-05, "loss": 4.6686, "step": 8015 }, { "epoch": 0.21550450087330378, "grad_norm": 2.322126865386963, "learning_rate": 3.975895410595125e-05, "loss": 4.4855, "step": 8020 }, { "epoch": 0.21563885530028215, "grad_norm": 2.217851400375366, "learning_rate": 3.975214489990468e-05, "loss": 4.6483, "step": 8025 }, { "epoch": 0.21577320972726052, "grad_norm": 2.4423587322235107, "learning_rate": 3.97453356938581e-05, "loss": 4.4213, "step": 8030 }, { "epoch": 0.2159075641542389, "grad_norm": 2.4239814281463623, "learning_rate": 3.973852648781152e-05, "loss": 4.5082, "step": 8035 }, { "epoch": 0.21604191858121724, "grad_norm": 2.3297367095947266, "learning_rate": 3.9731717281764945e-05, "loss": 4.5876, "step": 8040 }, { "epoch": 0.2161762730081956, "grad_norm": 2.1813454627990723, "learning_rate": 3.9724908075718374e-05, "loss": 4.5188, "step": 8045 }, { "epoch": 0.21631062743517399, "grad_norm": 2.172741651535034, "learning_rate": 3.9718098869671796e-05, "loss": 4.5169, "step": 8050 }, { "epoch": 0.21644498186215236, "grad_norm": 2.2912702560424805, "learning_rate": 3.9711289663625225e-05, "loss": 4.4824, "step": 8055 }, { "epoch": 0.21657933628913073, "grad_norm": 2.3370800018310547, "learning_rate": 3.970448045757865e-05, "loss": 4.5895, "step": 8060 }, { "epoch": 0.2167136907161091, "grad_norm": 2.283795118331909, "learning_rate": 3.969767125153208e-05, "loss": 4.4954, "step": 8065 }, { "epoch": 0.21684804514308748, "grad_norm": 2.253535032272339, "learning_rate": 3.96908620454855e-05, "loss": 4.4669, "step": 8070 }, { "epoch": 0.21698239957006582, "grad_norm": 2.4070489406585693, "learning_rate": 3.968405283943892e-05, "loss": 4.4722, "step": 8075 }, { "epoch": 0.2171167539970442, "grad_norm": 2.3137452602386475, "learning_rate": 3.967724363339235e-05, "loss": 4.3965, "step": 8080 }, { "epoch": 0.21725110842402257, "grad_norm": 2.4391114711761475, "learning_rate": 3.967043442734577e-05, "loss": 4.5998, "step": 8085 }, { "epoch": 0.21738546285100094, "grad_norm": 2.4084930419921875, "learning_rate": 3.96636252212992e-05, "loss": 4.5332, "step": 8090 }, { "epoch": 0.2175198172779793, "grad_norm": 2.2953662872314453, "learning_rate": 3.9656816015252624e-05, "loss": 4.5608, "step": 8095 }, { "epoch": 0.21765417170495768, "grad_norm": 2.214895009994507, "learning_rate": 3.9650006809206046e-05, "loss": 4.5307, "step": 8100 }, { "epoch": 0.21778852613193606, "grad_norm": 2.494039297103882, "learning_rate": 3.964319760315947e-05, "loss": 4.489, "step": 8105 }, { "epoch": 0.21792288055891443, "grad_norm": 2.4110639095306396, "learning_rate": 3.96363883971129e-05, "loss": 4.4221, "step": 8110 }, { "epoch": 0.21805723498589277, "grad_norm": 2.4896717071533203, "learning_rate": 3.9629579191066327e-05, "loss": 4.6174, "step": 8115 }, { "epoch": 0.21819158941287115, "grad_norm": 2.271714925765991, "learning_rate": 3.962276998501975e-05, "loss": 4.6011, "step": 8120 }, { "epoch": 0.21832594383984952, "grad_norm": 2.264744758605957, "learning_rate": 3.961596077897317e-05, "loss": 4.343, "step": 8125 }, { "epoch": 0.2184602982668279, "grad_norm": 2.3860461711883545, "learning_rate": 3.96091515729266e-05, "loss": 4.6053, "step": 8130 }, { "epoch": 0.21859465269380626, "grad_norm": 2.51029896736145, "learning_rate": 3.960234236688002e-05, "loss": 4.5263, "step": 8135 }, { "epoch": 0.21872900712078464, "grad_norm": 2.1984598636627197, "learning_rate": 3.9595533160833445e-05, "loss": 4.5182, "step": 8140 }, { "epoch": 0.218863361547763, "grad_norm": 2.2205660343170166, "learning_rate": 3.9588723954786874e-05, "loss": 4.4588, "step": 8145 }, { "epoch": 0.21899771597474138, "grad_norm": 2.320554256439209, "learning_rate": 3.95819147487403e-05, "loss": 4.4623, "step": 8150 }, { "epoch": 0.21913207040171973, "grad_norm": 2.437398910522461, "learning_rate": 3.9575105542693725e-05, "loss": 4.5282, "step": 8155 }, { "epoch": 0.2192664248286981, "grad_norm": 2.2741754055023193, "learning_rate": 3.956829633664715e-05, "loss": 4.4615, "step": 8160 }, { "epoch": 0.21940077925567647, "grad_norm": 2.379453659057617, "learning_rate": 3.956148713060057e-05, "loss": 4.552, "step": 8165 }, { "epoch": 0.21953513368265484, "grad_norm": 2.46022891998291, "learning_rate": 3.9554677924554e-05, "loss": 4.5353, "step": 8170 }, { "epoch": 0.21966948810963322, "grad_norm": 2.3031411170959473, "learning_rate": 3.954786871850743e-05, "loss": 4.5261, "step": 8175 }, { "epoch": 0.2198038425366116, "grad_norm": 2.389411687850952, "learning_rate": 3.954105951246085e-05, "loss": 4.4525, "step": 8180 }, { "epoch": 0.21993819696358996, "grad_norm": 2.386228084564209, "learning_rate": 3.953425030641427e-05, "loss": 4.5909, "step": 8185 }, { "epoch": 0.22007255139056833, "grad_norm": 2.4298057556152344, "learning_rate": 3.95274411003677e-05, "loss": 4.4921, "step": 8190 }, { "epoch": 0.22020690581754668, "grad_norm": 2.4861178398132324, "learning_rate": 3.9520631894321123e-05, "loss": 4.5671, "step": 8195 }, { "epoch": 0.22034126024452505, "grad_norm": 2.6088831424713135, "learning_rate": 3.9513822688274546e-05, "loss": 4.5072, "step": 8200 }, { "epoch": 0.22047561467150342, "grad_norm": 2.4835610389709473, "learning_rate": 3.9507013482227975e-05, "loss": 4.4169, "step": 8205 }, { "epoch": 0.2206099690984818, "grad_norm": 2.3210246562957764, "learning_rate": 3.9500204276181404e-05, "loss": 4.4646, "step": 8210 }, { "epoch": 0.22074432352546017, "grad_norm": 2.2627928256988525, "learning_rate": 3.9493395070134826e-05, "loss": 4.4458, "step": 8215 }, { "epoch": 0.22087867795243854, "grad_norm": 2.3606464862823486, "learning_rate": 3.948658586408825e-05, "loss": 4.5374, "step": 8220 }, { "epoch": 0.2210130323794169, "grad_norm": 2.201375961303711, "learning_rate": 3.947977665804167e-05, "loss": 4.5093, "step": 8225 }, { "epoch": 0.22114738680639526, "grad_norm": 2.2698469161987305, "learning_rate": 3.94729674519951e-05, "loss": 4.5452, "step": 8230 }, { "epoch": 0.22128174123337363, "grad_norm": 2.178785800933838, "learning_rate": 3.946615824594853e-05, "loss": 4.5701, "step": 8235 }, { "epoch": 0.221416095660352, "grad_norm": 2.27838397026062, "learning_rate": 3.945934903990195e-05, "loss": 4.552, "step": 8240 }, { "epoch": 0.22155045008733038, "grad_norm": 2.4617958068847656, "learning_rate": 3.945253983385537e-05, "loss": 4.4574, "step": 8245 }, { "epoch": 0.22168480451430875, "grad_norm": 2.309865713119507, "learning_rate": 3.9445730627808796e-05, "loss": 4.4584, "step": 8250 }, { "epoch": 0.22181915894128712, "grad_norm": 2.5133237838745117, "learning_rate": 3.9438921421762225e-05, "loss": 4.5018, "step": 8255 }, { "epoch": 0.2219535133682655, "grad_norm": 2.528859853744507, "learning_rate": 3.943211221571565e-05, "loss": 4.5619, "step": 8260 }, { "epoch": 0.22208786779524387, "grad_norm": 2.5606112480163574, "learning_rate": 3.9425303009669076e-05, "loss": 4.5617, "step": 8265 }, { "epoch": 0.2222222222222222, "grad_norm": 2.261852979660034, "learning_rate": 3.94184938036225e-05, "loss": 4.6211, "step": 8270 }, { "epoch": 0.22235657664920058, "grad_norm": 2.1548967361450195, "learning_rate": 3.941168459757593e-05, "loss": 4.5193, "step": 8275 }, { "epoch": 0.22249093107617895, "grad_norm": 2.4585540294647217, "learning_rate": 3.940487539152935e-05, "loss": 4.5097, "step": 8280 }, { "epoch": 0.22262528550315733, "grad_norm": 2.482358932495117, "learning_rate": 3.939806618548277e-05, "loss": 4.5414, "step": 8285 }, { "epoch": 0.2227596399301357, "grad_norm": 2.2209229469299316, "learning_rate": 3.93912569794362e-05, "loss": 4.5977, "step": 8290 }, { "epoch": 0.22289399435711407, "grad_norm": 2.4583818912506104, "learning_rate": 3.938444777338963e-05, "loss": 4.5733, "step": 8295 }, { "epoch": 0.22302834878409245, "grad_norm": 2.567460536956787, "learning_rate": 3.937763856734305e-05, "loss": 4.4606, "step": 8300 }, { "epoch": 0.22316270321107082, "grad_norm": 2.2279093265533447, "learning_rate": 3.9370829361296474e-05, "loss": 4.6824, "step": 8305 }, { "epoch": 0.22329705763804916, "grad_norm": 2.3980607986450195, "learning_rate": 3.93640201552499e-05, "loss": 4.5479, "step": 8310 }, { "epoch": 0.22343141206502753, "grad_norm": 2.3374099731445312, "learning_rate": 3.9357210949203326e-05, "loss": 4.4721, "step": 8315 }, { "epoch": 0.2235657664920059, "grad_norm": 2.3053574562072754, "learning_rate": 3.935040174315675e-05, "loss": 4.4035, "step": 8320 }, { "epoch": 0.22370012091898428, "grad_norm": 2.326688289642334, "learning_rate": 3.934359253711018e-05, "loss": 4.5543, "step": 8325 }, { "epoch": 0.22383447534596265, "grad_norm": 2.328249931335449, "learning_rate": 3.93367833310636e-05, "loss": 4.5431, "step": 8330 }, { "epoch": 0.22396882977294102, "grad_norm": 2.3143770694732666, "learning_rate": 3.932997412501703e-05, "loss": 4.4885, "step": 8335 }, { "epoch": 0.2241031841999194, "grad_norm": 2.438821792602539, "learning_rate": 3.932316491897045e-05, "loss": 4.5825, "step": 8340 }, { "epoch": 0.22423753862689777, "grad_norm": 2.4263737201690674, "learning_rate": 3.931635571292387e-05, "loss": 4.5807, "step": 8345 }, { "epoch": 0.22437189305387611, "grad_norm": 2.2539799213409424, "learning_rate": 3.9309546506877295e-05, "loss": 4.5062, "step": 8350 }, { "epoch": 0.2245062474808545, "grad_norm": 2.293640375137329, "learning_rate": 3.930273730083073e-05, "loss": 4.4612, "step": 8355 }, { "epoch": 0.22464060190783286, "grad_norm": 2.491832733154297, "learning_rate": 3.929592809478415e-05, "loss": 4.5282, "step": 8360 }, { "epoch": 0.22477495633481123, "grad_norm": 2.5787856578826904, "learning_rate": 3.9289118888737575e-05, "loss": 4.5486, "step": 8365 }, { "epoch": 0.2249093107617896, "grad_norm": 2.106255054473877, "learning_rate": 3.9282309682691e-05, "loss": 4.5548, "step": 8370 }, { "epoch": 0.22504366518876798, "grad_norm": 2.4151759147644043, "learning_rate": 3.927550047664443e-05, "loss": 4.4303, "step": 8375 }, { "epoch": 0.22517801961574635, "grad_norm": 2.3859143257141113, "learning_rate": 3.926869127059785e-05, "loss": 4.4398, "step": 8380 }, { "epoch": 0.2253123740427247, "grad_norm": 2.494396686553955, "learning_rate": 3.926188206455128e-05, "loss": 4.3921, "step": 8385 }, { "epoch": 0.22544672846970307, "grad_norm": 3.0563199520111084, "learning_rate": 3.92550728585047e-05, "loss": 4.4749, "step": 8390 }, { "epoch": 0.22558108289668144, "grad_norm": 2.2936222553253174, "learning_rate": 3.924826365245812e-05, "loss": 4.458, "step": 8395 }, { "epoch": 0.2257154373236598, "grad_norm": 2.1167876720428467, "learning_rate": 3.924145444641155e-05, "loss": 4.506, "step": 8400 }, { "epoch": 0.22584979175063818, "grad_norm": 2.3850529193878174, "learning_rate": 3.9234645240364974e-05, "loss": 4.4521, "step": 8405 }, { "epoch": 0.22598414617761656, "grad_norm": 2.3167271614074707, "learning_rate": 3.9227836034318396e-05, "loss": 4.4543, "step": 8410 }, { "epoch": 0.22611850060459493, "grad_norm": 2.427035093307495, "learning_rate": 3.9221026828271825e-05, "loss": 4.5651, "step": 8415 }, { "epoch": 0.2262528550315733, "grad_norm": 2.547551155090332, "learning_rate": 3.9214217622225254e-05, "loss": 4.3067, "step": 8420 }, { "epoch": 0.22638720945855165, "grad_norm": 2.3174264430999756, "learning_rate": 3.9207408416178677e-05, "loss": 4.5975, "step": 8425 }, { "epoch": 0.22652156388553002, "grad_norm": 2.3896431922912598, "learning_rate": 3.92005992101321e-05, "loss": 4.3266, "step": 8430 }, { "epoch": 0.2266559183125084, "grad_norm": 2.4772348403930664, "learning_rate": 3.919379000408552e-05, "loss": 4.4856, "step": 8435 }, { "epoch": 0.22679027273948676, "grad_norm": 2.077833890914917, "learning_rate": 3.918698079803895e-05, "loss": 4.4156, "step": 8440 }, { "epoch": 0.22692462716646514, "grad_norm": 2.3736624717712402, "learning_rate": 3.918017159199238e-05, "loss": 4.4443, "step": 8445 }, { "epoch": 0.2270589815934435, "grad_norm": 2.409799337387085, "learning_rate": 3.91733623859458e-05, "loss": 4.5994, "step": 8450 }, { "epoch": 0.22719333602042188, "grad_norm": 2.3161847591400146, "learning_rate": 3.9166553179899224e-05, "loss": 4.5108, "step": 8455 }, { "epoch": 0.22732769044740025, "grad_norm": 2.3115291595458984, "learning_rate": 3.915974397385265e-05, "loss": 4.5899, "step": 8460 }, { "epoch": 0.2274620448743786, "grad_norm": 2.250058650970459, "learning_rate": 3.9152934767806075e-05, "loss": 4.5587, "step": 8465 }, { "epoch": 0.22759639930135697, "grad_norm": 2.448368787765503, "learning_rate": 3.91461255617595e-05, "loss": 4.519, "step": 8470 }, { "epoch": 0.22773075372833534, "grad_norm": 2.4634785652160645, "learning_rate": 3.9139316355712926e-05, "loss": 4.4725, "step": 8475 }, { "epoch": 0.22786510815531372, "grad_norm": 2.447174310684204, "learning_rate": 3.9132507149666355e-05, "loss": 4.5119, "step": 8480 }, { "epoch": 0.2279994625822921, "grad_norm": 2.297747850418091, "learning_rate": 3.912569794361978e-05, "loss": 4.47, "step": 8485 }, { "epoch": 0.22813381700927046, "grad_norm": 2.327514410018921, "learning_rate": 3.91188887375732e-05, "loss": 4.5262, "step": 8490 }, { "epoch": 0.22826817143624883, "grad_norm": 2.523096799850464, "learning_rate": 3.911207953152662e-05, "loss": 4.5991, "step": 8495 }, { "epoch": 0.2284025258632272, "grad_norm": 2.069838285446167, "learning_rate": 3.910527032548005e-05, "loss": 4.4604, "step": 8500 }, { "epoch": 0.22853688029020555, "grad_norm": 2.2846767902374268, "learning_rate": 3.909846111943348e-05, "loss": 4.3883, "step": 8505 }, { "epoch": 0.22867123471718392, "grad_norm": 2.458731174468994, "learning_rate": 3.90916519133869e-05, "loss": 4.4619, "step": 8510 }, { "epoch": 0.2288055891441623, "grad_norm": 2.4549267292022705, "learning_rate": 3.9084842707340325e-05, "loss": 4.5344, "step": 8515 }, { "epoch": 0.22893994357114067, "grad_norm": 2.4725306034088135, "learning_rate": 3.9078033501293754e-05, "loss": 4.4581, "step": 8520 }, { "epoch": 0.22907429799811904, "grad_norm": 2.3027560710906982, "learning_rate": 3.9071224295247176e-05, "loss": 4.4477, "step": 8525 }, { "epoch": 0.22920865242509741, "grad_norm": 2.1911067962646484, "learning_rate": 3.90644150892006e-05, "loss": 4.4504, "step": 8530 }, { "epoch": 0.2293430068520758, "grad_norm": 2.380733013153076, "learning_rate": 3.905760588315403e-05, "loss": 4.3704, "step": 8535 }, { "epoch": 0.22947736127905413, "grad_norm": 2.0949010848999023, "learning_rate": 3.905079667710745e-05, "loss": 4.4612, "step": 8540 }, { "epoch": 0.2296117157060325, "grad_norm": 2.3797502517700195, "learning_rate": 3.904398747106088e-05, "loss": 4.4963, "step": 8545 }, { "epoch": 0.22974607013301088, "grad_norm": 2.5473086833953857, "learning_rate": 3.90371782650143e-05, "loss": 4.4186, "step": 8550 }, { "epoch": 0.22988042455998925, "grad_norm": 2.38205885887146, "learning_rate": 3.903036905896772e-05, "loss": 4.5383, "step": 8555 }, { "epoch": 0.23001477898696762, "grad_norm": 2.4715940952301025, "learning_rate": 3.9023559852921146e-05, "loss": 4.4418, "step": 8560 }, { "epoch": 0.230149133413946, "grad_norm": 2.22713041305542, "learning_rate": 3.9016750646874575e-05, "loss": 4.5647, "step": 8565 }, { "epoch": 0.23028348784092437, "grad_norm": 2.3160481452941895, "learning_rate": 3.9009941440828004e-05, "loss": 4.5537, "step": 8570 }, { "epoch": 0.23041784226790274, "grad_norm": 2.2823052406311035, "learning_rate": 3.9003132234781426e-05, "loss": 4.56, "step": 8575 }, { "epoch": 0.23055219669488108, "grad_norm": 2.2835984230041504, "learning_rate": 3.899632302873485e-05, "loss": 4.4945, "step": 8580 }, { "epoch": 0.23068655112185946, "grad_norm": 2.418950319290161, "learning_rate": 3.898951382268828e-05, "loss": 4.4775, "step": 8585 }, { "epoch": 0.23082090554883783, "grad_norm": 2.2225818634033203, "learning_rate": 3.89827046166417e-05, "loss": 4.5085, "step": 8590 }, { "epoch": 0.2309552599758162, "grad_norm": 2.150221347808838, "learning_rate": 3.897589541059513e-05, "loss": 4.4651, "step": 8595 }, { "epoch": 0.23108961440279457, "grad_norm": 2.4287526607513428, "learning_rate": 3.896908620454855e-05, "loss": 4.616, "step": 8600 }, { "epoch": 0.23122396882977295, "grad_norm": 2.370195150375366, "learning_rate": 3.896227699850198e-05, "loss": 4.5388, "step": 8605 }, { "epoch": 0.23135832325675132, "grad_norm": 2.3494715690612793, "learning_rate": 3.89554677924554e-05, "loss": 4.4648, "step": 8610 }, { "epoch": 0.2314926776837297, "grad_norm": 2.2470271587371826, "learning_rate": 3.8948658586408824e-05, "loss": 4.4926, "step": 8615 }, { "epoch": 0.23162703211070804, "grad_norm": 2.0862324237823486, "learning_rate": 3.894184938036225e-05, "loss": 4.5883, "step": 8620 }, { "epoch": 0.2317613865376864, "grad_norm": 2.2382023334503174, "learning_rate": 3.8935040174315676e-05, "loss": 4.4649, "step": 8625 }, { "epoch": 0.23189574096466478, "grad_norm": 2.4097044467926025, "learning_rate": 3.8928230968269105e-05, "loss": 4.5404, "step": 8630 }, { "epoch": 0.23203009539164315, "grad_norm": 2.0999863147735596, "learning_rate": 3.892142176222253e-05, "loss": 4.53, "step": 8635 }, { "epoch": 0.23216444981862153, "grad_norm": 2.3411190509796143, "learning_rate": 3.891461255617595e-05, "loss": 4.5707, "step": 8640 }, { "epoch": 0.2322988042455999, "grad_norm": 2.1405792236328125, "learning_rate": 3.890780335012938e-05, "loss": 4.3955, "step": 8645 }, { "epoch": 0.23243315867257827, "grad_norm": 2.4790520668029785, "learning_rate": 3.89009941440828e-05, "loss": 4.4506, "step": 8650 }, { "epoch": 0.23256751309955664, "grad_norm": 2.6332178115844727, "learning_rate": 3.889418493803623e-05, "loss": 4.5147, "step": 8655 }, { "epoch": 0.232701867526535, "grad_norm": 2.314347505569458, "learning_rate": 3.888737573198965e-05, "loss": 4.4196, "step": 8660 }, { "epoch": 0.23283622195351336, "grad_norm": 2.422787666320801, "learning_rate": 3.888056652594308e-05, "loss": 4.4499, "step": 8665 }, { "epoch": 0.23297057638049173, "grad_norm": 2.620337724685669, "learning_rate": 3.88737573198965e-05, "loss": 4.5771, "step": 8670 }, { "epoch": 0.2331049308074701, "grad_norm": 2.2113358974456787, "learning_rate": 3.8866948113849926e-05, "loss": 4.5354, "step": 8675 }, { "epoch": 0.23323928523444848, "grad_norm": 2.293414354324341, "learning_rate": 3.886013890780335e-05, "loss": 4.5862, "step": 8680 }, { "epoch": 0.23337363966142685, "grad_norm": 2.2656936645507812, "learning_rate": 3.885332970175678e-05, "loss": 4.4915, "step": 8685 }, { "epoch": 0.23350799408840522, "grad_norm": 2.1302578449249268, "learning_rate": 3.8846520495710206e-05, "loss": 4.5319, "step": 8690 }, { "epoch": 0.23364234851538357, "grad_norm": 2.1620612144470215, "learning_rate": 3.883971128966363e-05, "loss": 4.5304, "step": 8695 }, { "epoch": 0.23377670294236194, "grad_norm": 2.3192574977874756, "learning_rate": 3.883290208361705e-05, "loss": 4.4739, "step": 8700 }, { "epoch": 0.23391105736934031, "grad_norm": 2.243558645248413, "learning_rate": 3.882609287757047e-05, "loss": 4.5051, "step": 8705 }, { "epoch": 0.2340454117963187, "grad_norm": 2.341973066329956, "learning_rate": 3.88192836715239e-05, "loss": 4.3077, "step": 8710 }, { "epoch": 0.23417976622329706, "grad_norm": 2.272465705871582, "learning_rate": 3.881247446547733e-05, "loss": 4.4325, "step": 8715 }, { "epoch": 0.23431412065027543, "grad_norm": 2.4624316692352295, "learning_rate": 3.880566525943075e-05, "loss": 4.5258, "step": 8720 }, { "epoch": 0.2344484750772538, "grad_norm": 2.387444019317627, "learning_rate": 3.8798856053384175e-05, "loss": 4.4817, "step": 8725 }, { "epoch": 0.23458282950423218, "grad_norm": 2.8348617553710938, "learning_rate": 3.8792046847337604e-05, "loss": 4.4936, "step": 8730 }, { "epoch": 0.23471718393121052, "grad_norm": 2.4759609699249268, "learning_rate": 3.878523764129103e-05, "loss": 4.4255, "step": 8735 }, { "epoch": 0.2348515383581889, "grad_norm": 2.4378538131713867, "learning_rate": 3.877842843524445e-05, "loss": 4.5988, "step": 8740 }, { "epoch": 0.23498589278516727, "grad_norm": 2.287172555923462, "learning_rate": 3.877161922919788e-05, "loss": 4.5726, "step": 8745 }, { "epoch": 0.23512024721214564, "grad_norm": 2.2528538703918457, "learning_rate": 3.876481002315131e-05, "loss": 4.5549, "step": 8750 }, { "epoch": 0.235254601639124, "grad_norm": 2.2703864574432373, "learning_rate": 3.875800081710473e-05, "loss": 4.4715, "step": 8755 }, { "epoch": 0.23538895606610238, "grad_norm": 2.429065704345703, "learning_rate": 3.875119161105815e-05, "loss": 4.4493, "step": 8760 }, { "epoch": 0.23552331049308076, "grad_norm": 2.346167802810669, "learning_rate": 3.8744382405011574e-05, "loss": 4.5248, "step": 8765 }, { "epoch": 0.23565766492005913, "grad_norm": 2.167602062225342, "learning_rate": 3.8737573198965e-05, "loss": 4.466, "step": 8770 }, { "epoch": 0.23579201934703747, "grad_norm": 2.6131200790405273, "learning_rate": 3.8730763992918425e-05, "loss": 4.5559, "step": 8775 }, { "epoch": 0.23592637377401585, "grad_norm": 2.3826704025268555, "learning_rate": 3.8723954786871854e-05, "loss": 4.4304, "step": 8780 }, { "epoch": 0.23606072820099422, "grad_norm": 2.5408835411071777, "learning_rate": 3.8717145580825277e-05, "loss": 4.5271, "step": 8785 }, { "epoch": 0.2361950826279726, "grad_norm": 2.528095245361328, "learning_rate": 3.8710336374778706e-05, "loss": 4.3296, "step": 8790 }, { "epoch": 0.23632943705495096, "grad_norm": 2.3806045055389404, "learning_rate": 3.870352716873213e-05, "loss": 4.3957, "step": 8795 }, { "epoch": 0.23646379148192934, "grad_norm": 2.254794120788574, "learning_rate": 3.869671796268555e-05, "loss": 4.4552, "step": 8800 }, { "epoch": 0.2365981459089077, "grad_norm": 2.223202705383301, "learning_rate": 3.868990875663898e-05, "loss": 4.4416, "step": 8805 }, { "epoch": 0.23673250033588608, "grad_norm": 2.321711301803589, "learning_rate": 3.868309955059241e-05, "loss": 4.5761, "step": 8810 }, { "epoch": 0.23686685476286443, "grad_norm": 2.138576030731201, "learning_rate": 3.867629034454583e-05, "loss": 4.3813, "step": 8815 }, { "epoch": 0.2370012091898428, "grad_norm": 2.2315521240234375, "learning_rate": 3.866948113849925e-05, "loss": 4.3917, "step": 8820 }, { "epoch": 0.23713556361682117, "grad_norm": 2.1541736125946045, "learning_rate": 3.8662671932452675e-05, "loss": 4.4306, "step": 8825 }, { "epoch": 0.23726991804379954, "grad_norm": 2.490097761154175, "learning_rate": 3.86558627264061e-05, "loss": 4.5851, "step": 8830 }, { "epoch": 0.23740427247077792, "grad_norm": 2.5991132259368896, "learning_rate": 3.8649053520359526e-05, "loss": 4.4972, "step": 8835 }, { "epoch": 0.2375386268977563, "grad_norm": 2.345184803009033, "learning_rate": 3.8642244314312955e-05, "loss": 4.4133, "step": 8840 }, { "epoch": 0.23767298132473466, "grad_norm": 2.3713176250457764, "learning_rate": 3.863543510826638e-05, "loss": 4.5444, "step": 8845 }, { "epoch": 0.237807335751713, "grad_norm": 2.3636770248413086, "learning_rate": 3.86286259022198e-05, "loss": 4.4242, "step": 8850 }, { "epoch": 0.23794169017869138, "grad_norm": 2.3559272289276123, "learning_rate": 3.862181669617323e-05, "loss": 4.528, "step": 8855 }, { "epoch": 0.23807604460566975, "grad_norm": 2.328324556350708, "learning_rate": 3.861500749012665e-05, "loss": 4.4464, "step": 8860 }, { "epoch": 0.23821039903264812, "grad_norm": 2.1875815391540527, "learning_rate": 3.860819828408008e-05, "loss": 4.3888, "step": 8865 }, { "epoch": 0.2383447534596265, "grad_norm": 2.2936065196990967, "learning_rate": 3.86013890780335e-05, "loss": 4.4511, "step": 8870 }, { "epoch": 0.23847910788660487, "grad_norm": 2.5145628452301025, "learning_rate": 3.859457987198693e-05, "loss": 4.4446, "step": 8875 }, { "epoch": 0.23861346231358324, "grad_norm": 2.300708293914795, "learning_rate": 3.8587770665940354e-05, "loss": 4.4348, "step": 8880 }, { "epoch": 0.2387478167405616, "grad_norm": 2.4654135704040527, "learning_rate": 3.8580961459893776e-05, "loss": 4.3634, "step": 8885 }, { "epoch": 0.23888217116753996, "grad_norm": 2.2008254528045654, "learning_rate": 3.85741522538472e-05, "loss": 4.5456, "step": 8890 }, { "epoch": 0.23901652559451833, "grad_norm": 2.32795786857605, "learning_rate": 3.856734304780063e-05, "loss": 4.3412, "step": 8895 }, { "epoch": 0.2391508800214967, "grad_norm": 2.6038146018981934, "learning_rate": 3.8560533841754056e-05, "loss": 4.5066, "step": 8900 }, { "epoch": 0.23928523444847508, "grad_norm": 2.289285898208618, "learning_rate": 3.855372463570748e-05, "loss": 4.4866, "step": 8905 }, { "epoch": 0.23941958887545345, "grad_norm": 2.2476844787597656, "learning_rate": 3.85469154296609e-05, "loss": 4.3769, "step": 8910 }, { "epoch": 0.23955394330243182, "grad_norm": 2.1925363540649414, "learning_rate": 3.854010622361433e-05, "loss": 4.4785, "step": 8915 }, { "epoch": 0.2396882977294102, "grad_norm": 2.213681697845459, "learning_rate": 3.853329701756775e-05, "loss": 4.3922, "step": 8920 }, { "epoch": 0.23982265215638857, "grad_norm": 2.39823055267334, "learning_rate": 3.852648781152118e-05, "loss": 4.4942, "step": 8925 }, { "epoch": 0.2399570065833669, "grad_norm": 2.709286689758301, "learning_rate": 3.8519678605474604e-05, "loss": 4.4324, "step": 8930 }, { "epoch": 0.24009136101034528, "grad_norm": 2.4521644115448, "learning_rate": 3.851286939942803e-05, "loss": 4.3661, "step": 8935 }, { "epoch": 0.24022571543732366, "grad_norm": 2.222060203552246, "learning_rate": 3.8506060193381455e-05, "loss": 4.5599, "step": 8940 }, { "epoch": 0.24036006986430203, "grad_norm": 2.293104648590088, "learning_rate": 3.849925098733488e-05, "loss": 4.5562, "step": 8945 }, { "epoch": 0.2404944242912804, "grad_norm": 2.38869571685791, "learning_rate": 3.84924417812883e-05, "loss": 4.5339, "step": 8950 }, { "epoch": 0.24062877871825877, "grad_norm": 2.325330972671509, "learning_rate": 3.848563257524173e-05, "loss": 4.5861, "step": 8955 }, { "epoch": 0.24076313314523715, "grad_norm": 2.5010159015655518, "learning_rate": 3.847882336919516e-05, "loss": 4.3589, "step": 8960 }, { "epoch": 0.2408974875722155, "grad_norm": 2.554168224334717, "learning_rate": 3.847201416314858e-05, "loss": 4.4188, "step": 8965 }, { "epoch": 0.24103184199919386, "grad_norm": 2.268612861633301, "learning_rate": 3.8465204957102e-05, "loss": 4.4852, "step": 8970 }, { "epoch": 0.24116619642617224, "grad_norm": 2.3356919288635254, "learning_rate": 3.8458395751055424e-05, "loss": 4.4218, "step": 8975 }, { "epoch": 0.2413005508531506, "grad_norm": 2.5257294178009033, "learning_rate": 3.8451586545008853e-05, "loss": 4.5004, "step": 8980 }, { "epoch": 0.24143490528012898, "grad_norm": 2.127638578414917, "learning_rate": 3.8444777338962276e-05, "loss": 4.3875, "step": 8985 }, { "epoch": 0.24156925970710735, "grad_norm": 2.363490581512451, "learning_rate": 3.8437968132915705e-05, "loss": 4.4058, "step": 8990 }, { "epoch": 0.24170361413408573, "grad_norm": 2.069204092025757, "learning_rate": 3.843115892686913e-05, "loss": 4.4866, "step": 8995 }, { "epoch": 0.2418379685610641, "grad_norm": 2.451849937438965, "learning_rate": 3.8424349720822556e-05, "loss": 4.2889, "step": 9000 }, { "epoch": 0.24197232298804244, "grad_norm": 2.3634588718414307, "learning_rate": 3.841754051477598e-05, "loss": 4.4167, "step": 9005 }, { "epoch": 0.24210667741502082, "grad_norm": 2.740828275680542, "learning_rate": 3.84107313087294e-05, "loss": 4.5071, "step": 9010 }, { "epoch": 0.2422410318419992, "grad_norm": 2.3479790687561035, "learning_rate": 3.840392210268283e-05, "loss": 4.4122, "step": 9015 }, { "epoch": 0.24237538626897756, "grad_norm": 2.4052951335906982, "learning_rate": 3.839711289663626e-05, "loss": 4.4962, "step": 9020 }, { "epoch": 0.24250974069595593, "grad_norm": 2.1615025997161865, "learning_rate": 3.839030369058968e-05, "loss": 4.4436, "step": 9025 }, { "epoch": 0.2426440951229343, "grad_norm": 2.497177839279175, "learning_rate": 3.83834944845431e-05, "loss": 4.4895, "step": 9030 }, { "epoch": 0.24277844954991268, "grad_norm": 2.334991216659546, "learning_rate": 3.8376685278496525e-05, "loss": 4.4673, "step": 9035 }, { "epoch": 0.24291280397689105, "grad_norm": 2.167146921157837, "learning_rate": 3.8369876072449955e-05, "loss": 4.4415, "step": 9040 }, { "epoch": 0.2430471584038694, "grad_norm": 2.415437936782837, "learning_rate": 3.836306686640338e-05, "loss": 4.4382, "step": 9045 }, { "epoch": 0.24318151283084777, "grad_norm": 2.3628439903259277, "learning_rate": 3.8356257660356806e-05, "loss": 4.3692, "step": 9050 }, { "epoch": 0.24331586725782614, "grad_norm": 2.343013286590576, "learning_rate": 3.834944845431023e-05, "loss": 4.4133, "step": 9055 }, { "epoch": 0.2434502216848045, "grad_norm": 2.317664384841919, "learning_rate": 3.834263924826366e-05, "loss": 4.4779, "step": 9060 }, { "epoch": 0.24358457611178289, "grad_norm": 2.4156370162963867, "learning_rate": 3.833583004221708e-05, "loss": 4.3418, "step": 9065 }, { "epoch": 0.24371893053876126, "grad_norm": 2.3349876403808594, "learning_rate": 3.83290208361705e-05, "loss": 4.3203, "step": 9070 }, { "epoch": 0.24385328496573963, "grad_norm": 2.193326234817505, "learning_rate": 3.832221163012393e-05, "loss": 4.525, "step": 9075 }, { "epoch": 0.243987639392718, "grad_norm": 2.5219979286193848, "learning_rate": 3.831540242407736e-05, "loss": 4.6186, "step": 9080 }, { "epoch": 0.24412199381969635, "grad_norm": 2.4387123584747314, "learning_rate": 3.830859321803078e-05, "loss": 4.4607, "step": 9085 }, { "epoch": 0.24425634824667472, "grad_norm": 2.266798973083496, "learning_rate": 3.8301784011984204e-05, "loss": 4.5249, "step": 9090 }, { "epoch": 0.2443907026736531, "grad_norm": 2.3985793590545654, "learning_rate": 3.8294974805937627e-05, "loss": 4.509, "step": 9095 }, { "epoch": 0.24452505710063147, "grad_norm": 2.538137912750244, "learning_rate": 3.8288165599891056e-05, "loss": 4.5016, "step": 9100 }, { "epoch": 0.24465941152760984, "grad_norm": 2.610562562942505, "learning_rate": 3.828135639384448e-05, "loss": 4.6595, "step": 9105 }, { "epoch": 0.2447937659545882, "grad_norm": 2.4562532901763916, "learning_rate": 3.827454718779791e-05, "loss": 4.4739, "step": 9110 }, { "epoch": 0.24492812038156658, "grad_norm": 2.3436148166656494, "learning_rate": 3.826773798175133e-05, "loss": 4.5131, "step": 9115 }, { "epoch": 0.24506247480854493, "grad_norm": 2.5559749603271484, "learning_rate": 3.826092877570476e-05, "loss": 4.37, "step": 9120 }, { "epoch": 0.2451968292355233, "grad_norm": 2.1195874214172363, "learning_rate": 3.825411956965818e-05, "loss": 4.5165, "step": 9125 }, { "epoch": 0.24533118366250167, "grad_norm": 2.151759147644043, "learning_rate": 3.82473103636116e-05, "loss": 4.5568, "step": 9130 }, { "epoch": 0.24546553808948005, "grad_norm": 2.2366557121276855, "learning_rate": 3.824050115756503e-05, "loss": 4.5393, "step": 9135 }, { "epoch": 0.24559989251645842, "grad_norm": 2.3647451400756836, "learning_rate": 3.8233691951518454e-05, "loss": 4.4958, "step": 9140 }, { "epoch": 0.2457342469434368, "grad_norm": 2.345599889755249, "learning_rate": 3.822688274547188e-05, "loss": 4.5471, "step": 9145 }, { "epoch": 0.24586860137041516, "grad_norm": 2.45613169670105, "learning_rate": 3.8220073539425305e-05, "loss": 4.439, "step": 9150 }, { "epoch": 0.24600295579739354, "grad_norm": 2.329835891723633, "learning_rate": 3.821326433337873e-05, "loss": 4.6392, "step": 9155 }, { "epoch": 0.24613731022437188, "grad_norm": 2.1473677158355713, "learning_rate": 3.820645512733215e-05, "loss": 4.3028, "step": 9160 }, { "epoch": 0.24627166465135025, "grad_norm": 2.5760326385498047, "learning_rate": 3.819964592128558e-05, "loss": 4.4658, "step": 9165 }, { "epoch": 0.24640601907832863, "grad_norm": 2.2796411514282227, "learning_rate": 3.819283671523901e-05, "loss": 4.544, "step": 9170 }, { "epoch": 0.246540373505307, "grad_norm": 2.3019275665283203, "learning_rate": 3.818602750919243e-05, "loss": 4.4505, "step": 9175 }, { "epoch": 0.24667472793228537, "grad_norm": 2.1995794773101807, "learning_rate": 3.817921830314585e-05, "loss": 4.4372, "step": 9180 }, { "epoch": 0.24680908235926374, "grad_norm": 2.2534918785095215, "learning_rate": 3.817240909709928e-05, "loss": 4.5352, "step": 9185 }, { "epoch": 0.24694343678624212, "grad_norm": 2.312190532684326, "learning_rate": 3.8165599891052704e-05, "loss": 4.4169, "step": 9190 }, { "epoch": 0.2470777912132205, "grad_norm": 2.111131191253662, "learning_rate": 3.8158790685006126e-05, "loss": 4.5093, "step": 9195 }, { "epoch": 0.24721214564019883, "grad_norm": 2.5359935760498047, "learning_rate": 3.8151981478959555e-05, "loss": 4.4724, "step": 9200 }, { "epoch": 0.2473465000671772, "grad_norm": 2.1045002937316895, "learning_rate": 3.8145172272912984e-05, "loss": 4.5626, "step": 9205 }, { "epoch": 0.24748085449415558, "grad_norm": 2.2645952701568604, "learning_rate": 3.8138363066866407e-05, "loss": 4.3804, "step": 9210 }, { "epoch": 0.24761520892113395, "grad_norm": 2.149087905883789, "learning_rate": 3.813155386081983e-05, "loss": 4.4206, "step": 9215 }, { "epoch": 0.24774956334811232, "grad_norm": 2.368006467819214, "learning_rate": 3.812474465477325e-05, "loss": 4.4235, "step": 9220 }, { "epoch": 0.2478839177750907, "grad_norm": 2.2952077388763428, "learning_rate": 3.811793544872668e-05, "loss": 4.4773, "step": 9225 }, { "epoch": 0.24801827220206907, "grad_norm": 2.197417736053467, "learning_rate": 3.811112624268011e-05, "loss": 4.4541, "step": 9230 }, { "epoch": 0.24815262662904744, "grad_norm": 2.4365744590759277, "learning_rate": 3.810431703663353e-05, "loss": 4.3973, "step": 9235 }, { "epoch": 0.24828698105602579, "grad_norm": 2.6713833808898926, "learning_rate": 3.8097507830586954e-05, "loss": 4.5353, "step": 9240 }, { "epoch": 0.24842133548300416, "grad_norm": 2.548422336578369, "learning_rate": 3.809069862454038e-05, "loss": 4.4258, "step": 9245 }, { "epoch": 0.24855568990998253, "grad_norm": 2.3496651649475098, "learning_rate": 3.8083889418493805e-05, "loss": 4.6104, "step": 9250 }, { "epoch": 0.2486900443369609, "grad_norm": 2.1973977088928223, "learning_rate": 3.807708021244723e-05, "loss": 4.4787, "step": 9255 }, { "epoch": 0.24882439876393928, "grad_norm": 2.3225276470184326, "learning_rate": 3.8070271006400656e-05, "loss": 4.4483, "step": 9260 }, { "epoch": 0.24895875319091765, "grad_norm": 2.1749799251556396, "learning_rate": 3.8063461800354085e-05, "loss": 4.5217, "step": 9265 }, { "epoch": 0.24909310761789602, "grad_norm": 2.273516893386841, "learning_rate": 3.805665259430751e-05, "loss": 4.4474, "step": 9270 }, { "epoch": 0.24922746204487436, "grad_norm": 2.4002926349639893, "learning_rate": 3.804984338826093e-05, "loss": 4.5451, "step": 9275 }, { "epoch": 0.24936181647185274, "grad_norm": 2.4176535606384277, "learning_rate": 3.804303418221435e-05, "loss": 4.3978, "step": 9280 }, { "epoch": 0.2494961708988311, "grad_norm": 2.40787410736084, "learning_rate": 3.803622497616778e-05, "loss": 4.474, "step": 9285 }, { "epoch": 0.24963052532580948, "grad_norm": 2.287524461746216, "learning_rate": 3.802941577012121e-05, "loss": 4.5904, "step": 9290 }, { "epoch": 0.24976487975278786, "grad_norm": 2.2771718502044678, "learning_rate": 3.802260656407463e-05, "loss": 4.507, "step": 9295 }, { "epoch": 0.24989923417976623, "grad_norm": 2.3056728839874268, "learning_rate": 3.8015797358028055e-05, "loss": 4.4315, "step": 9300 }, { "epoch": 0.2500335886067446, "grad_norm": 2.362565755844116, "learning_rate": 3.800898815198148e-05, "loss": 4.4862, "step": 9305 }, { "epoch": 0.250167943033723, "grad_norm": 2.3187263011932373, "learning_rate": 3.8002178945934906e-05, "loss": 4.4179, "step": 9310 }, { "epoch": 0.2503022974607013, "grad_norm": 2.4111485481262207, "learning_rate": 3.799536973988833e-05, "loss": 4.4404, "step": 9315 }, { "epoch": 0.2504366518876797, "grad_norm": 2.3228800296783447, "learning_rate": 3.798856053384176e-05, "loss": 4.4668, "step": 9320 }, { "epoch": 0.25057100631465806, "grad_norm": 2.4349305629730225, "learning_rate": 3.798175132779518e-05, "loss": 4.3094, "step": 9325 }, { "epoch": 0.25070536074163646, "grad_norm": 2.2491023540496826, "learning_rate": 3.797494212174861e-05, "loss": 4.5083, "step": 9330 }, { "epoch": 0.2508397151686148, "grad_norm": 2.369739532470703, "learning_rate": 3.796813291570203e-05, "loss": 4.4218, "step": 9335 }, { "epoch": 0.25097406959559315, "grad_norm": 2.3309056758880615, "learning_rate": 3.796132370965545e-05, "loss": 4.4126, "step": 9340 }, { "epoch": 0.25110842402257155, "grad_norm": 2.1860311031341553, "learning_rate": 3.7954514503608876e-05, "loss": 4.4451, "step": 9345 }, { "epoch": 0.2512427784495499, "grad_norm": 2.3741095066070557, "learning_rate": 3.794770529756231e-05, "loss": 4.415, "step": 9350 }, { "epoch": 0.2513771328765283, "grad_norm": 2.463059425354004, "learning_rate": 3.7940896091515734e-05, "loss": 4.4605, "step": 9355 }, { "epoch": 0.25151148730350664, "grad_norm": 2.4005463123321533, "learning_rate": 3.7934086885469156e-05, "loss": 4.5199, "step": 9360 }, { "epoch": 0.25164584173048504, "grad_norm": 2.7171578407287598, "learning_rate": 3.792727767942258e-05, "loss": 4.4095, "step": 9365 }, { "epoch": 0.2517801961574634, "grad_norm": 2.1840732097625732, "learning_rate": 3.792046847337601e-05, "loss": 4.6307, "step": 9370 }, { "epoch": 0.25191455058444173, "grad_norm": 2.632274866104126, "learning_rate": 3.791365926732943e-05, "loss": 4.3953, "step": 9375 }, { "epoch": 0.25204890501142013, "grad_norm": 2.2114365100860596, "learning_rate": 3.790685006128286e-05, "loss": 4.4049, "step": 9380 }, { "epoch": 0.2521832594383985, "grad_norm": 2.2385265827178955, "learning_rate": 3.790004085523628e-05, "loss": 4.4441, "step": 9385 }, { "epoch": 0.2523176138653769, "grad_norm": 2.204002618789673, "learning_rate": 3.789323164918971e-05, "loss": 4.3315, "step": 9390 }, { "epoch": 0.2524519682923552, "grad_norm": 2.837421178817749, "learning_rate": 3.788642244314313e-05, "loss": 4.4636, "step": 9395 }, { "epoch": 0.2525863227193336, "grad_norm": 2.271209716796875, "learning_rate": 3.7879613237096554e-05, "loss": 4.4716, "step": 9400 }, { "epoch": 0.25272067714631197, "grad_norm": 2.3757760524749756, "learning_rate": 3.787280403104998e-05, "loss": 4.2638, "step": 9405 }, { "epoch": 0.25285503157329037, "grad_norm": 2.3093769550323486, "learning_rate": 3.7865994825003406e-05, "loss": 4.3698, "step": 9410 }, { "epoch": 0.2529893860002687, "grad_norm": 2.305525064468384, "learning_rate": 3.7859185618956835e-05, "loss": 4.546, "step": 9415 }, { "epoch": 0.25312374042724706, "grad_norm": 2.623656749725342, "learning_rate": 3.785237641291026e-05, "loss": 4.5084, "step": 9420 }, { "epoch": 0.25325809485422546, "grad_norm": 2.4141461849212646, "learning_rate": 3.784556720686368e-05, "loss": 4.5286, "step": 9425 }, { "epoch": 0.2533924492812038, "grad_norm": 2.3922817707061768, "learning_rate": 3.78387580008171e-05, "loss": 4.3568, "step": 9430 }, { "epoch": 0.2535268037081822, "grad_norm": 2.359942674636841, "learning_rate": 3.783194879477053e-05, "loss": 4.4546, "step": 9435 }, { "epoch": 0.25366115813516055, "grad_norm": 2.224031925201416, "learning_rate": 3.782513958872396e-05, "loss": 4.5532, "step": 9440 }, { "epoch": 0.25379551256213895, "grad_norm": 2.1719138622283936, "learning_rate": 3.781833038267738e-05, "loss": 4.3671, "step": 9445 }, { "epoch": 0.2539298669891173, "grad_norm": 2.376143217086792, "learning_rate": 3.7811521176630804e-05, "loss": 4.4805, "step": 9450 }, { "epoch": 0.25406422141609564, "grad_norm": 2.81508731842041, "learning_rate": 3.780471197058423e-05, "loss": 4.533, "step": 9455 }, { "epoch": 0.25419857584307404, "grad_norm": 2.3779349327087402, "learning_rate": 3.7797902764537656e-05, "loss": 4.4187, "step": 9460 }, { "epoch": 0.2543329302700524, "grad_norm": 2.4163334369659424, "learning_rate": 3.779109355849108e-05, "loss": 4.3931, "step": 9465 }, { "epoch": 0.2544672846970308, "grad_norm": 2.3255393505096436, "learning_rate": 3.778428435244451e-05, "loss": 4.3101, "step": 9470 }, { "epoch": 0.2546016391240091, "grad_norm": 2.352632999420166, "learning_rate": 3.7777475146397936e-05, "loss": 4.4209, "step": 9475 }, { "epoch": 0.2547359935509875, "grad_norm": 2.4712870121002197, "learning_rate": 3.777066594035136e-05, "loss": 4.4008, "step": 9480 }, { "epoch": 0.25487034797796587, "grad_norm": 2.5995113849639893, "learning_rate": 3.776385673430478e-05, "loss": 4.5172, "step": 9485 }, { "epoch": 0.2550047024049442, "grad_norm": 2.4173362255096436, "learning_rate": 3.77570475282582e-05, "loss": 4.3905, "step": 9490 }, { "epoch": 0.2551390568319226, "grad_norm": 2.448518991470337, "learning_rate": 3.775023832221163e-05, "loss": 4.5255, "step": 9495 }, { "epoch": 0.25527341125890096, "grad_norm": 2.4114794731140137, "learning_rate": 3.774342911616506e-05, "loss": 4.4253, "step": 9500 }, { "epoch": 0.25540776568587936, "grad_norm": 2.410508394241333, "learning_rate": 3.773661991011848e-05, "loss": 4.4523, "step": 9505 }, { "epoch": 0.2555421201128577, "grad_norm": 2.3750996589660645, "learning_rate": 3.7729810704071905e-05, "loss": 4.5379, "step": 9510 }, { "epoch": 0.2556764745398361, "grad_norm": 2.520965576171875, "learning_rate": 3.7723001498025334e-05, "loss": 4.4393, "step": 9515 }, { "epoch": 0.25581082896681445, "grad_norm": 2.3848297595977783, "learning_rate": 3.771619229197876e-05, "loss": 4.4583, "step": 9520 }, { "epoch": 0.25594518339379285, "grad_norm": 2.315286874771118, "learning_rate": 3.770938308593218e-05, "loss": 4.5366, "step": 9525 }, { "epoch": 0.2560795378207712, "grad_norm": 2.2986245155334473, "learning_rate": 3.770257387988561e-05, "loss": 4.4422, "step": 9530 }, { "epoch": 0.25621389224774954, "grad_norm": 2.204393148422241, "learning_rate": 3.769576467383904e-05, "loss": 4.4746, "step": 9535 }, { "epoch": 0.25634824667472794, "grad_norm": 2.602311134338379, "learning_rate": 3.768895546779246e-05, "loss": 4.407, "step": 9540 }, { "epoch": 0.2564826011017063, "grad_norm": 2.519596576690674, "learning_rate": 3.768214626174588e-05, "loss": 4.404, "step": 9545 }, { "epoch": 0.2566169555286847, "grad_norm": 2.361661672592163, "learning_rate": 3.7675337055699304e-05, "loss": 4.3837, "step": 9550 }, { "epoch": 0.25675130995566303, "grad_norm": 2.2274861335754395, "learning_rate": 3.766852784965273e-05, "loss": 4.4144, "step": 9555 }, { "epoch": 0.25688566438264143, "grad_norm": 2.3781468868255615, "learning_rate": 3.766171864360616e-05, "loss": 4.4335, "step": 9560 }, { "epoch": 0.2570200188096198, "grad_norm": 2.120197296142578, "learning_rate": 3.7654909437559584e-05, "loss": 4.5055, "step": 9565 }, { "epoch": 0.2571543732365981, "grad_norm": 2.295009136199951, "learning_rate": 3.7648100231513006e-05, "loss": 4.3532, "step": 9570 }, { "epoch": 0.2572887276635765, "grad_norm": 2.354764461517334, "learning_rate": 3.764129102546643e-05, "loss": 4.4306, "step": 9575 }, { "epoch": 0.25742308209055487, "grad_norm": 2.46093487739563, "learning_rate": 3.763448181941986e-05, "loss": 4.4204, "step": 9580 }, { "epoch": 0.25755743651753327, "grad_norm": 2.1906797885894775, "learning_rate": 3.762767261337328e-05, "loss": 4.5042, "step": 9585 }, { "epoch": 0.2576917909445116, "grad_norm": 2.438903570175171, "learning_rate": 3.762086340732671e-05, "loss": 4.49, "step": 9590 }, { "epoch": 0.25782614537149, "grad_norm": 2.0955286026000977, "learning_rate": 3.761405420128013e-05, "loss": 4.418, "step": 9595 }, { "epoch": 0.25796049979846836, "grad_norm": 2.417271852493286, "learning_rate": 3.760724499523356e-05, "loss": 4.4343, "step": 9600 }, { "epoch": 0.2580948542254467, "grad_norm": 2.311737298965454, "learning_rate": 3.760043578918698e-05, "loss": 4.3654, "step": 9605 }, { "epoch": 0.2582292086524251, "grad_norm": 2.292494535446167, "learning_rate": 3.7593626583140405e-05, "loss": 4.5129, "step": 9610 }, { "epoch": 0.25836356307940345, "grad_norm": 2.616590738296509, "learning_rate": 3.758681737709383e-05, "loss": 4.4328, "step": 9615 }, { "epoch": 0.25849791750638185, "grad_norm": 2.3497307300567627, "learning_rate": 3.7580008171047256e-05, "loss": 4.5104, "step": 9620 }, { "epoch": 0.2586322719333602, "grad_norm": 2.2566702365875244, "learning_rate": 3.7573198965000685e-05, "loss": 4.4515, "step": 9625 }, { "epoch": 0.2587666263603386, "grad_norm": 2.3225743770599365, "learning_rate": 3.756638975895411e-05, "loss": 4.4815, "step": 9630 }, { "epoch": 0.25890098078731694, "grad_norm": 1.9311445951461792, "learning_rate": 3.755958055290753e-05, "loss": 4.4402, "step": 9635 }, { "epoch": 0.25903533521429534, "grad_norm": 2.3557026386260986, "learning_rate": 3.755277134686096e-05, "loss": 4.4655, "step": 9640 }, { "epoch": 0.2591696896412737, "grad_norm": 2.4471280574798584, "learning_rate": 3.754596214081438e-05, "loss": 4.4554, "step": 9645 }, { "epoch": 0.259304044068252, "grad_norm": 2.389927864074707, "learning_rate": 3.753915293476781e-05, "loss": 4.4767, "step": 9650 }, { "epoch": 0.2594383984952304, "grad_norm": 2.284169912338257, "learning_rate": 3.753234372872123e-05, "loss": 4.3781, "step": 9655 }, { "epoch": 0.25957275292220877, "grad_norm": 2.5234127044677734, "learning_rate": 3.752553452267466e-05, "loss": 4.4721, "step": 9660 }, { "epoch": 0.25970710734918717, "grad_norm": 2.4971258640289307, "learning_rate": 3.7518725316628084e-05, "loss": 4.4469, "step": 9665 }, { "epoch": 0.2598414617761655, "grad_norm": 2.0405125617980957, "learning_rate": 3.7511916110581506e-05, "loss": 4.3943, "step": 9670 }, { "epoch": 0.2599758162031439, "grad_norm": 2.242678165435791, "learning_rate": 3.750510690453493e-05, "loss": 4.4172, "step": 9675 }, { "epoch": 0.26011017063012226, "grad_norm": 2.285696506500244, "learning_rate": 3.749829769848836e-05, "loss": 4.3642, "step": 9680 }, { "epoch": 0.2602445250571006, "grad_norm": 2.272531509399414, "learning_rate": 3.7491488492441786e-05, "loss": 4.4344, "step": 9685 }, { "epoch": 0.260378879484079, "grad_norm": 2.4874634742736816, "learning_rate": 3.748467928639521e-05, "loss": 4.5439, "step": 9690 }, { "epoch": 0.26051323391105735, "grad_norm": 2.549420118331909, "learning_rate": 3.747787008034863e-05, "loss": 4.5267, "step": 9695 }, { "epoch": 0.26064758833803575, "grad_norm": 2.2837493419647217, "learning_rate": 3.747106087430206e-05, "loss": 4.319, "step": 9700 }, { "epoch": 0.2607819427650141, "grad_norm": 2.5838429927825928, "learning_rate": 3.746425166825548e-05, "loss": 4.4437, "step": 9705 }, { "epoch": 0.2609162971919925, "grad_norm": 2.4651379585266113, "learning_rate": 3.745744246220891e-05, "loss": 4.4631, "step": 9710 }, { "epoch": 0.26105065161897084, "grad_norm": 2.6954050064086914, "learning_rate": 3.7450633256162334e-05, "loss": 4.5162, "step": 9715 }, { "epoch": 0.26118500604594924, "grad_norm": 2.3330328464508057, "learning_rate": 3.7443824050115756e-05, "loss": 4.4874, "step": 9720 }, { "epoch": 0.2613193604729276, "grad_norm": 2.405008554458618, "learning_rate": 3.7437014844069185e-05, "loss": 4.483, "step": 9725 }, { "epoch": 0.26145371489990593, "grad_norm": 2.2013437747955322, "learning_rate": 3.743020563802261e-05, "loss": 4.4779, "step": 9730 }, { "epoch": 0.26158806932688433, "grad_norm": 2.2661209106445312, "learning_rate": 3.742339643197603e-05, "loss": 4.4922, "step": 9735 }, { "epoch": 0.2617224237538627, "grad_norm": 2.4545578956604004, "learning_rate": 3.741658722592946e-05, "loss": 4.5286, "step": 9740 }, { "epoch": 0.2618567781808411, "grad_norm": 2.7013602256774902, "learning_rate": 3.740977801988289e-05, "loss": 4.491, "step": 9745 }, { "epoch": 0.2619911326078194, "grad_norm": 2.254404067993164, "learning_rate": 3.740296881383631e-05, "loss": 4.4938, "step": 9750 }, { "epoch": 0.2621254870347978, "grad_norm": 2.3829667568206787, "learning_rate": 3.739615960778973e-05, "loss": 4.4186, "step": 9755 }, { "epoch": 0.26225984146177617, "grad_norm": 2.2982051372528076, "learning_rate": 3.7389350401743154e-05, "loss": 4.5197, "step": 9760 }, { "epoch": 0.2623941958887545, "grad_norm": 2.291553497314453, "learning_rate": 3.738254119569658e-05, "loss": 4.3647, "step": 9765 }, { "epoch": 0.2625285503157329, "grad_norm": 2.597904682159424, "learning_rate": 3.737573198965001e-05, "loss": 4.4534, "step": 9770 }, { "epoch": 0.26266290474271126, "grad_norm": 2.16689133644104, "learning_rate": 3.7368922783603435e-05, "loss": 4.4744, "step": 9775 }, { "epoch": 0.26279725916968966, "grad_norm": 2.276440382003784, "learning_rate": 3.736211357755686e-05, "loss": 4.47, "step": 9780 }, { "epoch": 0.262931613596668, "grad_norm": 2.2739968299865723, "learning_rate": 3.7355304371510286e-05, "loss": 4.4552, "step": 9785 }, { "epoch": 0.2630659680236464, "grad_norm": 2.153620481491089, "learning_rate": 3.734849516546371e-05, "loss": 4.6209, "step": 9790 }, { "epoch": 0.26320032245062475, "grad_norm": 2.309554100036621, "learning_rate": 3.734168595941713e-05, "loss": 4.4911, "step": 9795 }, { "epoch": 0.2633346768776031, "grad_norm": 2.1431453227996826, "learning_rate": 3.733487675337056e-05, "loss": 4.38, "step": 9800 }, { "epoch": 0.2634690313045815, "grad_norm": 2.358398914337158, "learning_rate": 3.732806754732399e-05, "loss": 4.5943, "step": 9805 }, { "epoch": 0.26360338573155984, "grad_norm": 2.278433322906494, "learning_rate": 3.732125834127741e-05, "loss": 4.4525, "step": 9810 }, { "epoch": 0.26373774015853824, "grad_norm": 2.511446475982666, "learning_rate": 3.731444913523083e-05, "loss": 4.3777, "step": 9815 }, { "epoch": 0.2638720945855166, "grad_norm": 2.6643190383911133, "learning_rate": 3.7307639929184255e-05, "loss": 4.644, "step": 9820 }, { "epoch": 0.264006449012495, "grad_norm": 2.4004883766174316, "learning_rate": 3.7300830723137684e-05, "loss": 4.4165, "step": 9825 }, { "epoch": 0.2641408034394733, "grad_norm": 2.514808416366577, "learning_rate": 3.729402151709111e-05, "loss": 4.436, "step": 9830 }, { "epoch": 0.2642751578664517, "grad_norm": 2.4849557876586914, "learning_rate": 3.7287212311044536e-05, "loss": 4.5066, "step": 9835 }, { "epoch": 0.26440951229343007, "grad_norm": 2.407912492752075, "learning_rate": 3.728040310499796e-05, "loss": 4.4581, "step": 9840 }, { "epoch": 0.2645438667204084, "grad_norm": 2.3319485187530518, "learning_rate": 3.727359389895139e-05, "loss": 4.5831, "step": 9845 }, { "epoch": 0.2646782211473868, "grad_norm": 2.386629819869995, "learning_rate": 3.726678469290481e-05, "loss": 4.4346, "step": 9850 }, { "epoch": 0.26481257557436516, "grad_norm": 2.3634583950042725, "learning_rate": 3.725997548685823e-05, "loss": 4.3664, "step": 9855 }, { "epoch": 0.26494693000134356, "grad_norm": 2.2978546619415283, "learning_rate": 3.725316628081166e-05, "loss": 4.3566, "step": 9860 }, { "epoch": 0.2650812844283219, "grad_norm": 2.196061134338379, "learning_rate": 3.724635707476508e-05, "loss": 4.2927, "step": 9865 }, { "epoch": 0.2652156388553003, "grad_norm": 2.523641347885132, "learning_rate": 3.723954786871851e-05, "loss": 4.3428, "step": 9870 }, { "epoch": 0.26534999328227865, "grad_norm": 2.3317599296569824, "learning_rate": 3.7232738662671934e-05, "loss": 4.4409, "step": 9875 }, { "epoch": 0.265484347709257, "grad_norm": 2.573763132095337, "learning_rate": 3.7225929456625357e-05, "loss": 4.2988, "step": 9880 }, { "epoch": 0.2656187021362354, "grad_norm": 2.4619436264038086, "learning_rate": 3.721912025057878e-05, "loss": 4.284, "step": 9885 }, { "epoch": 0.26575305656321374, "grad_norm": 2.335601568222046, "learning_rate": 3.721231104453221e-05, "loss": 4.5262, "step": 9890 }, { "epoch": 0.26588741099019214, "grad_norm": 2.5007078647613525, "learning_rate": 3.720550183848564e-05, "loss": 4.4662, "step": 9895 }, { "epoch": 0.2660217654171705, "grad_norm": 2.466279983520508, "learning_rate": 3.719869263243906e-05, "loss": 4.4318, "step": 9900 }, { "epoch": 0.2661561198441489, "grad_norm": 2.2760276794433594, "learning_rate": 3.719188342639248e-05, "loss": 4.4646, "step": 9905 }, { "epoch": 0.26629047427112723, "grad_norm": 2.3255913257598877, "learning_rate": 3.718507422034591e-05, "loss": 4.4533, "step": 9910 }, { "epoch": 0.2664248286981056, "grad_norm": 2.167842149734497, "learning_rate": 3.717826501429933e-05, "loss": 4.3864, "step": 9915 }, { "epoch": 0.266559183125084, "grad_norm": 2.4722235202789307, "learning_rate": 3.717145580825276e-05, "loss": 4.3986, "step": 9920 }, { "epoch": 0.2666935375520623, "grad_norm": 2.270608425140381, "learning_rate": 3.7164646602206184e-05, "loss": 4.4628, "step": 9925 }, { "epoch": 0.2668278919790407, "grad_norm": 2.0205249786376953, "learning_rate": 3.715783739615961e-05, "loss": 4.3178, "step": 9930 }, { "epoch": 0.26696224640601907, "grad_norm": 2.2083804607391357, "learning_rate": 3.7151028190113035e-05, "loss": 4.3497, "step": 9935 }, { "epoch": 0.26709660083299747, "grad_norm": 2.111360549926758, "learning_rate": 3.714421898406646e-05, "loss": 4.5364, "step": 9940 }, { "epoch": 0.2672309552599758, "grad_norm": 2.542346954345703, "learning_rate": 3.713740977801988e-05, "loss": 4.5795, "step": 9945 }, { "epoch": 0.2673653096869542, "grad_norm": 2.4858274459838867, "learning_rate": 3.713060057197331e-05, "loss": 4.4883, "step": 9950 }, { "epoch": 0.26749966411393256, "grad_norm": 2.094383955001831, "learning_rate": 3.712379136592674e-05, "loss": 4.4766, "step": 9955 }, { "epoch": 0.2676340185409109, "grad_norm": 2.312220811843872, "learning_rate": 3.711698215988016e-05, "loss": 4.4426, "step": 9960 }, { "epoch": 0.2677683729678893, "grad_norm": 2.50146746635437, "learning_rate": 3.711017295383358e-05, "loss": 4.4527, "step": 9965 }, { "epoch": 0.26790272739486765, "grad_norm": 2.3963401317596436, "learning_rate": 3.710336374778701e-05, "loss": 4.4038, "step": 9970 }, { "epoch": 0.26803708182184605, "grad_norm": 2.8256218433380127, "learning_rate": 3.7096554541740434e-05, "loss": 4.4912, "step": 9975 }, { "epoch": 0.2681714362488244, "grad_norm": 2.066718101501465, "learning_rate": 3.708974533569386e-05, "loss": 4.3603, "step": 9980 }, { "epoch": 0.2683057906758028, "grad_norm": 2.409860610961914, "learning_rate": 3.7082936129647285e-05, "loss": 4.5533, "step": 9985 }, { "epoch": 0.26844014510278114, "grad_norm": 2.360959768295288, "learning_rate": 3.7076126923600714e-05, "loss": 4.498, "step": 9990 }, { "epoch": 0.2685744995297595, "grad_norm": 2.37809681892395, "learning_rate": 3.7069317717554137e-05, "loss": 4.4151, "step": 9995 }, { "epoch": 0.2687088539567379, "grad_norm": 2.400447130203247, "learning_rate": 3.706250851150756e-05, "loss": 4.3155, "step": 10000 }, { "epoch": 0.2688432083837162, "grad_norm": 2.4247653484344482, "learning_rate": 3.705569930546098e-05, "loss": 4.3683, "step": 10005 }, { "epoch": 0.2689775628106946, "grad_norm": 2.2654831409454346, "learning_rate": 3.704889009941441e-05, "loss": 4.4586, "step": 10010 }, { "epoch": 0.26911191723767297, "grad_norm": 2.1549532413482666, "learning_rate": 3.704208089336784e-05, "loss": 4.4431, "step": 10015 }, { "epoch": 0.26924627166465137, "grad_norm": 2.375157117843628, "learning_rate": 3.703527168732126e-05, "loss": 4.4134, "step": 10020 }, { "epoch": 0.2693806260916297, "grad_norm": 2.561429977416992, "learning_rate": 3.7028462481274684e-05, "loss": 4.4236, "step": 10025 }, { "epoch": 0.26951498051860806, "grad_norm": 2.3778979778289795, "learning_rate": 3.7021653275228106e-05, "loss": 4.5557, "step": 10030 }, { "epoch": 0.26964933494558646, "grad_norm": 2.3131585121154785, "learning_rate": 3.7014844069181535e-05, "loss": 4.5653, "step": 10035 }, { "epoch": 0.2697836893725648, "grad_norm": 2.4765501022338867, "learning_rate": 3.700803486313496e-05, "loss": 4.4802, "step": 10040 }, { "epoch": 0.2699180437995432, "grad_norm": 2.3807547092437744, "learning_rate": 3.7001225657088386e-05, "loss": 4.4466, "step": 10045 }, { "epoch": 0.27005239822652155, "grad_norm": 2.347968816757202, "learning_rate": 3.699441645104181e-05, "loss": 4.3955, "step": 10050 }, { "epoch": 0.27018675265349995, "grad_norm": 2.708430528640747, "learning_rate": 3.698760724499524e-05, "loss": 4.5, "step": 10055 }, { "epoch": 0.2703211070804783, "grad_norm": 2.138028621673584, "learning_rate": 3.698079803894866e-05, "loss": 4.2832, "step": 10060 }, { "epoch": 0.2704554615074567, "grad_norm": 2.2226572036743164, "learning_rate": 3.697398883290208e-05, "loss": 4.4274, "step": 10065 }, { "epoch": 0.27058981593443504, "grad_norm": 2.441422939300537, "learning_rate": 3.696717962685551e-05, "loss": 4.5429, "step": 10070 }, { "epoch": 0.2707241703614134, "grad_norm": 2.2961292266845703, "learning_rate": 3.696037042080894e-05, "loss": 4.4615, "step": 10075 }, { "epoch": 0.2708585247883918, "grad_norm": 2.362414598464966, "learning_rate": 3.695356121476236e-05, "loss": 4.3932, "step": 10080 }, { "epoch": 0.27099287921537013, "grad_norm": 2.345384120941162, "learning_rate": 3.6946752008715785e-05, "loss": 4.4961, "step": 10085 }, { "epoch": 0.27112723364234853, "grad_norm": 2.411900758743286, "learning_rate": 3.693994280266921e-05, "loss": 4.4627, "step": 10090 }, { "epoch": 0.2712615880693269, "grad_norm": 2.3504819869995117, "learning_rate": 3.6933133596622636e-05, "loss": 4.3472, "step": 10095 }, { "epoch": 0.2713959424963053, "grad_norm": 2.5856263637542725, "learning_rate": 3.692632439057606e-05, "loss": 4.3207, "step": 10100 }, { "epoch": 0.2715302969232836, "grad_norm": 2.3424105644226074, "learning_rate": 3.691951518452949e-05, "loss": 4.5254, "step": 10105 }, { "epoch": 0.27166465135026197, "grad_norm": 2.2347137928009033, "learning_rate": 3.691270597848291e-05, "loss": 4.2565, "step": 10110 }, { "epoch": 0.27179900577724037, "grad_norm": 2.503782033920288, "learning_rate": 3.690589677243634e-05, "loss": 4.5038, "step": 10115 }, { "epoch": 0.2719333602042187, "grad_norm": 2.5684449672698975, "learning_rate": 3.689908756638976e-05, "loss": 4.3585, "step": 10120 }, { "epoch": 0.2720677146311971, "grad_norm": 2.4398772716522217, "learning_rate": 3.689227836034318e-05, "loss": 4.4603, "step": 10125 }, { "epoch": 0.27220206905817546, "grad_norm": 2.327562093734741, "learning_rate": 3.688546915429661e-05, "loss": 4.3809, "step": 10130 }, { "epoch": 0.27233642348515386, "grad_norm": 2.2435462474823, "learning_rate": 3.687865994825004e-05, "loss": 4.509, "step": 10135 }, { "epoch": 0.2724707779121322, "grad_norm": 2.418958902359009, "learning_rate": 3.6871850742203464e-05, "loss": 4.4447, "step": 10140 }, { "epoch": 0.2726051323391106, "grad_norm": 2.468705177307129, "learning_rate": 3.6865041536156886e-05, "loss": 4.5599, "step": 10145 }, { "epoch": 0.27273948676608895, "grad_norm": 2.3407859802246094, "learning_rate": 3.685823233011031e-05, "loss": 4.3735, "step": 10150 }, { "epoch": 0.2728738411930673, "grad_norm": 2.11291241645813, "learning_rate": 3.685142312406374e-05, "loss": 4.3764, "step": 10155 }, { "epoch": 0.2730081956200457, "grad_norm": 2.2554802894592285, "learning_rate": 3.684461391801716e-05, "loss": 4.5011, "step": 10160 }, { "epoch": 0.27314255004702404, "grad_norm": 2.3598947525024414, "learning_rate": 3.683780471197059e-05, "loss": 4.3916, "step": 10165 }, { "epoch": 0.27327690447400244, "grad_norm": 2.562696695327759, "learning_rate": 3.683099550592401e-05, "loss": 4.49, "step": 10170 }, { "epoch": 0.2734112589009808, "grad_norm": 2.5074269771575928, "learning_rate": 3.682418629987743e-05, "loss": 4.4121, "step": 10175 }, { "epoch": 0.2735456133279592, "grad_norm": 2.2123053073883057, "learning_rate": 3.681737709383086e-05, "loss": 4.5272, "step": 10180 }, { "epoch": 0.2736799677549375, "grad_norm": 2.396061658859253, "learning_rate": 3.6810567887784284e-05, "loss": 4.477, "step": 10185 }, { "epoch": 0.27381432218191587, "grad_norm": 2.271411418914795, "learning_rate": 3.680375868173771e-05, "loss": 4.4562, "step": 10190 }, { "epoch": 0.27394867660889427, "grad_norm": 2.235460042953491, "learning_rate": 3.6796949475691136e-05, "loss": 4.3722, "step": 10195 }, { "epoch": 0.2740830310358726, "grad_norm": 2.320960760116577, "learning_rate": 3.6790140269644565e-05, "loss": 4.4411, "step": 10200 }, { "epoch": 0.274217385462851, "grad_norm": 2.3031654357910156, "learning_rate": 3.678333106359799e-05, "loss": 4.3898, "step": 10205 }, { "epoch": 0.27435173988982936, "grad_norm": 2.1843700408935547, "learning_rate": 3.677652185755141e-05, "loss": 4.4716, "step": 10210 }, { "epoch": 0.27448609431680776, "grad_norm": 2.8004093170166016, "learning_rate": 3.676971265150483e-05, "loss": 4.4639, "step": 10215 }, { "epoch": 0.2746204487437861, "grad_norm": 2.3705034255981445, "learning_rate": 3.676290344545826e-05, "loss": 4.4949, "step": 10220 }, { "epoch": 0.27475480317076445, "grad_norm": 2.4161531925201416, "learning_rate": 3.675609423941169e-05, "loss": 4.4946, "step": 10225 }, { "epoch": 0.27488915759774285, "grad_norm": 2.491004228591919, "learning_rate": 3.674928503336511e-05, "loss": 4.5377, "step": 10230 }, { "epoch": 0.2750235120247212, "grad_norm": 2.0976486206054688, "learning_rate": 3.6742475827318534e-05, "loss": 4.4516, "step": 10235 }, { "epoch": 0.2751578664516996, "grad_norm": 2.4337034225463867, "learning_rate": 3.673566662127196e-05, "loss": 4.6469, "step": 10240 }, { "epoch": 0.27529222087867794, "grad_norm": 2.5322659015655518, "learning_rate": 3.6728857415225385e-05, "loss": 4.3459, "step": 10245 }, { "epoch": 0.27542657530565634, "grad_norm": 2.3156890869140625, "learning_rate": 3.672204820917881e-05, "loss": 4.4699, "step": 10250 }, { "epoch": 0.2755609297326347, "grad_norm": 2.657050609588623, "learning_rate": 3.671523900313224e-05, "loss": 4.4103, "step": 10255 }, { "epoch": 0.2756952841596131, "grad_norm": 2.39984130859375, "learning_rate": 3.6708429797085666e-05, "loss": 4.3438, "step": 10260 }, { "epoch": 0.27582963858659143, "grad_norm": 2.0272397994995117, "learning_rate": 3.670162059103909e-05, "loss": 4.3583, "step": 10265 }, { "epoch": 0.2759639930135698, "grad_norm": 2.1731221675872803, "learning_rate": 3.669481138499251e-05, "loss": 4.4528, "step": 10270 }, { "epoch": 0.2760983474405482, "grad_norm": 2.176909923553467, "learning_rate": 3.668800217894593e-05, "loss": 4.531, "step": 10275 }, { "epoch": 0.2762327018675265, "grad_norm": 2.547961473464966, "learning_rate": 3.668119297289936e-05, "loss": 4.346, "step": 10280 }, { "epoch": 0.2763670562945049, "grad_norm": 2.530648708343506, "learning_rate": 3.667438376685279e-05, "loss": 4.4228, "step": 10285 }, { "epoch": 0.27650141072148327, "grad_norm": 2.6035149097442627, "learning_rate": 3.666757456080621e-05, "loss": 4.3907, "step": 10290 }, { "epoch": 0.27663576514846167, "grad_norm": 2.4263744354248047, "learning_rate": 3.6660765354759635e-05, "loss": 4.403, "step": 10295 }, { "epoch": 0.27677011957544, "grad_norm": 2.1926891803741455, "learning_rate": 3.6653956148713064e-05, "loss": 4.5117, "step": 10300 }, { "epoch": 0.27690447400241835, "grad_norm": 2.2880146503448486, "learning_rate": 3.6647146942666487e-05, "loss": 4.4092, "step": 10305 }, { "epoch": 0.27703882842939676, "grad_norm": 2.328742742538452, "learning_rate": 3.664033773661991e-05, "loss": 4.4167, "step": 10310 }, { "epoch": 0.2771731828563751, "grad_norm": 2.5610458850860596, "learning_rate": 3.663352853057334e-05, "loss": 4.3274, "step": 10315 }, { "epoch": 0.2773075372833535, "grad_norm": 2.256901741027832, "learning_rate": 3.662671932452676e-05, "loss": 4.4294, "step": 10320 }, { "epoch": 0.27744189171033184, "grad_norm": 2.2029168605804443, "learning_rate": 3.661991011848019e-05, "loss": 4.3384, "step": 10325 }, { "epoch": 0.27757624613731025, "grad_norm": 2.049301862716675, "learning_rate": 3.661310091243361e-05, "loss": 4.4614, "step": 10330 }, { "epoch": 0.2777106005642886, "grad_norm": 2.651298999786377, "learning_rate": 3.6606291706387034e-05, "loss": 4.5494, "step": 10335 }, { "epoch": 0.27784495499126693, "grad_norm": 2.457087993621826, "learning_rate": 3.659948250034046e-05, "loss": 4.4691, "step": 10340 }, { "epoch": 0.27797930941824534, "grad_norm": 2.158074378967285, "learning_rate": 3.659267329429389e-05, "loss": 4.5279, "step": 10345 }, { "epoch": 0.2781136638452237, "grad_norm": 2.334291458129883, "learning_rate": 3.6585864088247314e-05, "loss": 4.4637, "step": 10350 }, { "epoch": 0.2782480182722021, "grad_norm": 2.2675631046295166, "learning_rate": 3.6579054882200736e-05, "loss": 4.4725, "step": 10355 }, { "epoch": 0.2783823726991804, "grad_norm": 3.0601279735565186, "learning_rate": 3.657224567615416e-05, "loss": 4.265, "step": 10360 }, { "epoch": 0.2785167271261588, "grad_norm": 2.5093634128570557, "learning_rate": 3.656543647010759e-05, "loss": 4.3841, "step": 10365 }, { "epoch": 0.27865108155313717, "grad_norm": 2.434187650680542, "learning_rate": 3.655862726406101e-05, "loss": 4.4422, "step": 10370 }, { "epoch": 0.27878543598011557, "grad_norm": 3.026028871536255, "learning_rate": 3.655181805801444e-05, "loss": 4.3557, "step": 10375 }, { "epoch": 0.2789197904070939, "grad_norm": 2.350429058074951, "learning_rate": 3.654500885196786e-05, "loss": 4.401, "step": 10380 }, { "epoch": 0.27905414483407226, "grad_norm": 2.2864737510681152, "learning_rate": 3.653819964592129e-05, "loss": 4.5293, "step": 10385 }, { "epoch": 0.27918849926105066, "grad_norm": 2.4145596027374268, "learning_rate": 3.653139043987471e-05, "loss": 4.4275, "step": 10390 }, { "epoch": 0.279322853688029, "grad_norm": 2.5522289276123047, "learning_rate": 3.6524581233828135e-05, "loss": 4.5022, "step": 10395 }, { "epoch": 0.2794572081150074, "grad_norm": 2.3700194358825684, "learning_rate": 3.651777202778156e-05, "loss": 4.3757, "step": 10400 }, { "epoch": 0.27959156254198575, "grad_norm": 2.554453134536743, "learning_rate": 3.651096282173499e-05, "loss": 4.3841, "step": 10405 }, { "epoch": 0.27972591696896415, "grad_norm": 2.4297685623168945, "learning_rate": 3.6504153615688415e-05, "loss": 4.4564, "step": 10410 }, { "epoch": 0.2798602713959425, "grad_norm": 2.2345170974731445, "learning_rate": 3.649734440964184e-05, "loss": 4.5281, "step": 10415 }, { "epoch": 0.27999462582292084, "grad_norm": 2.3470749855041504, "learning_rate": 3.649053520359526e-05, "loss": 4.4515, "step": 10420 }, { "epoch": 0.28012898024989924, "grad_norm": 2.357478618621826, "learning_rate": 3.648372599754869e-05, "loss": 4.4801, "step": 10425 }, { "epoch": 0.2802633346768776, "grad_norm": 2.428985118865967, "learning_rate": 3.647691679150211e-05, "loss": 4.4497, "step": 10430 }, { "epoch": 0.280397689103856, "grad_norm": 2.398602247238159, "learning_rate": 3.647010758545554e-05, "loss": 4.3434, "step": 10435 }, { "epoch": 0.28053204353083433, "grad_norm": 2.3062753677368164, "learning_rate": 3.646329837940896e-05, "loss": 4.4495, "step": 10440 }, { "epoch": 0.28066639795781273, "grad_norm": 2.2553212642669678, "learning_rate": 3.645648917336239e-05, "loss": 4.4459, "step": 10445 }, { "epoch": 0.2808007523847911, "grad_norm": 2.2880945205688477, "learning_rate": 3.6449679967315814e-05, "loss": 4.4541, "step": 10450 }, { "epoch": 0.2809351068117695, "grad_norm": 2.3593807220458984, "learning_rate": 3.6442870761269236e-05, "loss": 4.4576, "step": 10455 }, { "epoch": 0.2810694612387478, "grad_norm": 2.469602108001709, "learning_rate": 3.643606155522266e-05, "loss": 4.4915, "step": 10460 }, { "epoch": 0.28120381566572616, "grad_norm": 2.403080701828003, "learning_rate": 3.642925234917609e-05, "loss": 4.5021, "step": 10465 }, { "epoch": 0.28133817009270456, "grad_norm": 2.3165926933288574, "learning_rate": 3.6422443143129516e-05, "loss": 4.4755, "step": 10470 }, { "epoch": 0.2814725245196829, "grad_norm": 2.2108871936798096, "learning_rate": 3.641563393708294e-05, "loss": 4.4574, "step": 10475 }, { "epoch": 0.2816068789466613, "grad_norm": 2.4958202838897705, "learning_rate": 3.640882473103636e-05, "loss": 4.565, "step": 10480 }, { "epoch": 0.28174123337363965, "grad_norm": 2.2051994800567627, "learning_rate": 3.640201552498978e-05, "loss": 4.3889, "step": 10485 }, { "epoch": 0.28187558780061805, "grad_norm": 2.220006227493286, "learning_rate": 3.639520631894321e-05, "loss": 4.3849, "step": 10490 }, { "epoch": 0.2820099422275964, "grad_norm": 2.4894070625305176, "learning_rate": 3.638839711289664e-05, "loss": 4.4398, "step": 10495 }, { "epoch": 0.28214429665457474, "grad_norm": 2.513720750808716, "learning_rate": 3.6381587906850064e-05, "loss": 4.4799, "step": 10500 }, { "epoch": 0.28227865108155314, "grad_norm": 2.4000284671783447, "learning_rate": 3.6374778700803486e-05, "loss": 4.5191, "step": 10505 }, { "epoch": 0.2824130055085315, "grad_norm": 2.2847397327423096, "learning_rate": 3.6367969494756915e-05, "loss": 4.4295, "step": 10510 }, { "epoch": 0.2825473599355099, "grad_norm": 2.3887035846710205, "learning_rate": 3.636116028871034e-05, "loss": 4.4475, "step": 10515 }, { "epoch": 0.28268171436248823, "grad_norm": 2.2457971572875977, "learning_rate": 3.635435108266376e-05, "loss": 4.4302, "step": 10520 }, { "epoch": 0.28281606878946663, "grad_norm": 2.4918901920318604, "learning_rate": 3.634754187661719e-05, "loss": 4.4958, "step": 10525 }, { "epoch": 0.282950423216445, "grad_norm": 2.3126444816589355, "learning_rate": 3.634073267057062e-05, "loss": 4.4957, "step": 10530 }, { "epoch": 0.2830847776434233, "grad_norm": 2.216681957244873, "learning_rate": 3.633392346452404e-05, "loss": 4.484, "step": 10535 }, { "epoch": 0.2832191320704017, "grad_norm": 2.374052047729492, "learning_rate": 3.632711425847746e-05, "loss": 4.3977, "step": 10540 }, { "epoch": 0.28335348649738007, "grad_norm": 2.356959342956543, "learning_rate": 3.6320305052430884e-05, "loss": 4.4048, "step": 10545 }, { "epoch": 0.28348784092435847, "grad_norm": 2.2532029151916504, "learning_rate": 3.631349584638431e-05, "loss": 4.3279, "step": 10550 }, { "epoch": 0.2836221953513368, "grad_norm": 2.3313169479370117, "learning_rate": 3.630668664033774e-05, "loss": 4.4452, "step": 10555 }, { "epoch": 0.2837565497783152, "grad_norm": 2.250466823577881, "learning_rate": 3.6299877434291165e-05, "loss": 4.4416, "step": 10560 }, { "epoch": 0.28389090420529356, "grad_norm": 2.3865766525268555, "learning_rate": 3.629306822824459e-05, "loss": 4.357, "step": 10565 }, { "epoch": 0.28402525863227196, "grad_norm": 2.56585955619812, "learning_rate": 3.6286259022198016e-05, "loss": 4.4071, "step": 10570 }, { "epoch": 0.2841596130592503, "grad_norm": 2.4112820625305176, "learning_rate": 3.627944981615144e-05, "loss": 4.496, "step": 10575 }, { "epoch": 0.28429396748622865, "grad_norm": 2.4157190322875977, "learning_rate": 3.627264061010486e-05, "loss": 4.4601, "step": 10580 }, { "epoch": 0.28442832191320705, "grad_norm": 2.5503697395324707, "learning_rate": 3.626583140405829e-05, "loss": 4.3415, "step": 10585 }, { "epoch": 0.2845626763401854, "grad_norm": 2.1146814823150635, "learning_rate": 3.625902219801172e-05, "loss": 4.3361, "step": 10590 }, { "epoch": 0.2846970307671638, "grad_norm": 2.23808217048645, "learning_rate": 3.625221299196514e-05, "loss": 4.4614, "step": 10595 }, { "epoch": 0.28483138519414214, "grad_norm": 2.391080379486084, "learning_rate": 3.624540378591856e-05, "loss": 4.3429, "step": 10600 }, { "epoch": 0.28496573962112054, "grad_norm": 2.510503053665161, "learning_rate": 3.6238594579871985e-05, "loss": 4.4833, "step": 10605 }, { "epoch": 0.2851000940480989, "grad_norm": 2.296050786972046, "learning_rate": 3.623178537382541e-05, "loss": 4.4417, "step": 10610 }, { "epoch": 0.28523444847507723, "grad_norm": 2.3357465267181396, "learning_rate": 3.6224976167778843e-05, "loss": 4.4396, "step": 10615 }, { "epoch": 0.28536880290205563, "grad_norm": 2.3391647338867188, "learning_rate": 3.6218166961732266e-05, "loss": 4.4079, "step": 10620 }, { "epoch": 0.285503157329034, "grad_norm": 2.3151395320892334, "learning_rate": 3.621135775568569e-05, "loss": 4.4692, "step": 10625 }, { "epoch": 0.2856375117560124, "grad_norm": 2.2852585315704346, "learning_rate": 3.620454854963911e-05, "loss": 4.4648, "step": 10630 }, { "epoch": 0.2857718661829907, "grad_norm": 2.4205870628356934, "learning_rate": 3.619773934359254e-05, "loss": 4.3853, "step": 10635 }, { "epoch": 0.2859062206099691, "grad_norm": 2.181964635848999, "learning_rate": 3.619093013754596e-05, "loss": 4.4435, "step": 10640 }, { "epoch": 0.28604057503694746, "grad_norm": 2.2425522804260254, "learning_rate": 3.618412093149939e-05, "loss": 4.3077, "step": 10645 }, { "epoch": 0.2861749294639258, "grad_norm": 2.562311887741089, "learning_rate": 3.617731172545281e-05, "loss": 4.3823, "step": 10650 }, { "epoch": 0.2863092838909042, "grad_norm": 2.3054237365722656, "learning_rate": 3.617050251940624e-05, "loss": 4.3243, "step": 10655 }, { "epoch": 0.28644363831788255, "grad_norm": 2.220569133758545, "learning_rate": 3.6163693313359664e-05, "loss": 4.3406, "step": 10660 }, { "epoch": 0.28657799274486095, "grad_norm": 2.321383476257324, "learning_rate": 3.6156884107313087e-05, "loss": 4.3767, "step": 10665 }, { "epoch": 0.2867123471718393, "grad_norm": 2.3288893699645996, "learning_rate": 3.615007490126651e-05, "loss": 4.4126, "step": 10670 }, { "epoch": 0.2868467015988177, "grad_norm": 2.47627329826355, "learning_rate": 3.614326569521994e-05, "loss": 4.3013, "step": 10675 }, { "epoch": 0.28698105602579604, "grad_norm": 2.422590732574463, "learning_rate": 3.613645648917337e-05, "loss": 4.4255, "step": 10680 }, { "epoch": 0.28711541045277444, "grad_norm": 2.144662857055664, "learning_rate": 3.612964728312679e-05, "loss": 4.4167, "step": 10685 }, { "epoch": 0.2872497648797528, "grad_norm": 2.419856309890747, "learning_rate": 3.612283807708021e-05, "loss": 4.3795, "step": 10690 }, { "epoch": 0.28738411930673113, "grad_norm": 2.5130300521850586, "learning_rate": 3.611602887103364e-05, "loss": 4.3167, "step": 10695 }, { "epoch": 0.28751847373370953, "grad_norm": 2.4698777198791504, "learning_rate": 3.610921966498706e-05, "loss": 4.3989, "step": 10700 }, { "epoch": 0.2876528281606879, "grad_norm": 2.261470079421997, "learning_rate": 3.610241045894049e-05, "loss": 4.4147, "step": 10705 }, { "epoch": 0.2877871825876663, "grad_norm": 2.4074389934539795, "learning_rate": 3.6095601252893914e-05, "loss": 4.3915, "step": 10710 }, { "epoch": 0.2879215370146446, "grad_norm": 2.2203352451324463, "learning_rate": 3.608879204684734e-05, "loss": 4.4411, "step": 10715 }, { "epoch": 0.288055891441623, "grad_norm": 2.2533769607543945, "learning_rate": 3.6081982840800765e-05, "loss": 4.4036, "step": 10720 }, { "epoch": 0.28819024586860137, "grad_norm": 2.5158045291900635, "learning_rate": 3.607517363475419e-05, "loss": 4.3132, "step": 10725 }, { "epoch": 0.2883246002955797, "grad_norm": 2.354360818862915, "learning_rate": 3.606836442870761e-05, "loss": 4.4459, "step": 10730 }, { "epoch": 0.2884589547225581, "grad_norm": 2.3289177417755127, "learning_rate": 3.606155522266104e-05, "loss": 4.4517, "step": 10735 }, { "epoch": 0.28859330914953646, "grad_norm": 2.3871381282806396, "learning_rate": 3.605474601661447e-05, "loss": 4.3293, "step": 10740 }, { "epoch": 0.28872766357651486, "grad_norm": 2.4907991886138916, "learning_rate": 3.604793681056789e-05, "loss": 4.4504, "step": 10745 }, { "epoch": 0.2888620180034932, "grad_norm": 2.368387222290039, "learning_rate": 3.604112760452131e-05, "loss": 4.4293, "step": 10750 }, { "epoch": 0.2889963724304716, "grad_norm": 2.35514497756958, "learning_rate": 3.603431839847474e-05, "loss": 4.4665, "step": 10755 }, { "epoch": 0.28913072685744995, "grad_norm": 2.514583110809326, "learning_rate": 3.6027509192428164e-05, "loss": 4.2985, "step": 10760 }, { "epoch": 0.28926508128442835, "grad_norm": 2.2475037574768066, "learning_rate": 3.602069998638159e-05, "loss": 4.4275, "step": 10765 }, { "epoch": 0.2893994357114067, "grad_norm": 2.2607076168060303, "learning_rate": 3.6013890780335015e-05, "loss": 4.4705, "step": 10770 }, { "epoch": 0.28953379013838504, "grad_norm": 2.3649258613586426, "learning_rate": 3.600708157428844e-05, "loss": 4.4056, "step": 10775 }, { "epoch": 0.28966814456536344, "grad_norm": 2.1760125160217285, "learning_rate": 3.6000272368241866e-05, "loss": 4.4091, "step": 10780 }, { "epoch": 0.2898024989923418, "grad_norm": 2.48974347114563, "learning_rate": 3.599346316219529e-05, "loss": 4.4789, "step": 10785 }, { "epoch": 0.2899368534193202, "grad_norm": 2.404629945755005, "learning_rate": 3.598665395614871e-05, "loss": 4.4131, "step": 10790 }, { "epoch": 0.29007120784629853, "grad_norm": 2.3304474353790283, "learning_rate": 3.597984475010214e-05, "loss": 4.4051, "step": 10795 }, { "epoch": 0.29020556227327693, "grad_norm": 2.4542365074157715, "learning_rate": 3.597303554405557e-05, "loss": 4.3341, "step": 10800 }, { "epoch": 0.2903399167002553, "grad_norm": 2.328561782836914, "learning_rate": 3.596622633800899e-05, "loss": 4.3188, "step": 10805 }, { "epoch": 0.2904742711272336, "grad_norm": 2.4523963928222656, "learning_rate": 3.5959417131962414e-05, "loss": 4.3761, "step": 10810 }, { "epoch": 0.290608625554212, "grad_norm": 2.400569200515747, "learning_rate": 3.5952607925915836e-05, "loss": 4.5373, "step": 10815 }, { "epoch": 0.29074297998119036, "grad_norm": 2.5078623294830322, "learning_rate": 3.5945798719869265e-05, "loss": 4.4075, "step": 10820 }, { "epoch": 0.29087733440816876, "grad_norm": 2.3044204711914062, "learning_rate": 3.5938989513822694e-05, "loss": 4.367, "step": 10825 }, { "epoch": 0.2910116888351471, "grad_norm": 2.2870519161224365, "learning_rate": 3.5932180307776116e-05, "loss": 4.4056, "step": 10830 }, { "epoch": 0.2911460432621255, "grad_norm": 2.4397590160369873, "learning_rate": 3.592537110172954e-05, "loss": 4.3579, "step": 10835 }, { "epoch": 0.29128039768910385, "grad_norm": 2.2334470748901367, "learning_rate": 3.591856189568297e-05, "loss": 4.4265, "step": 10840 }, { "epoch": 0.2914147521160822, "grad_norm": 2.2962162494659424, "learning_rate": 3.591175268963639e-05, "loss": 4.3412, "step": 10845 }, { "epoch": 0.2915491065430606, "grad_norm": 2.281691551208496, "learning_rate": 3.590494348358981e-05, "loss": 4.4389, "step": 10850 }, { "epoch": 0.29168346097003894, "grad_norm": 2.365532398223877, "learning_rate": 3.589813427754324e-05, "loss": 4.4951, "step": 10855 }, { "epoch": 0.29181781539701734, "grad_norm": 2.326634168624878, "learning_rate": 3.589132507149667e-05, "loss": 4.3434, "step": 10860 }, { "epoch": 0.2919521698239957, "grad_norm": 2.6413421630859375, "learning_rate": 3.588451586545009e-05, "loss": 4.3872, "step": 10865 }, { "epoch": 0.2920865242509741, "grad_norm": 2.3470871448516846, "learning_rate": 3.5877706659403515e-05, "loss": 4.3722, "step": 10870 }, { "epoch": 0.29222087867795243, "grad_norm": 2.4437406063079834, "learning_rate": 3.587089745335694e-05, "loss": 4.4265, "step": 10875 }, { "epoch": 0.29235523310493083, "grad_norm": 2.169276475906372, "learning_rate": 3.5864088247310366e-05, "loss": 4.3829, "step": 10880 }, { "epoch": 0.2924895875319092, "grad_norm": 2.2414743900299072, "learning_rate": 3.585727904126379e-05, "loss": 4.3935, "step": 10885 }, { "epoch": 0.2926239419588875, "grad_norm": 2.283107042312622, "learning_rate": 3.585046983521722e-05, "loss": 4.3185, "step": 10890 }, { "epoch": 0.2927582963858659, "grad_norm": 2.3491806983947754, "learning_rate": 3.584366062917064e-05, "loss": 4.4095, "step": 10895 }, { "epoch": 0.29289265081284427, "grad_norm": 2.252155065536499, "learning_rate": 3.583685142312407e-05, "loss": 4.456, "step": 10900 }, { "epoch": 0.29302700523982267, "grad_norm": 2.1191248893737793, "learning_rate": 3.583004221707749e-05, "loss": 4.4297, "step": 10905 }, { "epoch": 0.293161359666801, "grad_norm": 2.2293434143066406, "learning_rate": 3.582323301103091e-05, "loss": 4.4096, "step": 10910 }, { "epoch": 0.2932957140937794, "grad_norm": 2.3526315689086914, "learning_rate": 3.581642380498434e-05, "loss": 4.3077, "step": 10915 }, { "epoch": 0.29343006852075776, "grad_norm": 2.2286932468414307, "learning_rate": 3.5809614598937765e-05, "loss": 4.3041, "step": 10920 }, { "epoch": 0.2935644229477361, "grad_norm": 2.38466739654541, "learning_rate": 3.5802805392891194e-05, "loss": 4.4351, "step": 10925 }, { "epoch": 0.2936987773747145, "grad_norm": 2.3098201751708984, "learning_rate": 3.5795996186844616e-05, "loss": 4.4392, "step": 10930 }, { "epoch": 0.29383313180169285, "grad_norm": 2.259516954421997, "learning_rate": 3.578918698079804e-05, "loss": 4.4694, "step": 10935 }, { "epoch": 0.29396748622867125, "grad_norm": 2.221287965774536, "learning_rate": 3.578237777475146e-05, "loss": 4.2827, "step": 10940 }, { "epoch": 0.2941018406556496, "grad_norm": 2.5505993366241455, "learning_rate": 3.577556856870489e-05, "loss": 4.4528, "step": 10945 }, { "epoch": 0.294236195082628, "grad_norm": 2.2073144912719727, "learning_rate": 3.576875936265832e-05, "loss": 4.4195, "step": 10950 }, { "epoch": 0.29437054950960634, "grad_norm": 2.5149948596954346, "learning_rate": 3.576195015661174e-05, "loss": 4.4233, "step": 10955 }, { "epoch": 0.2945049039365847, "grad_norm": 2.5190043449401855, "learning_rate": 3.575514095056516e-05, "loss": 4.3336, "step": 10960 }, { "epoch": 0.2946392583635631, "grad_norm": 2.2740297317504883, "learning_rate": 3.574833174451859e-05, "loss": 4.434, "step": 10965 }, { "epoch": 0.29477361279054143, "grad_norm": 2.2321646213531494, "learning_rate": 3.5741522538472014e-05, "loss": 4.5357, "step": 10970 }, { "epoch": 0.29490796721751983, "grad_norm": 2.4944801330566406, "learning_rate": 3.573471333242544e-05, "loss": 4.4857, "step": 10975 }, { "epoch": 0.2950423216444982, "grad_norm": 2.4806230068206787, "learning_rate": 3.5727904126378866e-05, "loss": 4.422, "step": 10980 }, { "epoch": 0.2951766760714766, "grad_norm": 2.392289400100708, "learning_rate": 3.5721094920332295e-05, "loss": 4.3546, "step": 10985 }, { "epoch": 0.2953110304984549, "grad_norm": 2.4640696048736572, "learning_rate": 3.571428571428572e-05, "loss": 4.5044, "step": 10990 }, { "epoch": 0.2954453849254333, "grad_norm": 2.2156856060028076, "learning_rate": 3.570747650823914e-05, "loss": 4.4228, "step": 10995 }, { "epoch": 0.29557973935241166, "grad_norm": 2.3098814487457275, "learning_rate": 3.570066730219256e-05, "loss": 4.3766, "step": 11000 }, { "epoch": 0.29571409377939, "grad_norm": 2.0859768390655518, "learning_rate": 3.569385809614599e-05, "loss": 4.4271, "step": 11005 }, { "epoch": 0.2958484482063684, "grad_norm": 2.573568820953369, "learning_rate": 3.568704889009942e-05, "loss": 4.43, "step": 11010 }, { "epoch": 0.29598280263334675, "grad_norm": 2.2669425010681152, "learning_rate": 3.568023968405284e-05, "loss": 4.2922, "step": 11015 }, { "epoch": 0.29611715706032515, "grad_norm": 2.443179130554199, "learning_rate": 3.5673430478006264e-05, "loss": 4.3523, "step": 11020 }, { "epoch": 0.2962515114873035, "grad_norm": 2.3064255714416504, "learning_rate": 3.566662127195969e-05, "loss": 4.4146, "step": 11025 }, { "epoch": 0.2963858659142819, "grad_norm": 2.366471767425537, "learning_rate": 3.5659812065913115e-05, "loss": 4.4128, "step": 11030 }, { "epoch": 0.29652022034126024, "grad_norm": 2.3048744201660156, "learning_rate": 3.565300285986654e-05, "loss": 4.4668, "step": 11035 }, { "epoch": 0.2966545747682386, "grad_norm": 2.431608200073242, "learning_rate": 3.564619365381997e-05, "loss": 4.3948, "step": 11040 }, { "epoch": 0.296788929195217, "grad_norm": 2.1769654750823975, "learning_rate": 3.5639384447773396e-05, "loss": 4.4429, "step": 11045 }, { "epoch": 0.29692328362219533, "grad_norm": 2.2304766178131104, "learning_rate": 3.563257524172682e-05, "loss": 4.4558, "step": 11050 }, { "epoch": 0.29705763804917373, "grad_norm": 2.6495771408081055, "learning_rate": 3.562576603568024e-05, "loss": 4.3541, "step": 11055 }, { "epoch": 0.2971919924761521, "grad_norm": 2.193000078201294, "learning_rate": 3.561895682963366e-05, "loss": 4.398, "step": 11060 }, { "epoch": 0.2973263469031305, "grad_norm": 2.5126960277557373, "learning_rate": 3.561214762358709e-05, "loss": 4.3713, "step": 11065 }, { "epoch": 0.2974607013301088, "grad_norm": 2.3089957237243652, "learning_rate": 3.560533841754052e-05, "loss": 4.3482, "step": 11070 }, { "epoch": 0.2975950557570872, "grad_norm": 2.3385934829711914, "learning_rate": 3.559852921149394e-05, "loss": 4.34, "step": 11075 }, { "epoch": 0.29772941018406557, "grad_norm": 2.3425333499908447, "learning_rate": 3.5591720005447365e-05, "loss": 4.3819, "step": 11080 }, { "epoch": 0.2978637646110439, "grad_norm": 2.419217824935913, "learning_rate": 3.558491079940079e-05, "loss": 4.3309, "step": 11085 }, { "epoch": 0.2979981190380223, "grad_norm": 2.4940073490142822, "learning_rate": 3.5578101593354217e-05, "loss": 4.4694, "step": 11090 }, { "epoch": 0.29813247346500066, "grad_norm": 2.1517605781555176, "learning_rate": 3.557129238730764e-05, "loss": 4.4293, "step": 11095 }, { "epoch": 0.29826682789197906, "grad_norm": 2.553548574447632, "learning_rate": 3.556448318126107e-05, "loss": 4.3153, "step": 11100 }, { "epoch": 0.2984011823189574, "grad_norm": 2.276975154876709, "learning_rate": 3.555767397521449e-05, "loss": 4.2643, "step": 11105 }, { "epoch": 0.2985355367459358, "grad_norm": 2.2542800903320312, "learning_rate": 3.555086476916792e-05, "loss": 4.5045, "step": 11110 }, { "epoch": 0.29866989117291415, "grad_norm": 2.1629130840301514, "learning_rate": 3.554405556312134e-05, "loss": 4.3954, "step": 11115 }, { "epoch": 0.2988042455998925, "grad_norm": 2.6322877407073975, "learning_rate": 3.5537246357074764e-05, "loss": 4.2769, "step": 11120 }, { "epoch": 0.2989386000268709, "grad_norm": 2.171420097351074, "learning_rate": 3.553043715102819e-05, "loss": 4.4171, "step": 11125 }, { "epoch": 0.29907295445384924, "grad_norm": 2.386714458465576, "learning_rate": 3.552362794498162e-05, "loss": 4.469, "step": 11130 }, { "epoch": 0.29920730888082764, "grad_norm": 2.2246439456939697, "learning_rate": 3.5516818738935044e-05, "loss": 4.3756, "step": 11135 }, { "epoch": 0.299341663307806, "grad_norm": 2.513181447982788, "learning_rate": 3.5510009532888466e-05, "loss": 4.4257, "step": 11140 }, { "epoch": 0.2994760177347844, "grad_norm": 2.3215301036834717, "learning_rate": 3.550320032684189e-05, "loss": 4.4151, "step": 11145 }, { "epoch": 0.29961037216176273, "grad_norm": 2.5489184856414795, "learning_rate": 3.549639112079532e-05, "loss": 4.4168, "step": 11150 }, { "epoch": 0.2997447265887411, "grad_norm": 2.383864164352417, "learning_rate": 3.548958191474874e-05, "loss": 4.3205, "step": 11155 }, { "epoch": 0.2998790810157195, "grad_norm": 2.7077414989471436, "learning_rate": 3.548277270870217e-05, "loss": 4.3645, "step": 11160 }, { "epoch": 0.3000134354426978, "grad_norm": 2.3905186653137207, "learning_rate": 3.547596350265559e-05, "loss": 4.5806, "step": 11165 }, { "epoch": 0.3001477898696762, "grad_norm": 2.443939208984375, "learning_rate": 3.546915429660902e-05, "loss": 4.3603, "step": 11170 }, { "epoch": 0.30028214429665456, "grad_norm": 2.447462558746338, "learning_rate": 3.546234509056244e-05, "loss": 4.2862, "step": 11175 }, { "epoch": 0.30041649872363296, "grad_norm": 2.6068060398101807, "learning_rate": 3.5455535884515865e-05, "loss": 4.3966, "step": 11180 }, { "epoch": 0.3005508531506113, "grad_norm": 2.393958568572998, "learning_rate": 3.5448726678469294e-05, "loss": 4.3843, "step": 11185 }, { "epoch": 0.3006852075775897, "grad_norm": 2.32204008102417, "learning_rate": 3.544191747242272e-05, "loss": 4.3656, "step": 11190 }, { "epoch": 0.30081956200456805, "grad_norm": 2.2890074253082275, "learning_rate": 3.5435108266376145e-05, "loss": 4.2652, "step": 11195 }, { "epoch": 0.3009539164315464, "grad_norm": 2.2149224281311035, "learning_rate": 3.542829906032957e-05, "loss": 4.4849, "step": 11200 }, { "epoch": 0.3010882708585248, "grad_norm": 2.5467464923858643, "learning_rate": 3.542148985428299e-05, "loss": 4.508, "step": 11205 }, { "epoch": 0.30122262528550314, "grad_norm": 2.1042325496673584, "learning_rate": 3.541468064823641e-05, "loss": 4.3913, "step": 11210 }, { "epoch": 0.30135697971248154, "grad_norm": 2.135972738265991, "learning_rate": 3.540787144218984e-05, "loss": 4.5184, "step": 11215 }, { "epoch": 0.3014913341394599, "grad_norm": 2.2988927364349365, "learning_rate": 3.540106223614327e-05, "loss": 4.4383, "step": 11220 }, { "epoch": 0.3016256885664383, "grad_norm": 2.4023237228393555, "learning_rate": 3.539425303009669e-05, "loss": 4.4747, "step": 11225 }, { "epoch": 0.30176004299341663, "grad_norm": 2.3424360752105713, "learning_rate": 3.5387443824050115e-05, "loss": 4.326, "step": 11230 }, { "epoch": 0.301894397420395, "grad_norm": 2.3838813304901123, "learning_rate": 3.5380634618003544e-05, "loss": 4.4709, "step": 11235 }, { "epoch": 0.3020287518473734, "grad_norm": 2.0433359146118164, "learning_rate": 3.5373825411956966e-05, "loss": 4.2943, "step": 11240 }, { "epoch": 0.3021631062743517, "grad_norm": 2.2191832065582275, "learning_rate": 3.536701620591039e-05, "loss": 4.4336, "step": 11245 }, { "epoch": 0.3022974607013301, "grad_norm": 2.4385364055633545, "learning_rate": 3.536020699986382e-05, "loss": 4.3519, "step": 11250 }, { "epoch": 0.30243181512830847, "grad_norm": 2.5782687664031982, "learning_rate": 3.5353397793817246e-05, "loss": 4.4066, "step": 11255 }, { "epoch": 0.30256616955528687, "grad_norm": 2.1897218227386475, "learning_rate": 3.534658858777067e-05, "loss": 4.4081, "step": 11260 }, { "epoch": 0.3027005239822652, "grad_norm": 2.364387273788452, "learning_rate": 3.533977938172409e-05, "loss": 4.4163, "step": 11265 }, { "epoch": 0.30283487840924356, "grad_norm": 2.163578748703003, "learning_rate": 3.533297017567751e-05, "loss": 4.4617, "step": 11270 }, { "epoch": 0.30296923283622196, "grad_norm": 2.3879506587982178, "learning_rate": 3.532616096963094e-05, "loss": 4.4299, "step": 11275 }, { "epoch": 0.3031035872632003, "grad_norm": 2.589524269104004, "learning_rate": 3.531935176358437e-05, "loss": 4.3473, "step": 11280 }, { "epoch": 0.3032379416901787, "grad_norm": 2.3256139755249023, "learning_rate": 3.5312542557537793e-05, "loss": 4.4461, "step": 11285 }, { "epoch": 0.30337229611715705, "grad_norm": 2.5100226402282715, "learning_rate": 3.5305733351491216e-05, "loss": 4.4542, "step": 11290 }, { "epoch": 0.30350665054413545, "grad_norm": 2.4867424964904785, "learning_rate": 3.5298924145444645e-05, "loss": 4.479, "step": 11295 }, { "epoch": 0.3036410049711138, "grad_norm": 2.80302095413208, "learning_rate": 3.529211493939807e-05, "loss": 4.4085, "step": 11300 }, { "epoch": 0.3037753593980922, "grad_norm": 2.315727949142456, "learning_rate": 3.528530573335149e-05, "loss": 4.4746, "step": 11305 }, { "epoch": 0.30390971382507054, "grad_norm": 2.3622002601623535, "learning_rate": 3.527849652730492e-05, "loss": 4.3331, "step": 11310 }, { "epoch": 0.3040440682520489, "grad_norm": 2.4788386821746826, "learning_rate": 3.527168732125835e-05, "loss": 4.3672, "step": 11315 }, { "epoch": 0.3041784226790273, "grad_norm": 2.3090922832489014, "learning_rate": 3.526487811521177e-05, "loss": 4.391, "step": 11320 }, { "epoch": 0.3043127771060056, "grad_norm": 2.557281255722046, "learning_rate": 3.525806890916519e-05, "loss": 4.4872, "step": 11325 }, { "epoch": 0.304447131532984, "grad_norm": 2.4267678260803223, "learning_rate": 3.5251259703118614e-05, "loss": 4.3654, "step": 11330 }, { "epoch": 0.3045814859599624, "grad_norm": 2.4860310554504395, "learning_rate": 3.524445049707204e-05, "loss": 4.3791, "step": 11335 }, { "epoch": 0.3047158403869408, "grad_norm": 2.445606231689453, "learning_rate": 3.523764129102547e-05, "loss": 4.3557, "step": 11340 }, { "epoch": 0.3048501948139191, "grad_norm": 2.7110044956207275, "learning_rate": 3.5230832084978895e-05, "loss": 4.3676, "step": 11345 }, { "epoch": 0.30498454924089746, "grad_norm": 2.3757598400115967, "learning_rate": 3.522402287893232e-05, "loss": 4.2883, "step": 11350 }, { "epoch": 0.30511890366787586, "grad_norm": 2.3647594451904297, "learning_rate": 3.521721367288574e-05, "loss": 4.2846, "step": 11355 }, { "epoch": 0.3052532580948542, "grad_norm": 2.4244589805603027, "learning_rate": 3.521040446683917e-05, "loss": 4.5249, "step": 11360 }, { "epoch": 0.3053876125218326, "grad_norm": 2.1759564876556396, "learning_rate": 3.520359526079259e-05, "loss": 4.3643, "step": 11365 }, { "epoch": 0.30552196694881095, "grad_norm": 2.3249053955078125, "learning_rate": 3.519678605474602e-05, "loss": 4.3989, "step": 11370 }, { "epoch": 0.30565632137578935, "grad_norm": 2.5302419662475586, "learning_rate": 3.518997684869944e-05, "loss": 4.3542, "step": 11375 }, { "epoch": 0.3057906758027677, "grad_norm": 2.640538215637207, "learning_rate": 3.518316764265287e-05, "loss": 4.4415, "step": 11380 }, { "epoch": 0.3059250302297461, "grad_norm": 2.354644536972046, "learning_rate": 3.517635843660629e-05, "loss": 4.3588, "step": 11385 }, { "epoch": 0.30605938465672444, "grad_norm": 2.2276113033294678, "learning_rate": 3.5169549230559715e-05, "loss": 4.3923, "step": 11390 }, { "epoch": 0.3061937390837028, "grad_norm": 2.4160144329071045, "learning_rate": 3.5162740024513144e-05, "loss": 4.3419, "step": 11395 }, { "epoch": 0.3063280935106812, "grad_norm": 2.4271352291107178, "learning_rate": 3.5155930818466573e-05, "loss": 4.4491, "step": 11400 }, { "epoch": 0.30646244793765953, "grad_norm": 2.392716646194458, "learning_rate": 3.5149121612419996e-05, "loss": 4.4226, "step": 11405 }, { "epoch": 0.30659680236463793, "grad_norm": 2.318420886993408, "learning_rate": 3.514231240637342e-05, "loss": 4.4825, "step": 11410 }, { "epoch": 0.3067311567916163, "grad_norm": 2.277195930480957, "learning_rate": 3.513550320032684e-05, "loss": 4.429, "step": 11415 }, { "epoch": 0.3068655112185947, "grad_norm": 2.4826502799987793, "learning_rate": 3.512869399428027e-05, "loss": 4.3727, "step": 11420 }, { "epoch": 0.306999865645573, "grad_norm": 2.258009672164917, "learning_rate": 3.512188478823369e-05, "loss": 4.3508, "step": 11425 }, { "epoch": 0.30713422007255137, "grad_norm": 2.3815462589263916, "learning_rate": 3.511507558218712e-05, "loss": 4.4836, "step": 11430 }, { "epoch": 0.30726857449952977, "grad_norm": 2.2452762126922607, "learning_rate": 3.510826637614054e-05, "loss": 4.472, "step": 11435 }, { "epoch": 0.3074029289265081, "grad_norm": 2.2901456356048584, "learning_rate": 3.510145717009397e-05, "loss": 4.3503, "step": 11440 }, { "epoch": 0.3075372833534865, "grad_norm": 2.1576340198516846, "learning_rate": 3.5094647964047394e-05, "loss": 4.2907, "step": 11445 }, { "epoch": 0.30767163778046486, "grad_norm": 2.279529571533203, "learning_rate": 3.5087838758000816e-05, "loss": 4.341, "step": 11450 }, { "epoch": 0.30780599220744326, "grad_norm": 2.130805492401123, "learning_rate": 3.508102955195424e-05, "loss": 4.35, "step": 11455 }, { "epoch": 0.3079403466344216, "grad_norm": 2.3189432621002197, "learning_rate": 3.5074220345907675e-05, "loss": 4.3288, "step": 11460 }, { "epoch": 0.30807470106139995, "grad_norm": 2.7252275943756104, "learning_rate": 3.50674111398611e-05, "loss": 4.4134, "step": 11465 }, { "epoch": 0.30820905548837835, "grad_norm": 2.7170603275299072, "learning_rate": 3.506060193381452e-05, "loss": 4.3571, "step": 11470 }, { "epoch": 0.3083434099153567, "grad_norm": 2.2511637210845947, "learning_rate": 3.505379272776794e-05, "loss": 4.399, "step": 11475 }, { "epoch": 0.3084777643423351, "grad_norm": 2.2870779037475586, "learning_rate": 3.504698352172137e-05, "loss": 4.2764, "step": 11480 }, { "epoch": 0.30861211876931344, "grad_norm": 2.5355606079101562, "learning_rate": 3.504017431567479e-05, "loss": 4.3383, "step": 11485 }, { "epoch": 0.30874647319629184, "grad_norm": 2.5333197116851807, "learning_rate": 3.503336510962822e-05, "loss": 4.3589, "step": 11490 }, { "epoch": 0.3088808276232702, "grad_norm": 2.444096565246582, "learning_rate": 3.5026555903581644e-05, "loss": 4.4517, "step": 11495 }, { "epoch": 0.3090151820502486, "grad_norm": 2.6205103397369385, "learning_rate": 3.5019746697535066e-05, "loss": 4.2829, "step": 11500 }, { "epoch": 0.3091495364772269, "grad_norm": 2.2746925354003906, "learning_rate": 3.5012937491488495e-05, "loss": 4.4552, "step": 11505 }, { "epoch": 0.30928389090420527, "grad_norm": 2.1340034008026123, "learning_rate": 3.500612828544192e-05, "loss": 4.3261, "step": 11510 }, { "epoch": 0.30941824533118367, "grad_norm": 2.2832868099212646, "learning_rate": 3.499931907939534e-05, "loss": 4.3634, "step": 11515 }, { "epoch": 0.309552599758162, "grad_norm": 2.423352003097534, "learning_rate": 3.499250987334877e-05, "loss": 4.4669, "step": 11520 }, { "epoch": 0.3096869541851404, "grad_norm": 2.549466133117676, "learning_rate": 3.49857006673022e-05, "loss": 4.4056, "step": 11525 }, { "epoch": 0.30982130861211876, "grad_norm": 2.4861085414886475, "learning_rate": 3.497889146125562e-05, "loss": 4.3539, "step": 11530 }, { "epoch": 0.30995566303909716, "grad_norm": 2.405954360961914, "learning_rate": 3.497208225520904e-05, "loss": 4.4271, "step": 11535 }, { "epoch": 0.3100900174660755, "grad_norm": 2.41330623626709, "learning_rate": 3.4965273049162465e-05, "loss": 4.336, "step": 11540 }, { "epoch": 0.31022437189305385, "grad_norm": 2.473904848098755, "learning_rate": 3.4958463843115894e-05, "loss": 4.322, "step": 11545 }, { "epoch": 0.31035872632003225, "grad_norm": 2.2048988342285156, "learning_rate": 3.495165463706932e-05, "loss": 4.3809, "step": 11550 }, { "epoch": 0.3104930807470106, "grad_norm": 2.625628709793091, "learning_rate": 3.4944845431022745e-05, "loss": 4.3046, "step": 11555 }, { "epoch": 0.310627435173989, "grad_norm": 2.1766016483306885, "learning_rate": 3.493803622497617e-05, "loss": 4.4934, "step": 11560 }, { "epoch": 0.31076178960096734, "grad_norm": 2.361522674560547, "learning_rate": 3.4931227018929596e-05, "loss": 4.3739, "step": 11565 }, { "epoch": 0.31089614402794574, "grad_norm": 2.4544107913970947, "learning_rate": 3.492441781288302e-05, "loss": 4.2935, "step": 11570 }, { "epoch": 0.3110304984549241, "grad_norm": 2.2605831623077393, "learning_rate": 3.491760860683644e-05, "loss": 4.3944, "step": 11575 }, { "epoch": 0.31116485288190243, "grad_norm": 2.448289155960083, "learning_rate": 3.491079940078987e-05, "loss": 4.413, "step": 11580 }, { "epoch": 0.31129920730888083, "grad_norm": 2.324584484100342, "learning_rate": 3.49039901947433e-05, "loss": 4.3521, "step": 11585 }, { "epoch": 0.3114335617358592, "grad_norm": 2.1733994483947754, "learning_rate": 3.489718098869672e-05, "loss": 4.2927, "step": 11590 }, { "epoch": 0.3115679161628376, "grad_norm": 2.2224202156066895, "learning_rate": 3.4890371782650144e-05, "loss": 4.3877, "step": 11595 }, { "epoch": 0.3117022705898159, "grad_norm": 2.3119382858276367, "learning_rate": 3.4883562576603566e-05, "loss": 4.4716, "step": 11600 }, { "epoch": 0.3118366250167943, "grad_norm": 2.497523307800293, "learning_rate": 3.4876753370556995e-05, "loss": 4.3796, "step": 11605 }, { "epoch": 0.31197097944377267, "grad_norm": 2.5951461791992188, "learning_rate": 3.4869944164510424e-05, "loss": 4.4597, "step": 11610 }, { "epoch": 0.31210533387075107, "grad_norm": 2.310804605484009, "learning_rate": 3.4863134958463846e-05, "loss": 4.3686, "step": 11615 }, { "epoch": 0.3122396882977294, "grad_norm": 2.299835205078125, "learning_rate": 3.485632575241727e-05, "loss": 4.3243, "step": 11620 }, { "epoch": 0.31237404272470776, "grad_norm": 2.4097862243652344, "learning_rate": 3.48495165463707e-05, "loss": 4.2957, "step": 11625 }, { "epoch": 0.31250839715168616, "grad_norm": 2.3484880924224854, "learning_rate": 3.484270734032412e-05, "loss": 4.4187, "step": 11630 }, { "epoch": 0.3126427515786645, "grad_norm": 2.4458000659942627, "learning_rate": 3.483589813427754e-05, "loss": 4.4133, "step": 11635 }, { "epoch": 0.3127771060056429, "grad_norm": 2.198395252227783, "learning_rate": 3.482908892823097e-05, "loss": 4.3864, "step": 11640 }, { "epoch": 0.31291146043262125, "grad_norm": 2.3579471111297607, "learning_rate": 3.48222797221844e-05, "loss": 4.364, "step": 11645 }, { "epoch": 0.31304581485959965, "grad_norm": 2.3208608627319336, "learning_rate": 3.481547051613782e-05, "loss": 4.3395, "step": 11650 }, { "epoch": 0.313180169286578, "grad_norm": 2.4523866176605225, "learning_rate": 3.4808661310091245e-05, "loss": 4.3132, "step": 11655 }, { "epoch": 0.31331452371355634, "grad_norm": 2.200252056121826, "learning_rate": 3.480185210404467e-05, "loss": 4.4443, "step": 11660 }, { "epoch": 0.31344887814053474, "grad_norm": 2.851299524307251, "learning_rate": 3.479504289799809e-05, "loss": 4.3955, "step": 11665 }, { "epoch": 0.3135832325675131, "grad_norm": 2.353484630584717, "learning_rate": 3.4788233691951525e-05, "loss": 4.4971, "step": 11670 }, { "epoch": 0.3137175869944915, "grad_norm": 2.378593921661377, "learning_rate": 3.478142448590495e-05, "loss": 4.3491, "step": 11675 }, { "epoch": 0.3138519414214698, "grad_norm": 2.4765584468841553, "learning_rate": 3.477461527985837e-05, "loss": 4.2711, "step": 11680 }, { "epoch": 0.3139862958484482, "grad_norm": 2.45198655128479, "learning_rate": 3.476780607381179e-05, "loss": 4.5146, "step": 11685 }, { "epoch": 0.31412065027542657, "grad_norm": 2.3231348991394043, "learning_rate": 3.476099686776522e-05, "loss": 4.4728, "step": 11690 }, { "epoch": 0.31425500470240497, "grad_norm": 2.1213226318359375, "learning_rate": 3.475418766171864e-05, "loss": 4.3763, "step": 11695 }, { "epoch": 0.3143893591293833, "grad_norm": 2.402273416519165, "learning_rate": 3.474737845567207e-05, "loss": 4.333, "step": 11700 }, { "epoch": 0.31452371355636166, "grad_norm": 2.523179531097412, "learning_rate": 3.4740569249625494e-05, "loss": 4.4392, "step": 11705 }, { "epoch": 0.31465806798334006, "grad_norm": 2.1239614486694336, "learning_rate": 3.4733760043578924e-05, "loss": 4.2906, "step": 11710 }, { "epoch": 0.3147924224103184, "grad_norm": 2.474504232406616, "learning_rate": 3.4726950837532346e-05, "loss": 4.3859, "step": 11715 }, { "epoch": 0.3149267768372968, "grad_norm": 2.3247148990631104, "learning_rate": 3.472014163148577e-05, "loss": 4.3754, "step": 11720 }, { "epoch": 0.31506113126427515, "grad_norm": 2.348541021347046, "learning_rate": 3.471333242543919e-05, "loss": 4.4282, "step": 11725 }, { "epoch": 0.31519548569125355, "grad_norm": 2.3271644115448, "learning_rate": 3.470652321939262e-05, "loss": 4.3513, "step": 11730 }, { "epoch": 0.3153298401182319, "grad_norm": 2.366567850112915, "learning_rate": 3.469971401334605e-05, "loss": 4.3926, "step": 11735 }, { "epoch": 0.31546419454521024, "grad_norm": 2.4325003623962402, "learning_rate": 3.469290480729947e-05, "loss": 4.2803, "step": 11740 }, { "epoch": 0.31559854897218864, "grad_norm": 2.245438814163208, "learning_rate": 3.468609560125289e-05, "loss": 4.3194, "step": 11745 }, { "epoch": 0.315732903399167, "grad_norm": 2.609093427658081, "learning_rate": 3.467928639520632e-05, "loss": 4.4137, "step": 11750 }, { "epoch": 0.3158672578261454, "grad_norm": 2.327552556991577, "learning_rate": 3.4672477189159744e-05, "loss": 4.2121, "step": 11755 }, { "epoch": 0.31600161225312373, "grad_norm": 2.444973945617676, "learning_rate": 3.466566798311317e-05, "loss": 4.5011, "step": 11760 }, { "epoch": 0.31613596668010213, "grad_norm": 2.243318796157837, "learning_rate": 3.4658858777066596e-05, "loss": 4.5054, "step": 11765 }, { "epoch": 0.3162703211070805, "grad_norm": 2.444439649581909, "learning_rate": 3.4652049571020025e-05, "loss": 4.429, "step": 11770 }, { "epoch": 0.3164046755340588, "grad_norm": 2.259615182876587, "learning_rate": 3.464524036497345e-05, "loss": 4.3728, "step": 11775 }, { "epoch": 0.3165390299610372, "grad_norm": 2.425847053527832, "learning_rate": 3.463843115892687e-05, "loss": 4.4365, "step": 11780 }, { "epoch": 0.31667338438801557, "grad_norm": 2.1365108489990234, "learning_rate": 3.463162195288029e-05, "loss": 4.2796, "step": 11785 }, { "epoch": 0.31680773881499397, "grad_norm": 2.366035223007202, "learning_rate": 3.462481274683372e-05, "loss": 4.3081, "step": 11790 }, { "epoch": 0.3169420932419723, "grad_norm": 2.532768964767456, "learning_rate": 3.461800354078715e-05, "loss": 4.4481, "step": 11795 }, { "epoch": 0.3170764476689507, "grad_norm": 2.1273610591888428, "learning_rate": 3.461119433474057e-05, "loss": 4.2419, "step": 11800 }, { "epoch": 0.31721080209592906, "grad_norm": 2.3638885021209717, "learning_rate": 3.4604385128693994e-05, "loss": 4.3347, "step": 11805 }, { "epoch": 0.31734515652290746, "grad_norm": 2.5616016387939453, "learning_rate": 3.4597575922647416e-05, "loss": 4.3735, "step": 11810 }, { "epoch": 0.3174795109498858, "grad_norm": 2.497040033340454, "learning_rate": 3.4590766716600845e-05, "loss": 4.3, "step": 11815 }, { "epoch": 0.31761386537686415, "grad_norm": 2.1773123741149902, "learning_rate": 3.4583957510554274e-05, "loss": 4.3572, "step": 11820 }, { "epoch": 0.31774821980384255, "grad_norm": 2.2486603260040283, "learning_rate": 3.45771483045077e-05, "loss": 4.3517, "step": 11825 }, { "epoch": 0.3178825742308209, "grad_norm": 2.387951374053955, "learning_rate": 3.457033909846112e-05, "loss": 4.3378, "step": 11830 }, { "epoch": 0.3180169286577993, "grad_norm": 2.4602630138397217, "learning_rate": 3.456352989241455e-05, "loss": 4.2555, "step": 11835 }, { "epoch": 0.31815128308477764, "grad_norm": 2.4527013301849365, "learning_rate": 3.455672068636797e-05, "loss": 4.3989, "step": 11840 }, { "epoch": 0.31828563751175604, "grad_norm": 2.3894920349121094, "learning_rate": 3.454991148032139e-05, "loss": 4.3874, "step": 11845 }, { "epoch": 0.3184199919387344, "grad_norm": 2.2721242904663086, "learning_rate": 3.454310227427482e-05, "loss": 4.3077, "step": 11850 }, { "epoch": 0.3185543463657127, "grad_norm": 2.3930463790893555, "learning_rate": 3.453629306822825e-05, "loss": 4.1812, "step": 11855 }, { "epoch": 0.3186887007926911, "grad_norm": 2.3637478351593018, "learning_rate": 3.452948386218167e-05, "loss": 4.4238, "step": 11860 }, { "epoch": 0.31882305521966947, "grad_norm": 2.376488208770752, "learning_rate": 3.4522674656135095e-05, "loss": 4.3669, "step": 11865 }, { "epoch": 0.31895740964664787, "grad_norm": 2.1426193714141846, "learning_rate": 3.451586545008852e-05, "loss": 4.2486, "step": 11870 }, { "epoch": 0.3190917640736262, "grad_norm": 2.336909055709839, "learning_rate": 3.4509056244041947e-05, "loss": 4.2129, "step": 11875 }, { "epoch": 0.3192261185006046, "grad_norm": 2.4798941612243652, "learning_rate": 3.450224703799537e-05, "loss": 4.2543, "step": 11880 }, { "epoch": 0.31936047292758296, "grad_norm": 2.344862699508667, "learning_rate": 3.44954378319488e-05, "loss": 4.3187, "step": 11885 }, { "epoch": 0.3194948273545613, "grad_norm": 2.3275115489959717, "learning_rate": 3.448862862590222e-05, "loss": 4.429, "step": 11890 }, { "epoch": 0.3196291817815397, "grad_norm": 2.7610864639282227, "learning_rate": 3.448181941985565e-05, "loss": 4.3432, "step": 11895 }, { "epoch": 0.31976353620851805, "grad_norm": 2.594911813735962, "learning_rate": 3.447501021380907e-05, "loss": 4.3449, "step": 11900 }, { "epoch": 0.31989789063549645, "grad_norm": 2.357860565185547, "learning_rate": 3.4468201007762494e-05, "loss": 4.3676, "step": 11905 }, { "epoch": 0.3200322450624748, "grad_norm": 2.3438656330108643, "learning_rate": 3.446139180171592e-05, "loss": 4.4071, "step": 11910 }, { "epoch": 0.3201665994894532, "grad_norm": 2.2888834476470947, "learning_rate": 3.445458259566935e-05, "loss": 4.3398, "step": 11915 }, { "epoch": 0.32030095391643154, "grad_norm": 2.390756368637085, "learning_rate": 3.4447773389622774e-05, "loss": 4.4164, "step": 11920 }, { "epoch": 0.32043530834340994, "grad_norm": 2.4769511222839355, "learning_rate": 3.4440964183576196e-05, "loss": 4.3921, "step": 11925 }, { "epoch": 0.3205696627703883, "grad_norm": 2.3073854446411133, "learning_rate": 3.443415497752962e-05, "loss": 4.4784, "step": 11930 }, { "epoch": 0.32070401719736663, "grad_norm": 2.395254135131836, "learning_rate": 3.442734577148305e-05, "loss": 4.3011, "step": 11935 }, { "epoch": 0.32083837162434503, "grad_norm": 2.1752660274505615, "learning_rate": 3.442053656543647e-05, "loss": 4.403, "step": 11940 }, { "epoch": 0.3209727260513234, "grad_norm": 2.0925235748291016, "learning_rate": 3.44137273593899e-05, "loss": 4.3205, "step": 11945 }, { "epoch": 0.3211070804783018, "grad_norm": 2.3411688804626465, "learning_rate": 3.440691815334332e-05, "loss": 4.2918, "step": 11950 }, { "epoch": 0.3212414349052801, "grad_norm": 2.327207326889038, "learning_rate": 3.4400108947296743e-05, "loss": 4.4743, "step": 11955 }, { "epoch": 0.3213757893322585, "grad_norm": 2.190216541290283, "learning_rate": 3.439329974125017e-05, "loss": 4.3268, "step": 11960 }, { "epoch": 0.32151014375923687, "grad_norm": 2.3104262351989746, "learning_rate": 3.4386490535203595e-05, "loss": 4.3758, "step": 11965 }, { "epoch": 0.3216444981862152, "grad_norm": 2.736016035079956, "learning_rate": 3.4379681329157024e-05, "loss": 4.3665, "step": 11970 }, { "epoch": 0.3217788526131936, "grad_norm": 2.324835777282715, "learning_rate": 3.4372872123110446e-05, "loss": 4.4447, "step": 11975 }, { "epoch": 0.32191320704017196, "grad_norm": 2.1983792781829834, "learning_rate": 3.4366062917063875e-05, "loss": 4.3595, "step": 11980 }, { "epoch": 0.32204756146715036, "grad_norm": 2.416890859603882, "learning_rate": 3.43592537110173e-05, "loss": 4.2341, "step": 11985 }, { "epoch": 0.3221819158941287, "grad_norm": 2.243213653564453, "learning_rate": 3.435244450497072e-05, "loss": 4.3102, "step": 11990 }, { "epoch": 0.3223162703211071, "grad_norm": 2.4709558486938477, "learning_rate": 3.434563529892414e-05, "loss": 4.3355, "step": 11995 }, { "epoch": 0.32245062474808545, "grad_norm": 2.502027750015259, "learning_rate": 3.433882609287757e-05, "loss": 4.3304, "step": 12000 }, { "epoch": 0.3225849791750638, "grad_norm": 2.3686423301696777, "learning_rate": 3.4332016886831e-05, "loss": 4.3382, "step": 12005 }, { "epoch": 0.3227193336020422, "grad_norm": 2.3989109992980957, "learning_rate": 3.432520768078442e-05, "loss": 4.4672, "step": 12010 }, { "epoch": 0.32285368802902054, "grad_norm": 2.2610034942626953, "learning_rate": 3.4318398474737845e-05, "loss": 4.4217, "step": 12015 }, { "epoch": 0.32298804245599894, "grad_norm": 2.316645860671997, "learning_rate": 3.4311589268691274e-05, "loss": 4.2564, "step": 12020 }, { "epoch": 0.3231223968829773, "grad_norm": 2.1691601276397705, "learning_rate": 3.4304780062644696e-05, "loss": 4.3506, "step": 12025 }, { "epoch": 0.3232567513099557, "grad_norm": 2.4404349327087402, "learning_rate": 3.4297970856598125e-05, "loss": 4.4703, "step": 12030 }, { "epoch": 0.323391105736934, "grad_norm": 2.2524938583374023, "learning_rate": 3.429116165055155e-05, "loss": 4.418, "step": 12035 }, { "epoch": 0.3235254601639124, "grad_norm": 2.566941976547241, "learning_rate": 3.4284352444504976e-05, "loss": 4.2888, "step": 12040 }, { "epoch": 0.32365981459089077, "grad_norm": 2.5094141960144043, "learning_rate": 3.42775432384584e-05, "loss": 4.2487, "step": 12045 }, { "epoch": 0.3237941690178691, "grad_norm": 2.543335199356079, "learning_rate": 3.427073403241182e-05, "loss": 4.4021, "step": 12050 }, { "epoch": 0.3239285234448475, "grad_norm": 2.3157460689544678, "learning_rate": 3.426392482636524e-05, "loss": 4.2615, "step": 12055 }, { "epoch": 0.32406287787182586, "grad_norm": 2.25266170501709, "learning_rate": 3.425711562031867e-05, "loss": 4.4013, "step": 12060 }, { "epoch": 0.32419723229880426, "grad_norm": 2.391850709915161, "learning_rate": 3.42503064142721e-05, "loss": 4.4549, "step": 12065 }, { "epoch": 0.3243315867257826, "grad_norm": 2.4575178623199463, "learning_rate": 3.4243497208225523e-05, "loss": 4.3986, "step": 12070 }, { "epoch": 0.324465941152761, "grad_norm": 2.5197598934173584, "learning_rate": 3.4236688002178946e-05, "loss": 4.1211, "step": 12075 }, { "epoch": 0.32460029557973935, "grad_norm": 2.5386927127838135, "learning_rate": 3.4229878796132375e-05, "loss": 4.5548, "step": 12080 }, { "epoch": 0.3247346500067177, "grad_norm": 2.444425582885742, "learning_rate": 3.42230695900858e-05, "loss": 4.3745, "step": 12085 }, { "epoch": 0.3248690044336961, "grad_norm": 2.4903130531311035, "learning_rate": 3.421626038403922e-05, "loss": 4.2791, "step": 12090 }, { "epoch": 0.32500335886067444, "grad_norm": 2.2152304649353027, "learning_rate": 3.420945117799265e-05, "loss": 4.3719, "step": 12095 }, { "epoch": 0.32513771328765284, "grad_norm": 2.3170926570892334, "learning_rate": 3.420264197194607e-05, "loss": 4.2828, "step": 12100 }, { "epoch": 0.3252720677146312, "grad_norm": 2.5880281925201416, "learning_rate": 3.41958327658995e-05, "loss": 4.3803, "step": 12105 }, { "epoch": 0.3254064221416096, "grad_norm": 2.205782175064087, "learning_rate": 3.418902355985292e-05, "loss": 4.3611, "step": 12110 }, { "epoch": 0.32554077656858793, "grad_norm": 2.1587607860565186, "learning_rate": 3.4182214353806344e-05, "loss": 4.4166, "step": 12115 }, { "epoch": 0.32567513099556633, "grad_norm": 2.41005539894104, "learning_rate": 3.417540514775977e-05, "loss": 4.4173, "step": 12120 }, { "epoch": 0.3258094854225447, "grad_norm": 2.3019816875457764, "learning_rate": 3.41685959417132e-05, "loss": 4.251, "step": 12125 }, { "epoch": 0.325943839849523, "grad_norm": 2.3289191722869873, "learning_rate": 3.4161786735666625e-05, "loss": 4.2742, "step": 12130 }, { "epoch": 0.3260781942765014, "grad_norm": 2.285503625869751, "learning_rate": 3.415497752962005e-05, "loss": 4.4542, "step": 12135 }, { "epoch": 0.32621254870347977, "grad_norm": 2.4369699954986572, "learning_rate": 3.414816832357347e-05, "loss": 4.4588, "step": 12140 }, { "epoch": 0.32634690313045817, "grad_norm": 2.3111672401428223, "learning_rate": 3.41413591175269e-05, "loss": 4.3306, "step": 12145 }, { "epoch": 0.3264812575574365, "grad_norm": 2.472026824951172, "learning_rate": 3.413454991148032e-05, "loss": 4.3568, "step": 12150 }, { "epoch": 0.3266156119844149, "grad_norm": 2.2053983211517334, "learning_rate": 3.412774070543375e-05, "loss": 4.4839, "step": 12155 }, { "epoch": 0.32674996641139326, "grad_norm": 2.419576406478882, "learning_rate": 3.412093149938717e-05, "loss": 4.3624, "step": 12160 }, { "epoch": 0.3268843208383716, "grad_norm": 2.6585793495178223, "learning_rate": 3.41141222933406e-05, "loss": 4.4714, "step": 12165 }, { "epoch": 0.32701867526535, "grad_norm": 2.3292903900146484, "learning_rate": 3.410731308729402e-05, "loss": 4.4811, "step": 12170 }, { "epoch": 0.32715302969232835, "grad_norm": 2.4216301441192627, "learning_rate": 3.4100503881247445e-05, "loss": 4.3285, "step": 12175 }, { "epoch": 0.32728738411930675, "grad_norm": 2.373793363571167, "learning_rate": 3.4093694675200874e-05, "loss": 4.3334, "step": 12180 }, { "epoch": 0.3274217385462851, "grad_norm": 2.598907709121704, "learning_rate": 3.40868854691543e-05, "loss": 4.4658, "step": 12185 }, { "epoch": 0.3275560929732635, "grad_norm": 2.5631680488586426, "learning_rate": 3.4080076263107726e-05, "loss": 4.3555, "step": 12190 }, { "epoch": 0.32769044740024184, "grad_norm": 2.4126274585723877, "learning_rate": 3.407326705706115e-05, "loss": 4.3303, "step": 12195 }, { "epoch": 0.3278248018272202, "grad_norm": 2.44565486907959, "learning_rate": 3.406645785101457e-05, "loss": 4.3847, "step": 12200 }, { "epoch": 0.3279591562541986, "grad_norm": 2.249321699142456, "learning_rate": 3.4059648644968e-05, "loss": 4.3886, "step": 12205 }, { "epoch": 0.3280935106811769, "grad_norm": 2.511990785598755, "learning_rate": 3.405283943892142e-05, "loss": 4.3684, "step": 12210 }, { "epoch": 0.3282278651081553, "grad_norm": 2.346261978149414, "learning_rate": 3.404603023287485e-05, "loss": 4.3515, "step": 12215 }, { "epoch": 0.32836221953513367, "grad_norm": 2.244824171066284, "learning_rate": 3.403922102682827e-05, "loss": 4.4045, "step": 12220 }, { "epoch": 0.32849657396211207, "grad_norm": 2.2844960689544678, "learning_rate": 3.40324118207817e-05, "loss": 4.2933, "step": 12225 }, { "epoch": 0.3286309283890904, "grad_norm": 2.235485553741455, "learning_rate": 3.4025602614735124e-05, "loss": 4.3726, "step": 12230 }, { "epoch": 0.3287652828160688, "grad_norm": 2.3331940174102783, "learning_rate": 3.4018793408688546e-05, "loss": 4.2503, "step": 12235 }, { "epoch": 0.32889963724304716, "grad_norm": 2.458235025405884, "learning_rate": 3.4011984202641975e-05, "loss": 4.5644, "step": 12240 }, { "epoch": 0.3290339916700255, "grad_norm": 2.502300262451172, "learning_rate": 3.40051749965954e-05, "loss": 4.4537, "step": 12245 }, { "epoch": 0.3291683460970039, "grad_norm": 2.342432737350464, "learning_rate": 3.399836579054883e-05, "loss": 4.2628, "step": 12250 }, { "epoch": 0.32930270052398225, "grad_norm": 2.379096746444702, "learning_rate": 3.399155658450225e-05, "loss": 4.3394, "step": 12255 }, { "epoch": 0.32943705495096065, "grad_norm": 2.4168946743011475, "learning_rate": 3.398474737845567e-05, "loss": 4.3453, "step": 12260 }, { "epoch": 0.329571409377939, "grad_norm": 2.3565354347229004, "learning_rate": 3.3977938172409094e-05, "loss": 4.351, "step": 12265 }, { "epoch": 0.3297057638049174, "grad_norm": 2.2729830741882324, "learning_rate": 3.397112896636252e-05, "loss": 4.2997, "step": 12270 }, { "epoch": 0.32984011823189574, "grad_norm": 2.5583789348602295, "learning_rate": 3.396431976031595e-05, "loss": 4.3637, "step": 12275 }, { "epoch": 0.3299744726588741, "grad_norm": 2.4167089462280273, "learning_rate": 3.3957510554269374e-05, "loss": 4.3675, "step": 12280 }, { "epoch": 0.3301088270858525, "grad_norm": 2.198096513748169, "learning_rate": 3.3950701348222796e-05, "loss": 4.2933, "step": 12285 }, { "epoch": 0.33024318151283083, "grad_norm": 2.355056047439575, "learning_rate": 3.3943892142176225e-05, "loss": 4.3765, "step": 12290 }, { "epoch": 0.33037753593980923, "grad_norm": 2.283850908279419, "learning_rate": 3.393708293612965e-05, "loss": 4.3287, "step": 12295 }, { "epoch": 0.3305118903667876, "grad_norm": 2.4181854724884033, "learning_rate": 3.393027373008307e-05, "loss": 4.5121, "step": 12300 }, { "epoch": 0.330646244793766, "grad_norm": 2.1839256286621094, "learning_rate": 3.39234645240365e-05, "loss": 4.2679, "step": 12305 }, { "epoch": 0.3307805992207443, "grad_norm": 2.5157525539398193, "learning_rate": 3.391665531798993e-05, "loss": 4.4237, "step": 12310 }, { "epoch": 0.33091495364772266, "grad_norm": 2.530059814453125, "learning_rate": 3.390984611194335e-05, "loss": 4.4203, "step": 12315 }, { "epoch": 0.33104930807470107, "grad_norm": 2.445878267288208, "learning_rate": 3.390303690589677e-05, "loss": 4.2602, "step": 12320 }, { "epoch": 0.3311836625016794, "grad_norm": 2.3912410736083984, "learning_rate": 3.3896227699850195e-05, "loss": 4.4294, "step": 12325 }, { "epoch": 0.3313180169286578, "grad_norm": 2.694054126739502, "learning_rate": 3.3889418493803624e-05, "loss": 4.3965, "step": 12330 }, { "epoch": 0.33145237135563616, "grad_norm": 2.2209055423736572, "learning_rate": 3.388260928775705e-05, "loss": 4.3231, "step": 12335 }, { "epoch": 0.33158672578261456, "grad_norm": 2.3513989448547363, "learning_rate": 3.3875800081710475e-05, "loss": 4.2444, "step": 12340 }, { "epoch": 0.3317210802095929, "grad_norm": 2.4620614051818848, "learning_rate": 3.38689908756639e-05, "loss": 4.4604, "step": 12345 }, { "epoch": 0.3318554346365713, "grad_norm": 2.3156909942626953, "learning_rate": 3.3862181669617326e-05, "loss": 4.3777, "step": 12350 }, { "epoch": 0.33198978906354965, "grad_norm": 2.2513797283172607, "learning_rate": 3.385537246357075e-05, "loss": 4.3868, "step": 12355 }, { "epoch": 0.332124143490528, "grad_norm": 2.136876106262207, "learning_rate": 3.384856325752417e-05, "loss": 4.3392, "step": 12360 }, { "epoch": 0.3322584979175064, "grad_norm": 2.2603936195373535, "learning_rate": 3.38417540514776e-05, "loss": 4.2622, "step": 12365 }, { "epoch": 0.33239285234448473, "grad_norm": 2.3585004806518555, "learning_rate": 3.383494484543103e-05, "loss": 4.4019, "step": 12370 }, { "epoch": 0.33252720677146314, "grad_norm": 2.2861709594726562, "learning_rate": 3.382813563938445e-05, "loss": 4.341, "step": 12375 }, { "epoch": 0.3326615611984415, "grad_norm": 2.525719165802002, "learning_rate": 3.3821326433337874e-05, "loss": 4.3403, "step": 12380 }, { "epoch": 0.3327959156254199, "grad_norm": 2.331448793411255, "learning_rate": 3.3814517227291296e-05, "loss": 4.3904, "step": 12385 }, { "epoch": 0.3329302700523982, "grad_norm": 2.2829172611236572, "learning_rate": 3.3807708021244725e-05, "loss": 4.3851, "step": 12390 }, { "epoch": 0.33306462447937657, "grad_norm": 2.4504599571228027, "learning_rate": 3.3800898815198154e-05, "loss": 4.4037, "step": 12395 }, { "epoch": 0.33319897890635497, "grad_norm": 2.14096999168396, "learning_rate": 3.3794089609151576e-05, "loss": 4.2153, "step": 12400 }, { "epoch": 0.3333333333333333, "grad_norm": 2.502445697784424, "learning_rate": 3.3787280403105e-05, "loss": 4.3228, "step": 12405 }, { "epoch": 0.3334676877603117, "grad_norm": 2.3934991359710693, "learning_rate": 3.378047119705842e-05, "loss": 4.3316, "step": 12410 }, { "epoch": 0.33360204218729006, "grad_norm": 2.35772705078125, "learning_rate": 3.377366199101185e-05, "loss": 4.3413, "step": 12415 }, { "epoch": 0.33373639661426846, "grad_norm": 2.1789119243621826, "learning_rate": 3.376685278496527e-05, "loss": 4.4542, "step": 12420 }, { "epoch": 0.3338707510412468, "grad_norm": 2.1237103939056396, "learning_rate": 3.37600435789187e-05, "loss": 4.1719, "step": 12425 }, { "epoch": 0.3340051054682252, "grad_norm": 2.3214142322540283, "learning_rate": 3.375323437287212e-05, "loss": 4.3039, "step": 12430 }, { "epoch": 0.33413945989520355, "grad_norm": 2.4996509552001953, "learning_rate": 3.374642516682555e-05, "loss": 4.4156, "step": 12435 }, { "epoch": 0.3342738143221819, "grad_norm": 2.3055732250213623, "learning_rate": 3.3739615960778975e-05, "loss": 4.3311, "step": 12440 }, { "epoch": 0.3344081687491603, "grad_norm": 2.5904111862182617, "learning_rate": 3.37328067547324e-05, "loss": 4.2898, "step": 12445 }, { "epoch": 0.33454252317613864, "grad_norm": 2.240269422531128, "learning_rate": 3.3725997548685826e-05, "loss": 4.3548, "step": 12450 }, { "epoch": 0.33467687760311704, "grad_norm": 2.281118869781494, "learning_rate": 3.3719188342639255e-05, "loss": 4.4745, "step": 12455 }, { "epoch": 0.3348112320300954, "grad_norm": 2.4933812618255615, "learning_rate": 3.371237913659268e-05, "loss": 4.4069, "step": 12460 }, { "epoch": 0.3349455864570738, "grad_norm": 2.5674359798431396, "learning_rate": 3.37055699305461e-05, "loss": 4.2655, "step": 12465 }, { "epoch": 0.33507994088405213, "grad_norm": 2.341557025909424, "learning_rate": 3.369876072449952e-05, "loss": 4.3043, "step": 12470 }, { "epoch": 0.3352142953110305, "grad_norm": 2.2489495277404785, "learning_rate": 3.369195151845295e-05, "loss": 4.3071, "step": 12475 }, { "epoch": 0.3353486497380089, "grad_norm": 2.3209519386291504, "learning_rate": 3.368514231240637e-05, "loss": 4.4017, "step": 12480 }, { "epoch": 0.3354830041649872, "grad_norm": 2.4912731647491455, "learning_rate": 3.36783331063598e-05, "loss": 4.1886, "step": 12485 }, { "epoch": 0.3356173585919656, "grad_norm": 2.1422030925750732, "learning_rate": 3.3671523900313224e-05, "loss": 4.2687, "step": 12490 }, { "epoch": 0.33575171301894396, "grad_norm": 2.25996732711792, "learning_rate": 3.3664714694266653e-05, "loss": 4.3919, "step": 12495 }, { "epoch": 0.33588606744592236, "grad_norm": 2.4497909545898438, "learning_rate": 3.3657905488220076e-05, "loss": 4.3657, "step": 12500 }, { "epoch": 0.3360204218729007, "grad_norm": 2.573125123977661, "learning_rate": 3.36510962821735e-05, "loss": 4.514, "step": 12505 }, { "epoch": 0.33615477629987905, "grad_norm": 2.1329214572906494, "learning_rate": 3.364428707612692e-05, "loss": 4.2861, "step": 12510 }, { "epoch": 0.33628913072685745, "grad_norm": 2.522408962249756, "learning_rate": 3.3637477870080356e-05, "loss": 4.4201, "step": 12515 }, { "epoch": 0.3364234851538358, "grad_norm": 2.377274990081787, "learning_rate": 3.363066866403378e-05, "loss": 4.4027, "step": 12520 }, { "epoch": 0.3365578395808142, "grad_norm": 2.287916660308838, "learning_rate": 3.36238594579872e-05, "loss": 4.3578, "step": 12525 }, { "epoch": 0.33669219400779254, "grad_norm": 2.212514638900757, "learning_rate": 3.361705025194062e-05, "loss": 4.3872, "step": 12530 }, { "epoch": 0.33682654843477094, "grad_norm": 2.34199857711792, "learning_rate": 3.361024104589405e-05, "loss": 4.3825, "step": 12535 }, { "epoch": 0.3369609028617493, "grad_norm": 2.276085376739502, "learning_rate": 3.3603431839847474e-05, "loss": 4.2627, "step": 12540 }, { "epoch": 0.3370952572887277, "grad_norm": 2.4497063159942627, "learning_rate": 3.35966226338009e-05, "loss": 4.3662, "step": 12545 }, { "epoch": 0.33722961171570603, "grad_norm": 2.360079050064087, "learning_rate": 3.3589813427754326e-05, "loss": 4.3821, "step": 12550 }, { "epoch": 0.3373639661426844, "grad_norm": 2.5171477794647217, "learning_rate": 3.358300422170775e-05, "loss": 4.3701, "step": 12555 }, { "epoch": 0.3374983205696628, "grad_norm": 2.528137445449829, "learning_rate": 3.357619501566118e-05, "loss": 4.4686, "step": 12560 }, { "epoch": 0.3376326749966411, "grad_norm": 2.028557300567627, "learning_rate": 3.35693858096146e-05, "loss": 4.4379, "step": 12565 }, { "epoch": 0.3377670294236195, "grad_norm": 2.183546543121338, "learning_rate": 3.356257660356802e-05, "loss": 4.2262, "step": 12570 }, { "epoch": 0.33790138385059787, "grad_norm": 2.586716890335083, "learning_rate": 3.355576739752145e-05, "loss": 4.2206, "step": 12575 }, { "epoch": 0.33803573827757627, "grad_norm": 2.306382656097412, "learning_rate": 3.354895819147488e-05, "loss": 4.3338, "step": 12580 }, { "epoch": 0.3381700927045546, "grad_norm": 2.430414915084839, "learning_rate": 3.35421489854283e-05, "loss": 4.4614, "step": 12585 }, { "epoch": 0.33830444713153296, "grad_norm": 2.2400527000427246, "learning_rate": 3.3535339779381724e-05, "loss": 4.4942, "step": 12590 }, { "epoch": 0.33843880155851136, "grad_norm": 2.146414279937744, "learning_rate": 3.3528530573335146e-05, "loss": 4.3551, "step": 12595 }, { "epoch": 0.3385731559854897, "grad_norm": 2.246049404144287, "learning_rate": 3.3521721367288575e-05, "loss": 4.3162, "step": 12600 }, { "epoch": 0.3387075104124681, "grad_norm": 2.318774700164795, "learning_rate": 3.3514912161242004e-05, "loss": 4.3223, "step": 12605 }, { "epoch": 0.33884186483944645, "grad_norm": 2.380481004714966, "learning_rate": 3.350810295519543e-05, "loss": 4.4088, "step": 12610 }, { "epoch": 0.33897621926642485, "grad_norm": 2.5404300689697266, "learning_rate": 3.350129374914885e-05, "loss": 4.3706, "step": 12615 }, { "epoch": 0.3391105736934032, "grad_norm": 2.256519079208374, "learning_rate": 3.349448454310228e-05, "loss": 4.2743, "step": 12620 }, { "epoch": 0.33924492812038154, "grad_norm": 2.2663304805755615, "learning_rate": 3.34876753370557e-05, "loss": 4.2736, "step": 12625 }, { "epoch": 0.33937928254735994, "grad_norm": 2.308063268661499, "learning_rate": 3.348086613100912e-05, "loss": 4.4443, "step": 12630 }, { "epoch": 0.3395136369743383, "grad_norm": 2.628347635269165, "learning_rate": 3.347405692496255e-05, "loss": 4.3288, "step": 12635 }, { "epoch": 0.3396479914013167, "grad_norm": 2.6036171913146973, "learning_rate": 3.346724771891598e-05, "loss": 4.373, "step": 12640 }, { "epoch": 0.33978234582829503, "grad_norm": 2.3996665477752686, "learning_rate": 3.34604385128694e-05, "loss": 4.291, "step": 12645 }, { "epoch": 0.33991670025527343, "grad_norm": 2.418679714202881, "learning_rate": 3.3453629306822825e-05, "loss": 4.3245, "step": 12650 }, { "epoch": 0.3400510546822518, "grad_norm": 2.441160202026367, "learning_rate": 3.344682010077625e-05, "loss": 4.2828, "step": 12655 }, { "epoch": 0.3401854091092302, "grad_norm": 2.4043376445770264, "learning_rate": 3.3440010894729676e-05, "loss": 4.3061, "step": 12660 }, { "epoch": 0.3403197635362085, "grad_norm": 2.0237512588500977, "learning_rate": 3.3433201688683106e-05, "loss": 4.2617, "step": 12665 }, { "epoch": 0.34045411796318686, "grad_norm": 2.437177896499634, "learning_rate": 3.342639248263653e-05, "loss": 4.327, "step": 12670 }, { "epoch": 0.34058847239016526, "grad_norm": 2.318439483642578, "learning_rate": 3.341958327658995e-05, "loss": 4.3084, "step": 12675 }, { "epoch": 0.3407228268171436, "grad_norm": 2.2808539867401123, "learning_rate": 3.341277407054338e-05, "loss": 4.3637, "step": 12680 }, { "epoch": 0.340857181244122, "grad_norm": 2.3590006828308105, "learning_rate": 3.34059648644968e-05, "loss": 4.3297, "step": 12685 }, { "epoch": 0.34099153567110035, "grad_norm": 2.226768732070923, "learning_rate": 3.3399155658450224e-05, "loss": 4.2035, "step": 12690 }, { "epoch": 0.34112589009807875, "grad_norm": 2.8055317401885986, "learning_rate": 3.339234645240365e-05, "loss": 4.3896, "step": 12695 }, { "epoch": 0.3412602445250571, "grad_norm": 2.292235851287842, "learning_rate": 3.3385537246357075e-05, "loss": 4.413, "step": 12700 }, { "epoch": 0.34139459895203544, "grad_norm": 2.29434871673584, "learning_rate": 3.3378728040310504e-05, "loss": 4.3033, "step": 12705 }, { "epoch": 0.34152895337901384, "grad_norm": 2.349175214767456, "learning_rate": 3.3371918834263926e-05, "loss": 4.4828, "step": 12710 }, { "epoch": 0.3416633078059922, "grad_norm": 2.490933656692505, "learning_rate": 3.336510962821735e-05, "loss": 4.2965, "step": 12715 }, { "epoch": 0.3417976622329706, "grad_norm": 2.208439588546753, "learning_rate": 3.335830042217077e-05, "loss": 4.2578, "step": 12720 }, { "epoch": 0.34193201665994893, "grad_norm": 2.362440586090088, "learning_rate": 3.335149121612421e-05, "loss": 4.2685, "step": 12725 }, { "epoch": 0.34206637108692733, "grad_norm": 2.432011127471924, "learning_rate": 3.334468201007763e-05, "loss": 4.3871, "step": 12730 }, { "epoch": 0.3422007255139057, "grad_norm": 2.506363868713379, "learning_rate": 3.333787280403105e-05, "loss": 4.2423, "step": 12735 }, { "epoch": 0.3423350799408841, "grad_norm": 2.4747965335845947, "learning_rate": 3.3331063597984473e-05, "loss": 4.3385, "step": 12740 }, { "epoch": 0.3424694343678624, "grad_norm": 2.325584650039673, "learning_rate": 3.33242543919379e-05, "loss": 4.3641, "step": 12745 }, { "epoch": 0.34260378879484077, "grad_norm": 2.3004395961761475, "learning_rate": 3.3317445185891325e-05, "loss": 4.2633, "step": 12750 }, { "epoch": 0.34273814322181917, "grad_norm": 2.0634853839874268, "learning_rate": 3.3310635979844754e-05, "loss": 4.3731, "step": 12755 }, { "epoch": 0.3428724976487975, "grad_norm": 2.610539436340332, "learning_rate": 3.3303826773798176e-05, "loss": 4.3725, "step": 12760 }, { "epoch": 0.3430068520757759, "grad_norm": 2.309396266937256, "learning_rate": 3.3297017567751605e-05, "loss": 4.3448, "step": 12765 }, { "epoch": 0.34314120650275426, "grad_norm": 2.2891199588775635, "learning_rate": 3.329020836170503e-05, "loss": 4.3961, "step": 12770 }, { "epoch": 0.34327556092973266, "grad_norm": 2.1714365482330322, "learning_rate": 3.328339915565845e-05, "loss": 4.2788, "step": 12775 }, { "epoch": 0.343409915356711, "grad_norm": 2.311539888381958, "learning_rate": 3.327658994961187e-05, "loss": 4.2987, "step": 12780 }, { "epoch": 0.34354426978368935, "grad_norm": 2.3169705867767334, "learning_rate": 3.32697807435653e-05, "loss": 4.2607, "step": 12785 }, { "epoch": 0.34367862421066775, "grad_norm": 2.112238645553589, "learning_rate": 3.326297153751873e-05, "loss": 4.2827, "step": 12790 }, { "epoch": 0.3438129786376461, "grad_norm": 2.3358473777770996, "learning_rate": 3.325616233147215e-05, "loss": 4.323, "step": 12795 }, { "epoch": 0.3439473330646245, "grad_norm": 2.499443292617798, "learning_rate": 3.3249353125425575e-05, "loss": 4.2559, "step": 12800 }, { "epoch": 0.34408168749160284, "grad_norm": 2.4832966327667236, "learning_rate": 3.3242543919379004e-05, "loss": 4.41, "step": 12805 }, { "epoch": 0.34421604191858124, "grad_norm": 2.3134751319885254, "learning_rate": 3.3235734713332426e-05, "loss": 4.3617, "step": 12810 }, { "epoch": 0.3443503963455596, "grad_norm": 2.6137137413024902, "learning_rate": 3.3228925507285855e-05, "loss": 4.1836, "step": 12815 }, { "epoch": 0.34448475077253793, "grad_norm": 2.100457191467285, "learning_rate": 3.322211630123928e-05, "loss": 4.2716, "step": 12820 }, { "epoch": 0.34461910519951633, "grad_norm": 2.5705742835998535, "learning_rate": 3.3215307095192706e-05, "loss": 4.3527, "step": 12825 }, { "epoch": 0.3447534596264947, "grad_norm": 2.684929132461548, "learning_rate": 3.320849788914613e-05, "loss": 4.2143, "step": 12830 }, { "epoch": 0.3448878140534731, "grad_norm": 2.2381880283355713, "learning_rate": 3.320168868309955e-05, "loss": 4.4224, "step": 12835 }, { "epoch": 0.3450221684804514, "grad_norm": 2.333702325820923, "learning_rate": 3.319487947705297e-05, "loss": 4.2755, "step": 12840 }, { "epoch": 0.3451565229074298, "grad_norm": 2.3880534172058105, "learning_rate": 3.31880702710064e-05, "loss": 4.2388, "step": 12845 }, { "epoch": 0.34529087733440816, "grad_norm": 2.6067099571228027, "learning_rate": 3.318126106495983e-05, "loss": 4.3602, "step": 12850 }, { "epoch": 0.34542523176138656, "grad_norm": 2.1584560871124268, "learning_rate": 3.317445185891325e-05, "loss": 4.3264, "step": 12855 }, { "epoch": 0.3455595861883649, "grad_norm": 2.2544124126434326, "learning_rate": 3.3167642652866676e-05, "loss": 4.3131, "step": 12860 }, { "epoch": 0.34569394061534325, "grad_norm": 2.511993885040283, "learning_rate": 3.31608334468201e-05, "loss": 4.2717, "step": 12865 }, { "epoch": 0.34582829504232165, "grad_norm": 2.2463836669921875, "learning_rate": 3.315402424077353e-05, "loss": 4.3676, "step": 12870 }, { "epoch": 0.3459626494693, "grad_norm": 2.109424114227295, "learning_rate": 3.3147215034726956e-05, "loss": 4.3602, "step": 12875 }, { "epoch": 0.3460970038962784, "grad_norm": 2.3144948482513428, "learning_rate": 3.314040582868038e-05, "loss": 4.4366, "step": 12880 }, { "epoch": 0.34623135832325674, "grad_norm": 2.4145798683166504, "learning_rate": 3.31335966226338e-05, "loss": 4.2313, "step": 12885 }, { "epoch": 0.34636571275023514, "grad_norm": 2.1766817569732666, "learning_rate": 3.312678741658723e-05, "loss": 4.4385, "step": 12890 }, { "epoch": 0.3465000671772135, "grad_norm": 2.543679714202881, "learning_rate": 3.311997821054065e-05, "loss": 4.3023, "step": 12895 }, { "epoch": 0.34663442160419183, "grad_norm": 2.249096632003784, "learning_rate": 3.3113169004494074e-05, "loss": 4.2675, "step": 12900 }, { "epoch": 0.34676877603117023, "grad_norm": 2.3533482551574707, "learning_rate": 3.31063597984475e-05, "loss": 4.3446, "step": 12905 }, { "epoch": 0.3469031304581486, "grad_norm": 2.6078922748565674, "learning_rate": 3.309955059240093e-05, "loss": 4.4185, "step": 12910 }, { "epoch": 0.347037484885127, "grad_norm": 2.3863863945007324, "learning_rate": 3.3092741386354354e-05, "loss": 4.3962, "step": 12915 }, { "epoch": 0.3471718393121053, "grad_norm": 2.592764377593994, "learning_rate": 3.308593218030778e-05, "loss": 4.3918, "step": 12920 }, { "epoch": 0.3473061937390837, "grad_norm": 2.2816545963287354, "learning_rate": 3.30791229742612e-05, "loss": 4.4112, "step": 12925 }, { "epoch": 0.34744054816606207, "grad_norm": 2.369417190551758, "learning_rate": 3.307231376821463e-05, "loss": 4.2738, "step": 12930 }, { "epoch": 0.3475749025930404, "grad_norm": 2.3674509525299072, "learning_rate": 3.306550456216805e-05, "loss": 4.3954, "step": 12935 }, { "epoch": 0.3477092570200188, "grad_norm": 2.3611881732940674, "learning_rate": 3.305869535612148e-05, "loss": 4.3391, "step": 12940 }, { "epoch": 0.34784361144699716, "grad_norm": 2.3024861812591553, "learning_rate": 3.30518861500749e-05, "loss": 4.4389, "step": 12945 }, { "epoch": 0.34797796587397556, "grad_norm": 2.271338701248169, "learning_rate": 3.304507694402833e-05, "loss": 4.3156, "step": 12950 }, { "epoch": 0.3481123203009539, "grad_norm": 2.339171886444092, "learning_rate": 3.303826773798175e-05, "loss": 4.4184, "step": 12955 }, { "epoch": 0.3482466747279323, "grad_norm": 2.6277692317962646, "learning_rate": 3.3031458531935175e-05, "loss": 4.3438, "step": 12960 }, { "epoch": 0.34838102915491065, "grad_norm": 2.1784210205078125, "learning_rate": 3.3024649325888604e-05, "loss": 4.3129, "step": 12965 }, { "epoch": 0.34851538358188905, "grad_norm": 2.840534210205078, "learning_rate": 3.301784011984203e-05, "loss": 4.3633, "step": 12970 }, { "epoch": 0.3486497380088674, "grad_norm": 2.3672585487365723, "learning_rate": 3.3011030913795456e-05, "loss": 4.2966, "step": 12975 }, { "epoch": 0.34878409243584574, "grad_norm": 2.3058366775512695, "learning_rate": 3.300422170774888e-05, "loss": 4.2895, "step": 12980 }, { "epoch": 0.34891844686282414, "grad_norm": 2.5000271797180176, "learning_rate": 3.29974125017023e-05, "loss": 4.325, "step": 12985 }, { "epoch": 0.3490528012898025, "grad_norm": 2.2673470973968506, "learning_rate": 3.299060329565572e-05, "loss": 4.3071, "step": 12990 }, { "epoch": 0.3491871557167809, "grad_norm": 2.4056758880615234, "learning_rate": 3.298379408960915e-05, "loss": 4.2686, "step": 12995 }, { "epoch": 0.34932151014375923, "grad_norm": 2.2597715854644775, "learning_rate": 3.297698488356258e-05, "loss": 4.2819, "step": 13000 }, { "epoch": 0.34945586457073763, "grad_norm": 2.7082595825195312, "learning_rate": 3.2970175677516e-05, "loss": 4.2946, "step": 13005 }, { "epoch": 0.349590218997716, "grad_norm": 2.4204490184783936, "learning_rate": 3.2963366471469425e-05, "loss": 4.359, "step": 13010 }, { "epoch": 0.3497245734246943, "grad_norm": 2.5691988468170166, "learning_rate": 3.2956557265422854e-05, "loss": 4.3456, "step": 13015 }, { "epoch": 0.3498589278516727, "grad_norm": 2.4026260375976562, "learning_rate": 3.2949748059376276e-05, "loss": 4.3767, "step": 13020 }, { "epoch": 0.34999328227865106, "grad_norm": 2.2824840545654297, "learning_rate": 3.2942938853329705e-05, "loss": 4.3572, "step": 13025 }, { "epoch": 0.35012763670562946, "grad_norm": 2.3221051692962646, "learning_rate": 3.293612964728313e-05, "loss": 4.3843, "step": 13030 }, { "epoch": 0.3502619911326078, "grad_norm": 2.3892171382904053, "learning_rate": 3.292932044123656e-05, "loss": 4.2276, "step": 13035 }, { "epoch": 0.3503963455595862, "grad_norm": 2.467667579650879, "learning_rate": 3.292251123518998e-05, "loss": 4.3588, "step": 13040 }, { "epoch": 0.35053069998656455, "grad_norm": 2.259399175643921, "learning_rate": 3.29157020291434e-05, "loss": 4.2737, "step": 13045 }, { "epoch": 0.35066505441354295, "grad_norm": 2.447152853012085, "learning_rate": 3.2908892823096824e-05, "loss": 4.3613, "step": 13050 }, { "epoch": 0.3507994088405213, "grad_norm": 2.2995541095733643, "learning_rate": 3.290208361705025e-05, "loss": 4.2341, "step": 13055 }, { "epoch": 0.35093376326749964, "grad_norm": 2.4681267738342285, "learning_rate": 3.289527441100368e-05, "loss": 4.3615, "step": 13060 }, { "epoch": 0.35106811769447804, "grad_norm": 2.305631160736084, "learning_rate": 3.2888465204957104e-05, "loss": 4.2977, "step": 13065 }, { "epoch": 0.3512024721214564, "grad_norm": 2.4446351528167725, "learning_rate": 3.2881655998910526e-05, "loss": 4.271, "step": 13070 }, { "epoch": 0.3513368265484348, "grad_norm": 2.445784330368042, "learning_rate": 3.2874846792863955e-05, "loss": 4.3712, "step": 13075 }, { "epoch": 0.35147118097541313, "grad_norm": 2.290306806564331, "learning_rate": 3.286803758681738e-05, "loss": 4.3611, "step": 13080 }, { "epoch": 0.35160553540239153, "grad_norm": 2.186196804046631, "learning_rate": 3.2861228380770807e-05, "loss": 4.3724, "step": 13085 }, { "epoch": 0.3517398898293699, "grad_norm": 2.5417280197143555, "learning_rate": 3.285441917472423e-05, "loss": 4.3885, "step": 13090 }, { "epoch": 0.3518742442563482, "grad_norm": 2.3365049362182617, "learning_rate": 3.284760996867766e-05, "loss": 4.4129, "step": 13095 }, { "epoch": 0.3520085986833266, "grad_norm": 2.5200552940368652, "learning_rate": 3.284080076263108e-05, "loss": 4.3778, "step": 13100 }, { "epoch": 0.35214295311030497, "grad_norm": 2.069540023803711, "learning_rate": 3.28339915565845e-05, "loss": 4.2942, "step": 13105 }, { "epoch": 0.35227730753728337, "grad_norm": 2.589008331298828, "learning_rate": 3.2827182350537925e-05, "loss": 4.2585, "step": 13110 }, { "epoch": 0.3524116619642617, "grad_norm": 2.5103719234466553, "learning_rate": 3.2820373144491354e-05, "loss": 4.3366, "step": 13115 }, { "epoch": 0.3525460163912401, "grad_norm": 2.3338003158569336, "learning_rate": 3.281356393844478e-05, "loss": 4.2475, "step": 13120 }, { "epoch": 0.35268037081821846, "grad_norm": 2.2297842502593994, "learning_rate": 3.2806754732398205e-05, "loss": 4.3412, "step": 13125 }, { "epoch": 0.3528147252451968, "grad_norm": 2.464806318283081, "learning_rate": 3.279994552635163e-05, "loss": 4.3853, "step": 13130 }, { "epoch": 0.3529490796721752, "grad_norm": 2.184504270553589, "learning_rate": 3.279313632030505e-05, "loss": 4.3079, "step": 13135 }, { "epoch": 0.35308343409915355, "grad_norm": 2.5422070026397705, "learning_rate": 3.278632711425848e-05, "loss": 4.47, "step": 13140 }, { "epoch": 0.35321778852613195, "grad_norm": 2.4231884479522705, "learning_rate": 3.27795179082119e-05, "loss": 4.3651, "step": 13145 }, { "epoch": 0.3533521429531103, "grad_norm": 2.534411668777466, "learning_rate": 3.277270870216533e-05, "loss": 4.3518, "step": 13150 }, { "epoch": 0.3534864973800887, "grad_norm": 2.259965419769287, "learning_rate": 3.276589949611875e-05, "loss": 4.2198, "step": 13155 }, { "epoch": 0.35362085180706704, "grad_norm": 2.5303735733032227, "learning_rate": 3.275909029007218e-05, "loss": 4.2907, "step": 13160 }, { "epoch": 0.35375520623404544, "grad_norm": 2.239152431488037, "learning_rate": 3.2752281084025603e-05, "loss": 4.2998, "step": 13165 }, { "epoch": 0.3538895606610238, "grad_norm": 2.473051071166992, "learning_rate": 3.2745471877979026e-05, "loss": 4.2962, "step": 13170 }, { "epoch": 0.35402391508800213, "grad_norm": 2.3368377685546875, "learning_rate": 3.2738662671932455e-05, "loss": 4.1866, "step": 13175 }, { "epoch": 0.35415826951498053, "grad_norm": 2.282496929168701, "learning_rate": 3.2731853465885884e-05, "loss": 4.367, "step": 13180 }, { "epoch": 0.3542926239419589, "grad_norm": 2.385413646697998, "learning_rate": 3.2725044259839306e-05, "loss": 4.343, "step": 13185 }, { "epoch": 0.3544269783689373, "grad_norm": 2.589935064315796, "learning_rate": 3.271823505379273e-05, "loss": 4.4793, "step": 13190 }, { "epoch": 0.3545613327959156, "grad_norm": 2.264702558517456, "learning_rate": 3.271142584774615e-05, "loss": 4.4538, "step": 13195 }, { "epoch": 0.354695687222894, "grad_norm": 2.366159439086914, "learning_rate": 3.270461664169958e-05, "loss": 4.3138, "step": 13200 }, { "epoch": 0.35483004164987236, "grad_norm": 2.3087618350982666, "learning_rate": 3.2697807435653e-05, "loss": 4.3036, "step": 13205 }, { "epoch": 0.3549643960768507, "grad_norm": 2.1213228702545166, "learning_rate": 3.269099822960643e-05, "loss": 4.2427, "step": 13210 }, { "epoch": 0.3550987505038291, "grad_norm": 2.2862708568573, "learning_rate": 3.268418902355985e-05, "loss": 4.2808, "step": 13215 }, { "epoch": 0.35523310493080745, "grad_norm": 2.5117104053497314, "learning_rate": 3.267737981751328e-05, "loss": 4.4748, "step": 13220 }, { "epoch": 0.35536745935778585, "grad_norm": 2.301981210708618, "learning_rate": 3.2670570611466705e-05, "loss": 4.2165, "step": 13225 }, { "epoch": 0.3555018137847642, "grad_norm": 2.513745069503784, "learning_rate": 3.266376140542013e-05, "loss": 4.1783, "step": 13230 }, { "epoch": 0.3556361682117426, "grad_norm": 2.4079809188842773, "learning_rate": 3.2656952199373556e-05, "loss": 4.2204, "step": 13235 }, { "epoch": 0.35577052263872094, "grad_norm": 2.3042685985565186, "learning_rate": 3.2650142993326985e-05, "loss": 4.3002, "step": 13240 }, { "epoch": 0.3559048770656993, "grad_norm": 2.2028629779815674, "learning_rate": 3.264333378728041e-05, "loss": 4.441, "step": 13245 }, { "epoch": 0.3560392314926777, "grad_norm": 2.6395745277404785, "learning_rate": 3.263652458123383e-05, "loss": 4.3542, "step": 13250 }, { "epoch": 0.35617358591965603, "grad_norm": 2.1632800102233887, "learning_rate": 3.262971537518725e-05, "loss": 4.4031, "step": 13255 }, { "epoch": 0.35630794034663443, "grad_norm": 2.2891623973846436, "learning_rate": 3.262290616914068e-05, "loss": 4.3825, "step": 13260 }, { "epoch": 0.3564422947736128, "grad_norm": 2.374030113220215, "learning_rate": 3.26160969630941e-05, "loss": 4.3131, "step": 13265 }, { "epoch": 0.3565766492005912, "grad_norm": 2.316019058227539, "learning_rate": 3.260928775704753e-05, "loss": 4.3345, "step": 13270 }, { "epoch": 0.3567110036275695, "grad_norm": 2.604022979736328, "learning_rate": 3.2602478551000954e-05, "loss": 4.3721, "step": 13275 }, { "epoch": 0.3568453580545479, "grad_norm": 2.4381356239318848, "learning_rate": 3.2595669344954383e-05, "loss": 4.312, "step": 13280 }, { "epoch": 0.35697971248152627, "grad_norm": 2.338482618331909, "learning_rate": 3.2588860138907806e-05, "loss": 4.3368, "step": 13285 }, { "epoch": 0.3571140669085046, "grad_norm": 2.236905813217163, "learning_rate": 3.258205093286123e-05, "loss": 4.2244, "step": 13290 }, { "epoch": 0.357248421335483, "grad_norm": 2.308786153793335, "learning_rate": 3.257524172681466e-05, "loss": 4.2951, "step": 13295 }, { "epoch": 0.35738277576246136, "grad_norm": 2.677859306335449, "learning_rate": 3.256843252076808e-05, "loss": 4.2441, "step": 13300 }, { "epoch": 0.35751713018943976, "grad_norm": 2.5888803005218506, "learning_rate": 3.256162331472151e-05, "loss": 4.2934, "step": 13305 }, { "epoch": 0.3576514846164181, "grad_norm": 2.2555806636810303, "learning_rate": 3.255481410867493e-05, "loss": 4.3666, "step": 13310 }, { "epoch": 0.3577858390433965, "grad_norm": 2.345254898071289, "learning_rate": 3.254800490262835e-05, "loss": 4.478, "step": 13315 }, { "epoch": 0.35792019347037485, "grad_norm": 2.3655991554260254, "learning_rate": 3.2541195696581775e-05, "loss": 4.3758, "step": 13320 }, { "epoch": 0.3580545478973532, "grad_norm": 2.3388137817382812, "learning_rate": 3.2534386490535204e-05, "loss": 4.3717, "step": 13325 }, { "epoch": 0.3581889023243316, "grad_norm": 2.4523465633392334, "learning_rate": 3.252757728448863e-05, "loss": 4.3566, "step": 13330 }, { "epoch": 0.35832325675130994, "grad_norm": 2.712498903274536, "learning_rate": 3.2520768078442056e-05, "loss": 4.3991, "step": 13335 }, { "epoch": 0.35845761117828834, "grad_norm": 2.667022943496704, "learning_rate": 3.251395887239548e-05, "loss": 4.4253, "step": 13340 }, { "epoch": 0.3585919656052667, "grad_norm": 2.307260751724243, "learning_rate": 3.250714966634891e-05, "loss": 4.319, "step": 13345 }, { "epoch": 0.3587263200322451, "grad_norm": 2.3503916263580322, "learning_rate": 3.250034046030233e-05, "loss": 4.4879, "step": 13350 }, { "epoch": 0.3588606744592234, "grad_norm": 2.235161781311035, "learning_rate": 3.249353125425575e-05, "loss": 4.3408, "step": 13355 }, { "epoch": 0.35899502888620183, "grad_norm": 2.282172918319702, "learning_rate": 3.248672204820918e-05, "loss": 4.3556, "step": 13360 }, { "epoch": 0.3591293833131802, "grad_norm": 2.2733702659606934, "learning_rate": 3.247991284216261e-05, "loss": 4.2609, "step": 13365 }, { "epoch": 0.3592637377401585, "grad_norm": 2.234506607055664, "learning_rate": 3.247310363611603e-05, "loss": 4.3787, "step": 13370 }, { "epoch": 0.3593980921671369, "grad_norm": 2.5725324153900146, "learning_rate": 3.2466294430069454e-05, "loss": 4.2591, "step": 13375 }, { "epoch": 0.35953244659411526, "grad_norm": 2.399301767349243, "learning_rate": 3.2459485224022876e-05, "loss": 4.3134, "step": 13380 }, { "epoch": 0.35966680102109366, "grad_norm": 2.3300259113311768, "learning_rate": 3.2452676017976305e-05, "loss": 4.4212, "step": 13385 }, { "epoch": 0.359801155448072, "grad_norm": 2.4857661724090576, "learning_rate": 3.2445866811929734e-05, "loss": 4.3523, "step": 13390 }, { "epoch": 0.3599355098750504, "grad_norm": 2.1681809425354004, "learning_rate": 3.243905760588316e-05, "loss": 4.2649, "step": 13395 }, { "epoch": 0.36006986430202875, "grad_norm": 2.599060535430908, "learning_rate": 3.243224839983658e-05, "loss": 4.2693, "step": 13400 }, { "epoch": 0.3602042187290071, "grad_norm": 2.467041254043579, "learning_rate": 3.242543919379001e-05, "loss": 4.3746, "step": 13405 }, { "epoch": 0.3603385731559855, "grad_norm": 2.2224814891815186, "learning_rate": 3.241862998774343e-05, "loss": 4.2686, "step": 13410 }, { "epoch": 0.36047292758296384, "grad_norm": 2.422572374343872, "learning_rate": 3.241182078169685e-05, "loss": 4.3886, "step": 13415 }, { "epoch": 0.36060728200994224, "grad_norm": 2.1943535804748535, "learning_rate": 3.240501157565028e-05, "loss": 4.3362, "step": 13420 }, { "epoch": 0.3607416364369206, "grad_norm": 2.3149867057800293, "learning_rate": 3.239820236960371e-05, "loss": 4.4284, "step": 13425 }, { "epoch": 0.360875990863899, "grad_norm": 2.480860948562622, "learning_rate": 3.239139316355713e-05, "loss": 4.3047, "step": 13430 }, { "epoch": 0.36101034529087733, "grad_norm": 2.400604248046875, "learning_rate": 3.2384583957510555e-05, "loss": 4.3188, "step": 13435 }, { "epoch": 0.3611446997178557, "grad_norm": 2.2765445709228516, "learning_rate": 3.237777475146398e-05, "loss": 4.4669, "step": 13440 }, { "epoch": 0.3612790541448341, "grad_norm": 2.3425376415252686, "learning_rate": 3.2370965545417406e-05, "loss": 4.3459, "step": 13445 }, { "epoch": 0.3614134085718124, "grad_norm": 2.1203501224517822, "learning_rate": 3.2364156339370835e-05, "loss": 4.2457, "step": 13450 }, { "epoch": 0.3615477629987908, "grad_norm": 2.41072678565979, "learning_rate": 3.235734713332426e-05, "loss": 4.2384, "step": 13455 }, { "epoch": 0.36168211742576917, "grad_norm": 2.6223561763763428, "learning_rate": 3.235053792727768e-05, "loss": 4.4556, "step": 13460 }, { "epoch": 0.36181647185274757, "grad_norm": 2.5104267597198486, "learning_rate": 3.23437287212311e-05, "loss": 4.3188, "step": 13465 }, { "epoch": 0.3619508262797259, "grad_norm": 2.3373522758483887, "learning_rate": 3.233691951518453e-05, "loss": 4.3959, "step": 13470 }, { "epoch": 0.3620851807067043, "grad_norm": 2.3905205726623535, "learning_rate": 3.2330110309137954e-05, "loss": 4.2754, "step": 13475 }, { "epoch": 0.36221953513368266, "grad_norm": 2.557098150253296, "learning_rate": 3.232330110309138e-05, "loss": 4.3411, "step": 13480 }, { "epoch": 0.362353889560661, "grad_norm": 2.4500479698181152, "learning_rate": 3.2316491897044805e-05, "loss": 4.234, "step": 13485 }, { "epoch": 0.3624882439876394, "grad_norm": 2.1909146308898926, "learning_rate": 3.2309682690998234e-05, "loss": 4.4129, "step": 13490 }, { "epoch": 0.36262259841461775, "grad_norm": 2.441007614135742, "learning_rate": 3.2302873484951656e-05, "loss": 4.3136, "step": 13495 }, { "epoch": 0.36275695284159615, "grad_norm": 2.4471328258514404, "learning_rate": 3.229606427890508e-05, "loss": 4.3156, "step": 13500 }, { "epoch": 0.3628913072685745, "grad_norm": 2.4198410511016846, "learning_rate": 3.228925507285851e-05, "loss": 4.3298, "step": 13505 }, { "epoch": 0.3630256616955529, "grad_norm": 2.2383103370666504, "learning_rate": 3.2282445866811937e-05, "loss": 4.4694, "step": 13510 }, { "epoch": 0.36316001612253124, "grad_norm": 2.175715684890747, "learning_rate": 3.227563666076536e-05, "loss": 4.3111, "step": 13515 }, { "epoch": 0.3632943705495096, "grad_norm": 2.485316514968872, "learning_rate": 3.226882745471878e-05, "loss": 4.2738, "step": 13520 }, { "epoch": 0.363428724976488, "grad_norm": 2.4540674686431885, "learning_rate": 3.22620182486722e-05, "loss": 4.175, "step": 13525 }, { "epoch": 0.3635630794034663, "grad_norm": 2.538102388381958, "learning_rate": 3.225520904262563e-05, "loss": 4.3979, "step": 13530 }, { "epoch": 0.3636974338304447, "grad_norm": 2.215350389480591, "learning_rate": 3.2248399836579055e-05, "loss": 4.2962, "step": 13535 }, { "epoch": 0.36383178825742307, "grad_norm": 2.4535844326019287, "learning_rate": 3.2241590630532484e-05, "loss": 4.2784, "step": 13540 }, { "epoch": 0.3639661426844015, "grad_norm": 2.6061220169067383, "learning_rate": 3.2234781424485906e-05, "loss": 4.4522, "step": 13545 }, { "epoch": 0.3641004971113798, "grad_norm": 2.3602547645568848, "learning_rate": 3.2227972218439335e-05, "loss": 4.2543, "step": 13550 }, { "epoch": 0.36423485153835816, "grad_norm": 2.4458539485931396, "learning_rate": 3.222116301239276e-05, "loss": 4.3424, "step": 13555 }, { "epoch": 0.36436920596533656, "grad_norm": 2.3571009635925293, "learning_rate": 3.221435380634618e-05, "loss": 4.277, "step": 13560 }, { "epoch": 0.3645035603923149, "grad_norm": 2.3767616748809814, "learning_rate": 3.22075446002996e-05, "loss": 4.2392, "step": 13565 }, { "epoch": 0.3646379148192933, "grad_norm": 2.26343035697937, "learning_rate": 3.220073539425304e-05, "loss": 4.3238, "step": 13570 }, { "epoch": 0.36477226924627165, "grad_norm": 2.4008126258850098, "learning_rate": 3.219392618820646e-05, "loss": 4.3178, "step": 13575 }, { "epoch": 0.36490662367325005, "grad_norm": 2.539283037185669, "learning_rate": 3.218711698215988e-05, "loss": 4.2648, "step": 13580 }, { "epoch": 0.3650409781002284, "grad_norm": 2.628666400909424, "learning_rate": 3.2180307776113304e-05, "loss": 4.2871, "step": 13585 }, { "epoch": 0.3651753325272068, "grad_norm": 2.194849729537964, "learning_rate": 3.217349857006673e-05, "loss": 4.256, "step": 13590 }, { "epoch": 0.36530968695418514, "grad_norm": 2.3149709701538086, "learning_rate": 3.2166689364020156e-05, "loss": 4.2944, "step": 13595 }, { "epoch": 0.3654440413811635, "grad_norm": 2.3692774772644043, "learning_rate": 3.2159880157973585e-05, "loss": 4.3548, "step": 13600 }, { "epoch": 0.3655783958081419, "grad_norm": 2.481020212173462, "learning_rate": 3.215307095192701e-05, "loss": 4.2844, "step": 13605 }, { "epoch": 0.36571275023512023, "grad_norm": 2.3730828762054443, "learning_rate": 3.214626174588043e-05, "loss": 4.3499, "step": 13610 }, { "epoch": 0.36584710466209863, "grad_norm": 2.391204595565796, "learning_rate": 3.213945253983386e-05, "loss": 4.323, "step": 13615 }, { "epoch": 0.365981459089077, "grad_norm": 3.4085335731506348, "learning_rate": 3.213264333378728e-05, "loss": 4.3378, "step": 13620 }, { "epoch": 0.3661158135160554, "grad_norm": 2.3898353576660156, "learning_rate": 3.21258341277407e-05, "loss": 4.2884, "step": 13625 }, { "epoch": 0.3662501679430337, "grad_norm": 2.4828672409057617, "learning_rate": 3.211902492169413e-05, "loss": 4.2994, "step": 13630 }, { "epoch": 0.36638452237001207, "grad_norm": 2.3456521034240723, "learning_rate": 3.211221571564756e-05, "loss": 4.2502, "step": 13635 }, { "epoch": 0.36651887679699047, "grad_norm": 2.24570894241333, "learning_rate": 3.210540650960098e-05, "loss": 4.2802, "step": 13640 }, { "epoch": 0.3666532312239688, "grad_norm": 2.4074454307556152, "learning_rate": 3.2098597303554406e-05, "loss": 4.3179, "step": 13645 }, { "epoch": 0.3667875856509472, "grad_norm": 2.4296061992645264, "learning_rate": 3.209178809750783e-05, "loss": 4.3309, "step": 13650 }, { "epoch": 0.36692194007792556, "grad_norm": 2.282538890838623, "learning_rate": 3.208497889146126e-05, "loss": 4.3102, "step": 13655 }, { "epoch": 0.36705629450490396, "grad_norm": 2.2850332260131836, "learning_rate": 3.2078169685414686e-05, "loss": 4.3872, "step": 13660 }, { "epoch": 0.3671906489318823, "grad_norm": 2.5425775051116943, "learning_rate": 3.207136047936811e-05, "loss": 4.3569, "step": 13665 }, { "epoch": 0.3673250033588607, "grad_norm": 2.4117627143859863, "learning_rate": 3.206455127332153e-05, "loss": 4.2418, "step": 13670 }, { "epoch": 0.36745935778583905, "grad_norm": 2.239330291748047, "learning_rate": 3.205774206727496e-05, "loss": 4.4028, "step": 13675 }, { "epoch": 0.3675937122128174, "grad_norm": 2.353691816329956, "learning_rate": 3.205093286122838e-05, "loss": 4.3272, "step": 13680 }, { "epoch": 0.3677280666397958, "grad_norm": 2.436859369277954, "learning_rate": 3.2044123655181804e-05, "loss": 4.2692, "step": 13685 }, { "epoch": 0.36786242106677414, "grad_norm": 2.4121267795562744, "learning_rate": 3.203731444913523e-05, "loss": 4.2616, "step": 13690 }, { "epoch": 0.36799677549375254, "grad_norm": 2.618720293045044, "learning_rate": 3.203050524308866e-05, "loss": 4.3374, "step": 13695 }, { "epoch": 0.3681311299207309, "grad_norm": 2.3474345207214355, "learning_rate": 3.2023696037042084e-05, "loss": 4.2059, "step": 13700 }, { "epoch": 0.3682654843477093, "grad_norm": 2.301142692565918, "learning_rate": 3.201688683099551e-05, "loss": 4.1871, "step": 13705 }, { "epoch": 0.3683998387746876, "grad_norm": 2.4006447792053223, "learning_rate": 3.201007762494893e-05, "loss": 4.3669, "step": 13710 }, { "epoch": 0.36853419320166597, "grad_norm": 2.057865858078003, "learning_rate": 3.200326841890236e-05, "loss": 4.2507, "step": 13715 }, { "epoch": 0.36866854762864437, "grad_norm": 2.4721176624298096, "learning_rate": 3.199645921285579e-05, "loss": 4.2579, "step": 13720 }, { "epoch": 0.3688029020556227, "grad_norm": 2.228379011154175, "learning_rate": 3.198965000680921e-05, "loss": 4.3001, "step": 13725 }, { "epoch": 0.3689372564826011, "grad_norm": 2.6983954906463623, "learning_rate": 3.198284080076263e-05, "loss": 4.2866, "step": 13730 }, { "epoch": 0.36907161090957946, "grad_norm": 2.450631856918335, "learning_rate": 3.1976031594716054e-05, "loss": 4.2417, "step": 13735 }, { "epoch": 0.36920596533655786, "grad_norm": 2.277873992919922, "learning_rate": 3.196922238866948e-05, "loss": 4.3859, "step": 13740 }, { "epoch": 0.3693403197635362, "grad_norm": 2.459268808364868, "learning_rate": 3.1962413182622905e-05, "loss": 4.4825, "step": 13745 }, { "epoch": 0.36947467419051455, "grad_norm": 2.222341299057007, "learning_rate": 3.1955603976576334e-05, "loss": 4.4071, "step": 13750 }, { "epoch": 0.36960902861749295, "grad_norm": 2.232779026031494, "learning_rate": 3.1948794770529757e-05, "loss": 4.3603, "step": 13755 }, { "epoch": 0.3697433830444713, "grad_norm": 2.4482953548431396, "learning_rate": 3.1941985564483186e-05, "loss": 4.4021, "step": 13760 }, { "epoch": 0.3698777374714497, "grad_norm": 2.452336549758911, "learning_rate": 3.193517635843661e-05, "loss": 4.3711, "step": 13765 }, { "epoch": 0.37001209189842804, "grad_norm": 2.470099449157715, "learning_rate": 3.192836715239003e-05, "loss": 4.4044, "step": 13770 }, { "epoch": 0.37014644632540644, "grad_norm": 2.5558035373687744, "learning_rate": 3.192155794634345e-05, "loss": 4.3643, "step": 13775 }, { "epoch": 0.3702808007523848, "grad_norm": 2.4871790409088135, "learning_rate": 3.191474874029688e-05, "loss": 4.2087, "step": 13780 }, { "epoch": 0.3704151551793632, "grad_norm": 2.2882003784179688, "learning_rate": 3.190793953425031e-05, "loss": 4.3122, "step": 13785 }, { "epoch": 0.37054950960634153, "grad_norm": 2.282665967941284, "learning_rate": 3.190113032820373e-05, "loss": 4.3829, "step": 13790 }, { "epoch": 0.3706838640333199, "grad_norm": 2.388610363006592, "learning_rate": 3.1894321122157155e-05, "loss": 4.3001, "step": 13795 }, { "epoch": 0.3708182184602983, "grad_norm": 2.1769843101501465, "learning_rate": 3.1887511916110584e-05, "loss": 4.4585, "step": 13800 }, { "epoch": 0.3709525728872766, "grad_norm": 2.489595413208008, "learning_rate": 3.1880702710064006e-05, "loss": 4.2977, "step": 13805 }, { "epoch": 0.371086927314255, "grad_norm": 2.352616548538208, "learning_rate": 3.1873893504017435e-05, "loss": 4.2531, "step": 13810 }, { "epoch": 0.37122128174123337, "grad_norm": 2.5033323764801025, "learning_rate": 3.186708429797086e-05, "loss": 4.4416, "step": 13815 }, { "epoch": 0.37135563616821177, "grad_norm": 2.4697649478912354, "learning_rate": 3.186027509192429e-05, "loss": 4.3765, "step": 13820 }, { "epoch": 0.3714899905951901, "grad_norm": 2.4438958168029785, "learning_rate": 3.185346588587771e-05, "loss": 4.334, "step": 13825 }, { "epoch": 0.37162434502216846, "grad_norm": 2.189882278442383, "learning_rate": 3.184665667983113e-05, "loss": 4.2742, "step": 13830 }, { "epoch": 0.37175869944914686, "grad_norm": 2.1358470916748047, "learning_rate": 3.1839847473784553e-05, "loss": 4.3132, "step": 13835 }, { "epoch": 0.3718930538761252, "grad_norm": 2.26397967338562, "learning_rate": 3.183303826773798e-05, "loss": 4.2145, "step": 13840 }, { "epoch": 0.3720274083031036, "grad_norm": 2.44718074798584, "learning_rate": 3.182622906169141e-05, "loss": 4.3329, "step": 13845 }, { "epoch": 0.37216176273008195, "grad_norm": 2.214589834213257, "learning_rate": 3.1819419855644834e-05, "loss": 4.2368, "step": 13850 }, { "epoch": 0.37229611715706035, "grad_norm": 2.757277727127075, "learning_rate": 3.1812610649598256e-05, "loss": 4.2521, "step": 13855 }, { "epoch": 0.3724304715840387, "grad_norm": 2.1392934322357178, "learning_rate": 3.1805801443551685e-05, "loss": 4.3144, "step": 13860 }, { "epoch": 0.37256482601101704, "grad_norm": 2.5721726417541504, "learning_rate": 3.179899223750511e-05, "loss": 4.2698, "step": 13865 }, { "epoch": 0.37269918043799544, "grad_norm": 2.332751512527466, "learning_rate": 3.1792183031458536e-05, "loss": 4.2705, "step": 13870 }, { "epoch": 0.3728335348649738, "grad_norm": 2.238003969192505, "learning_rate": 3.178537382541196e-05, "loss": 4.3019, "step": 13875 }, { "epoch": 0.3729678892919522, "grad_norm": 2.2340259552001953, "learning_rate": 3.177856461936538e-05, "loss": 4.3363, "step": 13880 }, { "epoch": 0.3731022437189305, "grad_norm": 2.435854911804199, "learning_rate": 3.177175541331881e-05, "loss": 4.3889, "step": 13885 }, { "epoch": 0.3732365981459089, "grad_norm": 2.3172571659088135, "learning_rate": 3.176494620727223e-05, "loss": 4.3017, "step": 13890 }, { "epoch": 0.37337095257288727, "grad_norm": 2.4550118446350098, "learning_rate": 3.1758137001225655e-05, "loss": 4.3483, "step": 13895 }, { "epoch": 0.37350530699986567, "grad_norm": 2.4662134647369385, "learning_rate": 3.1751327795179084e-05, "loss": 4.1626, "step": 13900 }, { "epoch": 0.373639661426844, "grad_norm": 2.231564998626709, "learning_rate": 3.174451858913251e-05, "loss": 4.3447, "step": 13905 }, { "epoch": 0.37377401585382236, "grad_norm": 2.454305648803711, "learning_rate": 3.1737709383085935e-05, "loss": 4.3161, "step": 13910 }, { "epoch": 0.37390837028080076, "grad_norm": 2.379092216491699, "learning_rate": 3.173090017703936e-05, "loss": 4.2613, "step": 13915 }, { "epoch": 0.3740427247077791, "grad_norm": 2.5741708278656006, "learning_rate": 3.172409097099278e-05, "loss": 4.2513, "step": 13920 }, { "epoch": 0.3741770791347575, "grad_norm": 2.3459253311157227, "learning_rate": 3.171728176494621e-05, "loss": 4.2775, "step": 13925 }, { "epoch": 0.37431143356173585, "grad_norm": 2.249350070953369, "learning_rate": 3.171047255889964e-05, "loss": 4.2756, "step": 13930 }, { "epoch": 0.37444578798871425, "grad_norm": 2.5505788326263428, "learning_rate": 3.170366335285306e-05, "loss": 4.2877, "step": 13935 }, { "epoch": 0.3745801424156926, "grad_norm": 2.5322370529174805, "learning_rate": 3.169685414680648e-05, "loss": 4.3698, "step": 13940 }, { "epoch": 0.37471449684267094, "grad_norm": 2.253079891204834, "learning_rate": 3.169004494075991e-05, "loss": 4.2765, "step": 13945 }, { "epoch": 0.37484885126964934, "grad_norm": 2.479083776473999, "learning_rate": 3.1683235734713333e-05, "loss": 4.3739, "step": 13950 }, { "epoch": 0.3749832056966277, "grad_norm": 2.4715497493743896, "learning_rate": 3.1676426528666756e-05, "loss": 4.3076, "step": 13955 }, { "epoch": 0.3751175601236061, "grad_norm": 2.3707046508789062, "learning_rate": 3.1669617322620185e-05, "loss": 4.2982, "step": 13960 }, { "epoch": 0.37525191455058443, "grad_norm": 2.500237226486206, "learning_rate": 3.1662808116573614e-05, "loss": 4.2391, "step": 13965 }, { "epoch": 0.37538626897756283, "grad_norm": 2.3745200634002686, "learning_rate": 3.1655998910527036e-05, "loss": 4.3926, "step": 13970 }, { "epoch": 0.3755206234045412, "grad_norm": 2.675363302230835, "learning_rate": 3.164918970448046e-05, "loss": 4.336, "step": 13975 }, { "epoch": 0.3756549778315195, "grad_norm": 2.282299280166626, "learning_rate": 3.164238049843388e-05, "loss": 4.2732, "step": 13980 }, { "epoch": 0.3757893322584979, "grad_norm": 2.0888710021972656, "learning_rate": 3.163557129238731e-05, "loss": 4.283, "step": 13985 }, { "epoch": 0.37592368668547627, "grad_norm": 2.2703559398651123, "learning_rate": 3.162876208634073e-05, "loss": 4.3679, "step": 13990 }, { "epoch": 0.37605804111245467, "grad_norm": 2.280846118927002, "learning_rate": 3.162195288029416e-05, "loss": 4.3027, "step": 13995 }, { "epoch": 0.376192395539433, "grad_norm": 2.2215635776519775, "learning_rate": 3.161514367424758e-05, "loss": 4.1959, "step": 14000 }, { "epoch": 0.3763267499664114, "grad_norm": 2.3503646850585938, "learning_rate": 3.160833446820101e-05, "loss": 4.2136, "step": 14005 }, { "epoch": 0.37646110439338976, "grad_norm": 2.187411069869995, "learning_rate": 3.1601525262154435e-05, "loss": 4.2578, "step": 14010 }, { "epoch": 0.37659545882036816, "grad_norm": 2.5558669567108154, "learning_rate": 3.159471605610786e-05, "loss": 4.304, "step": 14015 }, { "epoch": 0.3767298132473465, "grad_norm": 2.2085471153259277, "learning_rate": 3.1587906850061286e-05, "loss": 4.3951, "step": 14020 }, { "epoch": 0.37686416767432485, "grad_norm": 2.408627510070801, "learning_rate": 3.1581097644014715e-05, "loss": 4.3445, "step": 14025 }, { "epoch": 0.37699852210130325, "grad_norm": 2.470081329345703, "learning_rate": 3.157428843796814e-05, "loss": 4.2864, "step": 14030 }, { "epoch": 0.3771328765282816, "grad_norm": 2.3218884468078613, "learning_rate": 3.156747923192156e-05, "loss": 4.274, "step": 14035 }, { "epoch": 0.37726723095526, "grad_norm": 2.378514051437378, "learning_rate": 3.156067002587498e-05, "loss": 4.3995, "step": 14040 }, { "epoch": 0.37740158538223834, "grad_norm": 2.340200424194336, "learning_rate": 3.1553860819828404e-05, "loss": 4.3735, "step": 14045 }, { "epoch": 0.37753593980921674, "grad_norm": 2.374241590499878, "learning_rate": 3.154705161378183e-05, "loss": 4.2402, "step": 14050 }, { "epoch": 0.3776702942361951, "grad_norm": 2.232448101043701, "learning_rate": 3.154024240773526e-05, "loss": 4.3424, "step": 14055 }, { "epoch": 0.3778046486631734, "grad_norm": 2.2524218559265137, "learning_rate": 3.1533433201688684e-05, "loss": 4.2701, "step": 14060 }, { "epoch": 0.3779390030901518, "grad_norm": 2.14957594871521, "learning_rate": 3.152662399564211e-05, "loss": 4.1266, "step": 14065 }, { "epoch": 0.37807335751713017, "grad_norm": 2.4026060104370117, "learning_rate": 3.1519814789595536e-05, "loss": 4.407, "step": 14070 }, { "epoch": 0.37820771194410857, "grad_norm": 2.387247323989868, "learning_rate": 3.151300558354896e-05, "loss": 4.2958, "step": 14075 }, { "epoch": 0.3783420663710869, "grad_norm": 2.4510486125946045, "learning_rate": 3.150619637750239e-05, "loss": 4.3604, "step": 14080 }, { "epoch": 0.3784764207980653, "grad_norm": 2.552234649658203, "learning_rate": 3.149938717145581e-05, "loss": 4.3157, "step": 14085 }, { "epoch": 0.37861077522504366, "grad_norm": 2.2374775409698486, "learning_rate": 3.149257796540924e-05, "loss": 4.2936, "step": 14090 }, { "epoch": 0.37874512965202206, "grad_norm": 2.369304895401001, "learning_rate": 3.148576875936266e-05, "loss": 4.3154, "step": 14095 }, { "epoch": 0.3788794840790004, "grad_norm": 2.432016134262085, "learning_rate": 3.147895955331608e-05, "loss": 4.1877, "step": 14100 }, { "epoch": 0.37901383850597875, "grad_norm": 2.4371907711029053, "learning_rate": 3.1472150347269505e-05, "loss": 4.1549, "step": 14105 }, { "epoch": 0.37914819293295715, "grad_norm": 2.350525379180908, "learning_rate": 3.1465341141222934e-05, "loss": 4.4245, "step": 14110 }, { "epoch": 0.3792825473599355, "grad_norm": 2.5332536697387695, "learning_rate": 3.145853193517636e-05, "loss": 4.2288, "step": 14115 }, { "epoch": 0.3794169017869139, "grad_norm": 2.175351619720459, "learning_rate": 3.1451722729129785e-05, "loss": 4.3708, "step": 14120 }, { "epoch": 0.37955125621389224, "grad_norm": 2.519005537033081, "learning_rate": 3.144491352308321e-05, "loss": 4.2499, "step": 14125 }, { "epoch": 0.37968561064087064, "grad_norm": 2.4850316047668457, "learning_rate": 3.143810431703664e-05, "loss": 4.3916, "step": 14130 }, { "epoch": 0.379819965067849, "grad_norm": 2.2136423587799072, "learning_rate": 3.143129511099006e-05, "loss": 4.1762, "step": 14135 }, { "epoch": 0.37995431949482733, "grad_norm": 2.305082321166992, "learning_rate": 3.142448590494349e-05, "loss": 4.2126, "step": 14140 }, { "epoch": 0.38008867392180573, "grad_norm": 2.4834587574005127, "learning_rate": 3.141767669889691e-05, "loss": 4.2659, "step": 14145 }, { "epoch": 0.3802230283487841, "grad_norm": 2.492098569869995, "learning_rate": 3.141086749285034e-05, "loss": 4.3247, "step": 14150 }, { "epoch": 0.3803573827757625, "grad_norm": 2.2131361961364746, "learning_rate": 3.140405828680376e-05, "loss": 4.1219, "step": 14155 }, { "epoch": 0.3804917372027408, "grad_norm": 2.1240522861480713, "learning_rate": 3.1397249080757184e-05, "loss": 4.2959, "step": 14160 }, { "epoch": 0.3806260916297192, "grad_norm": 2.4772067070007324, "learning_rate": 3.1390439874710606e-05, "loss": 4.2243, "step": 14165 }, { "epoch": 0.38076044605669757, "grad_norm": 2.3326475620269775, "learning_rate": 3.1383630668664035e-05, "loss": 4.3017, "step": 14170 }, { "epoch": 0.3808948004836759, "grad_norm": 2.458195686340332, "learning_rate": 3.1376821462617464e-05, "loss": 4.2366, "step": 14175 }, { "epoch": 0.3810291549106543, "grad_norm": 2.4461262226104736, "learning_rate": 3.1370012256570887e-05, "loss": 4.3435, "step": 14180 }, { "epoch": 0.38116350933763266, "grad_norm": 2.315472364425659, "learning_rate": 3.136320305052431e-05, "loss": 4.349, "step": 14185 }, { "epoch": 0.38129786376461106, "grad_norm": 2.5450448989868164, "learning_rate": 3.135639384447773e-05, "loss": 4.3052, "step": 14190 }, { "epoch": 0.3814322181915894, "grad_norm": 2.1828768253326416, "learning_rate": 3.134958463843116e-05, "loss": 4.2564, "step": 14195 }, { "epoch": 0.3815665726185678, "grad_norm": 2.2555363178253174, "learning_rate": 3.134277543238458e-05, "loss": 4.2913, "step": 14200 }, { "epoch": 0.38170092704554615, "grad_norm": 2.1589887142181396, "learning_rate": 3.133596622633801e-05, "loss": 4.2235, "step": 14205 }, { "epoch": 0.38183528147252455, "grad_norm": 2.4267561435699463, "learning_rate": 3.1329157020291434e-05, "loss": 4.3017, "step": 14210 }, { "epoch": 0.3819696358995029, "grad_norm": 2.5112481117248535, "learning_rate": 3.132234781424486e-05, "loss": 4.0831, "step": 14215 }, { "epoch": 0.38210399032648124, "grad_norm": 2.3390679359436035, "learning_rate": 3.1315538608198285e-05, "loss": 4.2947, "step": 14220 }, { "epoch": 0.38223834475345964, "grad_norm": 2.448854923248291, "learning_rate": 3.130872940215171e-05, "loss": 4.122, "step": 14225 }, { "epoch": 0.382372699180438, "grad_norm": 2.604612112045288, "learning_rate": 3.1301920196105136e-05, "loss": 4.258, "step": 14230 }, { "epoch": 0.3825070536074164, "grad_norm": 2.5440499782562256, "learning_rate": 3.1295110990058565e-05, "loss": 4.3373, "step": 14235 }, { "epoch": 0.3826414080343947, "grad_norm": 2.497462749481201, "learning_rate": 3.128830178401199e-05, "loss": 4.2936, "step": 14240 }, { "epoch": 0.3827757624613731, "grad_norm": 2.372606039047241, "learning_rate": 3.128149257796541e-05, "loss": 4.2315, "step": 14245 }, { "epoch": 0.38291011688835147, "grad_norm": 2.5296621322631836, "learning_rate": 3.127468337191883e-05, "loss": 4.2571, "step": 14250 }, { "epoch": 0.3830444713153298, "grad_norm": 2.2454700469970703, "learning_rate": 3.126787416587226e-05, "loss": 4.3954, "step": 14255 }, { "epoch": 0.3831788257423082, "grad_norm": 2.3120620250701904, "learning_rate": 3.1261064959825684e-05, "loss": 4.2695, "step": 14260 }, { "epoch": 0.38331318016928656, "grad_norm": 2.09098482131958, "learning_rate": 3.125425575377911e-05, "loss": 4.3242, "step": 14265 }, { "epoch": 0.38344753459626496, "grad_norm": 2.4609668254852295, "learning_rate": 3.1247446547732535e-05, "loss": 4.1983, "step": 14270 }, { "epoch": 0.3835818890232433, "grad_norm": 2.365065336227417, "learning_rate": 3.1240637341685964e-05, "loss": 4.2711, "step": 14275 }, { "epoch": 0.3837162434502217, "grad_norm": 2.483854055404663, "learning_rate": 3.1233828135639386e-05, "loss": 4.3801, "step": 14280 }, { "epoch": 0.38385059787720005, "grad_norm": 2.4485063552856445, "learning_rate": 3.122701892959281e-05, "loss": 4.3037, "step": 14285 }, { "epoch": 0.3839849523041784, "grad_norm": 2.3685925006866455, "learning_rate": 3.122020972354624e-05, "loss": 4.2255, "step": 14290 }, { "epoch": 0.3841193067311568, "grad_norm": 2.5178205966949463, "learning_rate": 3.1213400517499667e-05, "loss": 4.3208, "step": 14295 }, { "epoch": 0.38425366115813514, "grad_norm": 2.51224422454834, "learning_rate": 3.120659131145309e-05, "loss": 4.3695, "step": 14300 }, { "epoch": 0.38438801558511354, "grad_norm": 2.542829990386963, "learning_rate": 3.119978210540651e-05, "loss": 4.2582, "step": 14305 }, { "epoch": 0.3845223700120919, "grad_norm": 2.5083277225494385, "learning_rate": 3.119297289935993e-05, "loss": 4.3223, "step": 14310 }, { "epoch": 0.3846567244390703, "grad_norm": 2.367457151412964, "learning_rate": 3.118616369331336e-05, "loss": 4.351, "step": 14315 }, { "epoch": 0.38479107886604863, "grad_norm": 2.4680230617523193, "learning_rate": 3.1179354487266785e-05, "loss": 4.2761, "step": 14320 }, { "epoch": 0.38492543329302703, "grad_norm": 2.4924192428588867, "learning_rate": 3.1172545281220214e-05, "loss": 4.3583, "step": 14325 }, { "epoch": 0.3850597877200054, "grad_norm": 2.4994428157806396, "learning_rate": 3.1165736075173636e-05, "loss": 4.3397, "step": 14330 }, { "epoch": 0.3851941421469837, "grad_norm": 2.4816064834594727, "learning_rate": 3.115892686912706e-05, "loss": 4.1394, "step": 14335 }, { "epoch": 0.3853284965739621, "grad_norm": 2.2779366970062256, "learning_rate": 3.115211766308049e-05, "loss": 4.4211, "step": 14340 }, { "epoch": 0.38546285100094047, "grad_norm": 2.1995530128479004, "learning_rate": 3.114530845703391e-05, "loss": 4.299, "step": 14345 }, { "epoch": 0.38559720542791887, "grad_norm": 2.7301137447357178, "learning_rate": 3.113849925098734e-05, "loss": 4.3092, "step": 14350 }, { "epoch": 0.3857315598548972, "grad_norm": 2.349949836730957, "learning_rate": 3.113169004494076e-05, "loss": 4.2077, "step": 14355 }, { "epoch": 0.3858659142818756, "grad_norm": 2.3989739418029785, "learning_rate": 3.112488083889419e-05, "loss": 4.0892, "step": 14360 }, { "epoch": 0.38600026870885396, "grad_norm": 2.335411310195923, "learning_rate": 3.111807163284761e-05, "loss": 4.3051, "step": 14365 }, { "epoch": 0.3861346231358323, "grad_norm": 2.5236847400665283, "learning_rate": 3.1111262426801034e-05, "loss": 4.1836, "step": 14370 }, { "epoch": 0.3862689775628107, "grad_norm": 2.4553840160369873, "learning_rate": 3.110445322075446e-05, "loss": 4.2342, "step": 14375 }, { "epoch": 0.38640333198978905, "grad_norm": 2.4479806423187256, "learning_rate": 3.1097644014707886e-05, "loss": 4.1587, "step": 14380 }, { "epoch": 0.38653768641676745, "grad_norm": 2.241891384124756, "learning_rate": 3.1090834808661315e-05, "loss": 4.2843, "step": 14385 }, { "epoch": 0.3866720408437458, "grad_norm": 2.3287885189056396, "learning_rate": 3.108402560261474e-05, "loss": 4.2179, "step": 14390 }, { "epoch": 0.3868063952707242, "grad_norm": 2.5230860710144043, "learning_rate": 3.107721639656816e-05, "loss": 4.335, "step": 14395 }, { "epoch": 0.38694074969770254, "grad_norm": 2.3073525428771973, "learning_rate": 3.107040719052159e-05, "loss": 4.2206, "step": 14400 }, { "epoch": 0.38707510412468094, "grad_norm": 2.0871682167053223, "learning_rate": 3.106359798447501e-05, "loss": 4.3839, "step": 14405 }, { "epoch": 0.3872094585516593, "grad_norm": 2.3388237953186035, "learning_rate": 3.105678877842843e-05, "loss": 4.29, "step": 14410 }, { "epoch": 0.3873438129786376, "grad_norm": 2.275890588760376, "learning_rate": 3.104997957238186e-05, "loss": 4.2855, "step": 14415 }, { "epoch": 0.387478167405616, "grad_norm": 2.410067319869995, "learning_rate": 3.104317036633529e-05, "loss": 4.2585, "step": 14420 }, { "epoch": 0.38761252183259437, "grad_norm": 2.5437259674072266, "learning_rate": 3.103636116028871e-05, "loss": 4.2653, "step": 14425 }, { "epoch": 0.38774687625957277, "grad_norm": 2.3848185539245605, "learning_rate": 3.1029551954242136e-05, "loss": 4.3734, "step": 14430 }, { "epoch": 0.3878812306865511, "grad_norm": 2.6258974075317383, "learning_rate": 3.102274274819556e-05, "loss": 4.2421, "step": 14435 }, { "epoch": 0.3880155851135295, "grad_norm": 2.394111394882202, "learning_rate": 3.101593354214899e-05, "loss": 4.2514, "step": 14440 }, { "epoch": 0.38814993954050786, "grad_norm": 2.386808156967163, "learning_rate": 3.1009124336102416e-05, "loss": 4.1963, "step": 14445 }, { "epoch": 0.3882842939674862, "grad_norm": 2.3925538063049316, "learning_rate": 3.100231513005584e-05, "loss": 4.2571, "step": 14450 }, { "epoch": 0.3884186483944646, "grad_norm": 2.5595006942749023, "learning_rate": 3.099550592400926e-05, "loss": 4.3216, "step": 14455 }, { "epoch": 0.38855300282144295, "grad_norm": 2.333843469619751, "learning_rate": 3.098869671796269e-05, "loss": 4.3402, "step": 14460 }, { "epoch": 0.38868735724842135, "grad_norm": 2.466310977935791, "learning_rate": 3.098188751191611e-05, "loss": 4.2875, "step": 14465 }, { "epoch": 0.3888217116753997, "grad_norm": 2.2852773666381836, "learning_rate": 3.0975078305869534e-05, "loss": 4.3023, "step": 14470 }, { "epoch": 0.3889560661023781, "grad_norm": 2.5199766159057617, "learning_rate": 3.096826909982296e-05, "loss": 4.3194, "step": 14475 }, { "epoch": 0.38909042052935644, "grad_norm": 2.1740705966949463, "learning_rate": 3.0961459893776385e-05, "loss": 4.4294, "step": 14480 }, { "epoch": 0.3892247749563348, "grad_norm": 2.358670234680176, "learning_rate": 3.0954650687729814e-05, "loss": 4.3519, "step": 14485 }, { "epoch": 0.3893591293833132, "grad_norm": 2.6331212520599365, "learning_rate": 3.094784148168324e-05, "loss": 4.099, "step": 14490 }, { "epoch": 0.38949348381029153, "grad_norm": 2.4660329818725586, "learning_rate": 3.094103227563666e-05, "loss": 4.3083, "step": 14495 }, { "epoch": 0.38962783823726993, "grad_norm": 2.4906811714172363, "learning_rate": 3.093422306959009e-05, "loss": 4.3835, "step": 14500 }, { "epoch": 0.3897621926642483, "grad_norm": 2.560702323913574, "learning_rate": 3.092741386354352e-05, "loss": 4.3658, "step": 14505 }, { "epoch": 0.3898965470912267, "grad_norm": 2.466280698776245, "learning_rate": 3.092060465749694e-05, "loss": 4.3304, "step": 14510 }, { "epoch": 0.390030901518205, "grad_norm": 2.138554096221924, "learning_rate": 3.091379545145036e-05, "loss": 4.3575, "step": 14515 }, { "epoch": 0.3901652559451834, "grad_norm": 2.5083069801330566, "learning_rate": 3.0906986245403784e-05, "loss": 4.3453, "step": 14520 }, { "epoch": 0.39029961037216176, "grad_norm": 2.420567750930786, "learning_rate": 3.090017703935721e-05, "loss": 4.2272, "step": 14525 }, { "epoch": 0.3904339647991401, "grad_norm": 2.2206883430480957, "learning_rate": 3.0893367833310635e-05, "loss": 4.1968, "step": 14530 }, { "epoch": 0.3905683192261185, "grad_norm": 2.1825122833251953, "learning_rate": 3.0886558627264064e-05, "loss": 4.2142, "step": 14535 }, { "epoch": 0.39070267365309685, "grad_norm": 2.488612413406372, "learning_rate": 3.0879749421217486e-05, "loss": 4.1741, "step": 14540 }, { "epoch": 0.39083702808007525, "grad_norm": 2.635913372039795, "learning_rate": 3.0872940215170916e-05, "loss": 4.2384, "step": 14545 }, { "epoch": 0.3909713825070536, "grad_norm": 2.381077766418457, "learning_rate": 3.086613100912434e-05, "loss": 4.3086, "step": 14550 }, { "epoch": 0.391105736934032, "grad_norm": 2.255950689315796, "learning_rate": 3.085932180307776e-05, "loss": 4.2243, "step": 14555 }, { "epoch": 0.39124009136101034, "grad_norm": 2.60119891166687, "learning_rate": 3.085251259703118e-05, "loss": 4.3109, "step": 14560 }, { "epoch": 0.3913744457879887, "grad_norm": 2.424812078475952, "learning_rate": 3.084570339098462e-05, "loss": 4.2584, "step": 14565 }, { "epoch": 0.3915088002149671, "grad_norm": 2.320416212081909, "learning_rate": 3.083889418493804e-05, "loss": 4.1829, "step": 14570 }, { "epoch": 0.39164315464194543, "grad_norm": 2.7615835666656494, "learning_rate": 3.083208497889146e-05, "loss": 4.3671, "step": 14575 }, { "epoch": 0.39177750906892383, "grad_norm": 2.309784173965454, "learning_rate": 3.0825275772844885e-05, "loss": 4.3134, "step": 14580 }, { "epoch": 0.3919118634959022, "grad_norm": 2.4076573848724365, "learning_rate": 3.0818466566798314e-05, "loss": 4.2382, "step": 14585 }, { "epoch": 0.3920462179228806, "grad_norm": 2.3375957012176514, "learning_rate": 3.0811657360751736e-05, "loss": 4.3127, "step": 14590 }, { "epoch": 0.3921805723498589, "grad_norm": 2.443911552429199, "learning_rate": 3.0804848154705165e-05, "loss": 4.2665, "step": 14595 }, { "epoch": 0.39231492677683727, "grad_norm": 2.487468957901001, "learning_rate": 3.079803894865859e-05, "loss": 4.2395, "step": 14600 }, { "epoch": 0.39244928120381567, "grad_norm": 2.3549230098724365, "learning_rate": 3.079122974261202e-05, "loss": 4.3414, "step": 14605 }, { "epoch": 0.392583635630794, "grad_norm": 2.296757221221924, "learning_rate": 3.078442053656544e-05, "loss": 4.3853, "step": 14610 }, { "epoch": 0.3927179900577724, "grad_norm": 2.708432674407959, "learning_rate": 3.077761133051886e-05, "loss": 4.3558, "step": 14615 }, { "epoch": 0.39285234448475076, "grad_norm": 2.183584213256836, "learning_rate": 3.0770802124472283e-05, "loss": 4.3252, "step": 14620 }, { "epoch": 0.39298669891172916, "grad_norm": 2.314103364944458, "learning_rate": 3.076399291842571e-05, "loss": 4.2631, "step": 14625 }, { "epoch": 0.3931210533387075, "grad_norm": 2.1969337463378906, "learning_rate": 3.075718371237914e-05, "loss": 4.1227, "step": 14630 }, { "epoch": 0.3932554077656859, "grad_norm": 2.2576987743377686, "learning_rate": 3.0750374506332564e-05, "loss": 4.3324, "step": 14635 }, { "epoch": 0.39338976219266425, "grad_norm": 2.5025899410247803, "learning_rate": 3.0743565300285986e-05, "loss": 4.3568, "step": 14640 }, { "epoch": 0.3935241166196426, "grad_norm": 2.507436752319336, "learning_rate": 3.073675609423941e-05, "loss": 4.3309, "step": 14645 }, { "epoch": 0.393658471046621, "grad_norm": 2.682708978652954, "learning_rate": 3.072994688819284e-05, "loss": 4.3191, "step": 14650 }, { "epoch": 0.39379282547359934, "grad_norm": 2.4889323711395264, "learning_rate": 3.0723137682146266e-05, "loss": 4.2975, "step": 14655 }, { "epoch": 0.39392717990057774, "grad_norm": 2.4475367069244385, "learning_rate": 3.071632847609969e-05, "loss": 4.3637, "step": 14660 }, { "epoch": 0.3940615343275561, "grad_norm": 2.423344135284424, "learning_rate": 3.070951927005311e-05, "loss": 4.3865, "step": 14665 }, { "epoch": 0.3941958887545345, "grad_norm": 2.3686540126800537, "learning_rate": 3.070271006400654e-05, "loss": 4.2723, "step": 14670 }, { "epoch": 0.39433024318151283, "grad_norm": 2.49894118309021, "learning_rate": 3.069590085795996e-05, "loss": 4.2414, "step": 14675 }, { "epoch": 0.3944645976084912, "grad_norm": 2.3771181106567383, "learning_rate": 3.0689091651913385e-05, "loss": 4.3081, "step": 14680 }, { "epoch": 0.3945989520354696, "grad_norm": 2.3944599628448486, "learning_rate": 3.0682282445866814e-05, "loss": 4.3964, "step": 14685 }, { "epoch": 0.3947333064624479, "grad_norm": 2.4124321937561035, "learning_rate": 3.067547323982024e-05, "loss": 4.1933, "step": 14690 }, { "epoch": 0.3948676608894263, "grad_norm": 2.43300724029541, "learning_rate": 3.0668664033773665e-05, "loss": 4.3368, "step": 14695 }, { "epoch": 0.39500201531640466, "grad_norm": 2.250229597091675, "learning_rate": 3.066185482772709e-05, "loss": 4.376, "step": 14700 }, { "epoch": 0.39513636974338306, "grad_norm": 2.409778594970703, "learning_rate": 3.065504562168051e-05, "loss": 4.3241, "step": 14705 }, { "epoch": 0.3952707241703614, "grad_norm": 2.294586420059204, "learning_rate": 3.064823641563394e-05, "loss": 4.2036, "step": 14710 }, { "epoch": 0.3954050785973398, "grad_norm": 2.2948389053344727, "learning_rate": 3.064142720958737e-05, "loss": 4.2007, "step": 14715 }, { "epoch": 0.39553943302431815, "grad_norm": 2.7229578495025635, "learning_rate": 3.063461800354079e-05, "loss": 4.3387, "step": 14720 }, { "epoch": 0.3956737874512965, "grad_norm": 2.3358476161956787, "learning_rate": 3.062780879749421e-05, "loss": 4.324, "step": 14725 }, { "epoch": 0.3958081418782749, "grad_norm": 2.2669789791107178, "learning_rate": 3.062099959144764e-05, "loss": 4.2444, "step": 14730 }, { "epoch": 0.39594249630525324, "grad_norm": 2.391108512878418, "learning_rate": 3.061419038540106e-05, "loss": 4.2416, "step": 14735 }, { "epoch": 0.39607685073223164, "grad_norm": 2.172057628631592, "learning_rate": 3.0607381179354486e-05, "loss": 4.2266, "step": 14740 }, { "epoch": 0.39621120515921, "grad_norm": 2.415519952774048, "learning_rate": 3.0600571973307915e-05, "loss": 4.2517, "step": 14745 }, { "epoch": 0.3963455595861884, "grad_norm": 2.3594329357147217, "learning_rate": 3.0593762767261344e-05, "loss": 4.2522, "step": 14750 }, { "epoch": 0.39647991401316673, "grad_norm": 2.3897271156311035, "learning_rate": 3.0586953561214766e-05, "loss": 4.3044, "step": 14755 }, { "epoch": 0.3966142684401451, "grad_norm": 2.520068407058716, "learning_rate": 3.058014435516819e-05, "loss": 4.3266, "step": 14760 }, { "epoch": 0.3967486228671235, "grad_norm": 2.099815845489502, "learning_rate": 3.057333514912161e-05, "loss": 4.3265, "step": 14765 }, { "epoch": 0.3968829772941018, "grad_norm": 2.750869035720825, "learning_rate": 3.056652594307503e-05, "loss": 4.322, "step": 14770 }, { "epoch": 0.3970173317210802, "grad_norm": 2.6960182189941406, "learning_rate": 3.055971673702847e-05, "loss": 4.2079, "step": 14775 }, { "epoch": 0.39715168614805857, "grad_norm": 2.3361809253692627, "learning_rate": 3.055290753098189e-05, "loss": 4.2933, "step": 14780 }, { "epoch": 0.39728604057503697, "grad_norm": 2.231854200363159, "learning_rate": 3.054609832493531e-05, "loss": 4.2878, "step": 14785 }, { "epoch": 0.3974203950020153, "grad_norm": 2.33640456199646, "learning_rate": 3.0539289118888735e-05, "loss": 4.2761, "step": 14790 }, { "epoch": 0.39755474942899366, "grad_norm": 2.4944357872009277, "learning_rate": 3.0532479912842164e-05, "loss": 4.2499, "step": 14795 }, { "epoch": 0.39768910385597206, "grad_norm": 2.3984992504119873, "learning_rate": 3.052567070679559e-05, "loss": 4.1777, "step": 14800 }, { "epoch": 0.3978234582829504, "grad_norm": 2.363731622695923, "learning_rate": 3.0518861500749016e-05, "loss": 4.3235, "step": 14805 }, { "epoch": 0.3979578127099288, "grad_norm": 2.428506374359131, "learning_rate": 3.0512052294702438e-05, "loss": 4.2432, "step": 14810 }, { "epoch": 0.39809216713690715, "grad_norm": 2.2954978942871094, "learning_rate": 3.0505243088655867e-05, "loss": 4.3462, "step": 14815 }, { "epoch": 0.39822652156388555, "grad_norm": 2.387347936630249, "learning_rate": 3.049843388260929e-05, "loss": 4.2013, "step": 14820 }, { "epoch": 0.3983608759908639, "grad_norm": 2.5069823265075684, "learning_rate": 3.0491624676562715e-05, "loss": 4.2306, "step": 14825 }, { "epoch": 0.3984952304178423, "grad_norm": 2.564117670059204, "learning_rate": 3.0484815470516137e-05, "loss": 4.2126, "step": 14830 }, { "epoch": 0.39862958484482064, "grad_norm": 2.3405795097351074, "learning_rate": 3.0478006264469566e-05, "loss": 4.3728, "step": 14835 }, { "epoch": 0.398763939271799, "grad_norm": 2.156245470046997, "learning_rate": 3.047119705842299e-05, "loss": 4.2642, "step": 14840 }, { "epoch": 0.3988982936987774, "grad_norm": 2.3241055011749268, "learning_rate": 3.0464387852376414e-05, "loss": 4.2607, "step": 14845 }, { "epoch": 0.39903264812575573, "grad_norm": 2.4331064224243164, "learning_rate": 3.0457578646329837e-05, "loss": 4.2721, "step": 14850 }, { "epoch": 0.39916700255273413, "grad_norm": 2.3952794075012207, "learning_rate": 3.0450769440283266e-05, "loss": 4.266, "step": 14855 }, { "epoch": 0.3993013569797125, "grad_norm": 2.445282220840454, "learning_rate": 3.044396023423669e-05, "loss": 4.2446, "step": 14860 }, { "epoch": 0.3994357114066909, "grad_norm": 2.4237923622131348, "learning_rate": 3.0437151028190114e-05, "loss": 4.1766, "step": 14865 }, { "epoch": 0.3995700658336692, "grad_norm": 2.685258388519287, "learning_rate": 3.043034182214354e-05, "loss": 4.3549, "step": 14870 }, { "epoch": 0.39970442026064756, "grad_norm": 2.370173215866089, "learning_rate": 3.0423532616096968e-05, "loss": 4.3332, "step": 14875 }, { "epoch": 0.39983877468762596, "grad_norm": 2.275838613510132, "learning_rate": 3.041672341005039e-05, "loss": 4.3136, "step": 14880 }, { "epoch": 0.3999731291146043, "grad_norm": 2.8002326488494873, "learning_rate": 3.0409914204003813e-05, "loss": 4.3134, "step": 14885 }, { "epoch": 0.4001074835415827, "grad_norm": 2.200334072113037, "learning_rate": 3.040310499795724e-05, "loss": 4.4386, "step": 14890 }, { "epoch": 0.40024183796856105, "grad_norm": 2.3915858268737793, "learning_rate": 3.0396295791910667e-05, "loss": 4.289, "step": 14895 }, { "epoch": 0.40037619239553945, "grad_norm": 2.397400379180908, "learning_rate": 3.038948658586409e-05, "loss": 4.2631, "step": 14900 }, { "epoch": 0.4005105468225178, "grad_norm": 2.279953718185425, "learning_rate": 3.0382677379817515e-05, "loss": 4.2948, "step": 14905 }, { "epoch": 0.40064490124949614, "grad_norm": 2.3294737339019775, "learning_rate": 3.0375868173770938e-05, "loss": 4.2607, "step": 14910 }, { "epoch": 0.40077925567647454, "grad_norm": 2.601576328277588, "learning_rate": 3.0369058967724367e-05, "loss": 4.2404, "step": 14915 }, { "epoch": 0.4009136101034529, "grad_norm": 2.3031506538391113, "learning_rate": 3.0362249761677792e-05, "loss": 4.2878, "step": 14920 }, { "epoch": 0.4010479645304313, "grad_norm": 2.8369812965393066, "learning_rate": 3.0355440555631215e-05, "loss": 4.3155, "step": 14925 }, { "epoch": 0.40118231895740963, "grad_norm": 2.2411205768585205, "learning_rate": 3.0348631349584637e-05, "loss": 4.3156, "step": 14930 }, { "epoch": 0.40131667338438803, "grad_norm": 2.224097728729248, "learning_rate": 3.0341822143538063e-05, "loss": 4.1881, "step": 14935 }, { "epoch": 0.4014510278113664, "grad_norm": 2.1425905227661133, "learning_rate": 3.033501293749149e-05, "loss": 4.216, "step": 14940 }, { "epoch": 0.4015853822383448, "grad_norm": 2.3168671131134033, "learning_rate": 3.0328203731444914e-05, "loss": 4.2863, "step": 14945 }, { "epoch": 0.4017197366653231, "grad_norm": 2.4455814361572266, "learning_rate": 3.032139452539834e-05, "loss": 4.1961, "step": 14950 }, { "epoch": 0.40185409109230147, "grad_norm": 2.408461570739746, "learning_rate": 3.0314585319351762e-05, "loss": 4.2671, "step": 14955 }, { "epoch": 0.40198844551927987, "grad_norm": 2.2294387817382812, "learning_rate": 3.030777611330519e-05, "loss": 4.3296, "step": 14960 }, { "epoch": 0.4021227999462582, "grad_norm": 2.4524431228637695, "learning_rate": 3.0300966907258617e-05, "loss": 4.3318, "step": 14965 }, { "epoch": 0.4022571543732366, "grad_norm": 2.2154600620269775, "learning_rate": 3.029415770121204e-05, "loss": 4.3565, "step": 14970 }, { "epoch": 0.40239150880021496, "grad_norm": 2.629197359085083, "learning_rate": 3.0287348495165464e-05, "loss": 4.2913, "step": 14975 }, { "epoch": 0.40252586322719336, "grad_norm": 2.20185923576355, "learning_rate": 3.0280539289118893e-05, "loss": 4.3749, "step": 14980 }, { "epoch": 0.4026602176541717, "grad_norm": 2.567887306213379, "learning_rate": 3.0273730083072316e-05, "loss": 4.2403, "step": 14985 }, { "epoch": 0.40279457208115005, "grad_norm": 2.561450719833374, "learning_rate": 3.0266920877025738e-05, "loss": 4.2801, "step": 14990 }, { "epoch": 0.40292892650812845, "grad_norm": 2.7562851905822754, "learning_rate": 3.0260111670979164e-05, "loss": 4.1768, "step": 14995 }, { "epoch": 0.4030632809351068, "grad_norm": 2.4966940879821777, "learning_rate": 3.0253302464932593e-05, "loss": 4.364, "step": 15000 }, { "epoch": 0.4031976353620852, "grad_norm": 2.5060904026031494, "learning_rate": 3.0246493258886015e-05, "loss": 4.308, "step": 15005 }, { "epoch": 0.40333198978906354, "grad_norm": 2.2610812187194824, "learning_rate": 3.023968405283944e-05, "loss": 4.2393, "step": 15010 }, { "epoch": 0.40346634421604194, "grad_norm": 2.411501884460449, "learning_rate": 3.0232874846792863e-05, "loss": 4.254, "step": 15015 }, { "epoch": 0.4036006986430203, "grad_norm": 2.28826642036438, "learning_rate": 3.0226065640746292e-05, "loss": 4.3133, "step": 15020 }, { "epoch": 0.4037350530699987, "grad_norm": 2.2608704566955566, "learning_rate": 3.0219256434699718e-05, "loss": 4.1253, "step": 15025 }, { "epoch": 0.40386940749697703, "grad_norm": 2.1428303718566895, "learning_rate": 3.021244722865314e-05, "loss": 4.162, "step": 15030 }, { "epoch": 0.4040037619239554, "grad_norm": 2.4092249870300293, "learning_rate": 3.0205638022606562e-05, "loss": 4.1827, "step": 15035 }, { "epoch": 0.4041381163509338, "grad_norm": 2.355943202972412, "learning_rate": 3.0198828816559995e-05, "loss": 4.2591, "step": 15040 }, { "epoch": 0.4042724707779121, "grad_norm": 2.637709379196167, "learning_rate": 3.0192019610513417e-05, "loss": 4.305, "step": 15045 }, { "epoch": 0.4044068252048905, "grad_norm": 2.2113492488861084, "learning_rate": 3.018521040446684e-05, "loss": 4.3268, "step": 15050 }, { "epoch": 0.40454117963186886, "grad_norm": 2.410249948501587, "learning_rate": 3.0178401198420265e-05, "loss": 4.3034, "step": 15055 }, { "epoch": 0.40467553405884726, "grad_norm": 2.1468658447265625, "learning_rate": 3.0171591992373694e-05, "loss": 4.2177, "step": 15060 }, { "epoch": 0.4048098884858256, "grad_norm": 2.650271415710449, "learning_rate": 3.0164782786327116e-05, "loss": 4.224, "step": 15065 }, { "epoch": 0.40494424291280395, "grad_norm": 2.519599676132202, "learning_rate": 3.0157973580280542e-05, "loss": 4.2121, "step": 15070 }, { "epoch": 0.40507859733978235, "grad_norm": 2.3711276054382324, "learning_rate": 3.0151164374233964e-05, "loss": 4.315, "step": 15075 }, { "epoch": 0.4052129517667607, "grad_norm": 2.265425205230713, "learning_rate": 3.014435516818739e-05, "loss": 4.1935, "step": 15080 }, { "epoch": 0.4053473061937391, "grad_norm": 2.3750174045562744, "learning_rate": 3.013754596214082e-05, "loss": 4.2176, "step": 15085 }, { "epoch": 0.40548166062071744, "grad_norm": 2.3182108402252197, "learning_rate": 3.013073675609424e-05, "loss": 4.265, "step": 15090 }, { "epoch": 0.40561601504769584, "grad_norm": 2.5446548461914062, "learning_rate": 3.0123927550047663e-05, "loss": 4.1888, "step": 15095 }, { "epoch": 0.4057503694746742, "grad_norm": 2.3328607082366943, "learning_rate": 3.011711834400109e-05, "loss": 4.2458, "step": 15100 }, { "epoch": 0.40588472390165253, "grad_norm": 2.4188597202301025, "learning_rate": 3.0110309137954518e-05, "loss": 4.3838, "step": 15105 }, { "epoch": 0.40601907832863093, "grad_norm": 2.267055034637451, "learning_rate": 3.010349993190794e-05, "loss": 4.3703, "step": 15110 }, { "epoch": 0.4061534327556093, "grad_norm": 2.4511544704437256, "learning_rate": 3.0096690725861366e-05, "loss": 4.2743, "step": 15115 }, { "epoch": 0.4062877871825877, "grad_norm": 2.320572853088379, "learning_rate": 3.0089881519814788e-05, "loss": 4.0622, "step": 15120 }, { "epoch": 0.406422141609566, "grad_norm": 2.3764612674713135, "learning_rate": 3.0083072313768217e-05, "loss": 4.3091, "step": 15125 }, { "epoch": 0.4065564960365444, "grad_norm": 2.329913377761841, "learning_rate": 3.0076263107721643e-05, "loss": 4.2826, "step": 15130 }, { "epoch": 0.40669085046352277, "grad_norm": 2.2480175495147705, "learning_rate": 3.0069453901675065e-05, "loss": 4.3427, "step": 15135 }, { "epoch": 0.40682520489050117, "grad_norm": 2.583582878112793, "learning_rate": 3.0062644695628487e-05, "loss": 4.1923, "step": 15140 }, { "epoch": 0.4069595593174795, "grad_norm": 2.2606606483459473, "learning_rate": 3.005583548958192e-05, "loss": 4.2712, "step": 15145 }, { "epoch": 0.40709391374445786, "grad_norm": 2.2507436275482178, "learning_rate": 3.0049026283535342e-05, "loss": 4.1473, "step": 15150 }, { "epoch": 0.40722826817143626, "grad_norm": 2.137530565261841, "learning_rate": 3.0042217077488764e-05, "loss": 4.2804, "step": 15155 }, { "epoch": 0.4073626225984146, "grad_norm": 2.306863307952881, "learning_rate": 3.003540787144219e-05, "loss": 4.2638, "step": 15160 }, { "epoch": 0.407496977025393, "grad_norm": 2.3660683631896973, "learning_rate": 3.002859866539562e-05, "loss": 4.405, "step": 15165 }, { "epoch": 0.40763133145237135, "grad_norm": 2.4482462406158447, "learning_rate": 3.002178945934904e-05, "loss": 4.288, "step": 15170 }, { "epoch": 0.40776568587934975, "grad_norm": 2.33117938041687, "learning_rate": 3.0014980253302467e-05, "loss": 4.3375, "step": 15175 }, { "epoch": 0.4079000403063281, "grad_norm": 2.2881529331207275, "learning_rate": 3.000817104725589e-05, "loss": 4.2885, "step": 15180 }, { "epoch": 0.40803439473330644, "grad_norm": 2.23380446434021, "learning_rate": 3.000136184120932e-05, "loss": 4.0958, "step": 15185 }, { "epoch": 0.40816874916028484, "grad_norm": 2.4789693355560303, "learning_rate": 2.9994552635162744e-05, "loss": 4.3061, "step": 15190 }, { "epoch": 0.4083031035872632, "grad_norm": 2.3406922817230225, "learning_rate": 2.9987743429116166e-05, "loss": 4.3921, "step": 15195 }, { "epoch": 0.4084374580142416, "grad_norm": 2.4371888637542725, "learning_rate": 2.998093422306959e-05, "loss": 4.2097, "step": 15200 }, { "epoch": 0.40857181244121993, "grad_norm": 2.0882675647735596, "learning_rate": 2.9974125017023018e-05, "loss": 4.1242, "step": 15205 }, { "epoch": 0.40870616686819833, "grad_norm": 2.570497989654541, "learning_rate": 2.9967315810976443e-05, "loss": 4.3416, "step": 15210 }, { "epoch": 0.4088405212951767, "grad_norm": 2.3471245765686035, "learning_rate": 2.9960506604929866e-05, "loss": 4.3881, "step": 15215 }, { "epoch": 0.408974875722155, "grad_norm": 2.3258862495422363, "learning_rate": 2.995369739888329e-05, "loss": 4.3062, "step": 15220 }, { "epoch": 0.4091092301491334, "grad_norm": 2.3135623931884766, "learning_rate": 2.9946888192836713e-05, "loss": 4.2543, "step": 15225 }, { "epoch": 0.40924358457611176, "grad_norm": 2.445925712585449, "learning_rate": 2.9940078986790142e-05, "loss": 4.2006, "step": 15230 }, { "epoch": 0.40937793900309016, "grad_norm": 2.381124973297119, "learning_rate": 2.9933269780743568e-05, "loss": 4.2782, "step": 15235 }, { "epoch": 0.4095122934300685, "grad_norm": 2.4999194145202637, "learning_rate": 2.992646057469699e-05, "loss": 4.2521, "step": 15240 }, { "epoch": 0.4096466478570469, "grad_norm": 2.268766164779663, "learning_rate": 2.9919651368650413e-05, "loss": 4.1848, "step": 15245 }, { "epoch": 0.40978100228402525, "grad_norm": 2.3853793144226074, "learning_rate": 2.9912842162603845e-05, "loss": 4.2844, "step": 15250 }, { "epoch": 0.40991535671100365, "grad_norm": 2.2645647525787354, "learning_rate": 2.9906032956557267e-05, "loss": 4.2929, "step": 15255 }, { "epoch": 0.410049711137982, "grad_norm": 2.361607074737549, "learning_rate": 2.989922375051069e-05, "loss": 4.3022, "step": 15260 }, { "epoch": 0.41018406556496034, "grad_norm": 2.481165885925293, "learning_rate": 2.9892414544464115e-05, "loss": 4.2606, "step": 15265 }, { "epoch": 0.41031841999193874, "grad_norm": 2.334106206893921, "learning_rate": 2.9885605338417544e-05, "loss": 4.3242, "step": 15270 }, { "epoch": 0.4104527744189171, "grad_norm": 2.417473554611206, "learning_rate": 2.9878796132370967e-05, "loss": 4.1249, "step": 15275 }, { "epoch": 0.4105871288458955, "grad_norm": 2.33376145362854, "learning_rate": 2.9871986926324392e-05, "loss": 4.3304, "step": 15280 }, { "epoch": 0.41072148327287383, "grad_norm": 2.1903340816497803, "learning_rate": 2.9865177720277815e-05, "loss": 4.2321, "step": 15285 }, { "epoch": 0.41085583769985223, "grad_norm": 2.3100905418395996, "learning_rate": 2.9858368514231244e-05, "loss": 4.3124, "step": 15290 }, { "epoch": 0.4109901921268306, "grad_norm": 2.26353120803833, "learning_rate": 2.985155930818467e-05, "loss": 4.2926, "step": 15295 }, { "epoch": 0.4111245465538089, "grad_norm": 2.370575428009033, "learning_rate": 2.984475010213809e-05, "loss": 4.2766, "step": 15300 }, { "epoch": 0.4112589009807873, "grad_norm": 2.46513032913208, "learning_rate": 2.9837940896091514e-05, "loss": 4.29, "step": 15305 }, { "epoch": 0.41139325540776567, "grad_norm": 2.3344225883483887, "learning_rate": 2.9831131690044943e-05, "loss": 4.2037, "step": 15310 }, { "epoch": 0.41152760983474407, "grad_norm": 2.2919318675994873, "learning_rate": 2.982432248399837e-05, "loss": 4.3345, "step": 15315 }, { "epoch": 0.4116619642617224, "grad_norm": 2.172579526901245, "learning_rate": 2.981751327795179e-05, "loss": 4.1602, "step": 15320 }, { "epoch": 0.4117963186887008, "grad_norm": 2.434432029724121, "learning_rate": 2.9810704071905216e-05, "loss": 4.2995, "step": 15325 }, { "epoch": 0.41193067311567916, "grad_norm": 2.534857988357544, "learning_rate": 2.9803894865858645e-05, "loss": 4.313, "step": 15330 }, { "epoch": 0.41206502754265756, "grad_norm": 2.4175307750701904, "learning_rate": 2.9797085659812068e-05, "loss": 4.3759, "step": 15335 }, { "epoch": 0.4121993819696359, "grad_norm": 2.5274789333343506, "learning_rate": 2.9790276453765493e-05, "loss": 4.3469, "step": 15340 }, { "epoch": 0.41233373639661425, "grad_norm": 2.3748111724853516, "learning_rate": 2.9783467247718916e-05, "loss": 4.179, "step": 15345 }, { "epoch": 0.41246809082359265, "grad_norm": 2.3223702907562256, "learning_rate": 2.9776658041672345e-05, "loss": 4.1739, "step": 15350 }, { "epoch": 0.412602445250571, "grad_norm": 2.3797850608825684, "learning_rate": 2.976984883562577e-05, "loss": 4.178, "step": 15355 }, { "epoch": 0.4127367996775494, "grad_norm": 2.349032402038574, "learning_rate": 2.9763039629579193e-05, "loss": 4.3435, "step": 15360 }, { "epoch": 0.41287115410452774, "grad_norm": 2.6877405643463135, "learning_rate": 2.9756230423532615e-05, "loss": 4.2759, "step": 15365 }, { "epoch": 0.41300550853150614, "grad_norm": 2.4556851387023926, "learning_rate": 2.974942121748604e-05, "loss": 4.1202, "step": 15370 }, { "epoch": 0.4131398629584845, "grad_norm": 2.4657938480377197, "learning_rate": 2.974261201143947e-05, "loss": 4.3019, "step": 15375 }, { "epoch": 0.4132742173854628, "grad_norm": 2.379432439804077, "learning_rate": 2.9735802805392892e-05, "loss": 4.2152, "step": 15380 }, { "epoch": 0.41340857181244123, "grad_norm": 2.2791435718536377, "learning_rate": 2.9728993599346318e-05, "loss": 4.2521, "step": 15385 }, { "epoch": 0.4135429262394196, "grad_norm": 2.5904486179351807, "learning_rate": 2.972218439329974e-05, "loss": 4.0848, "step": 15390 }, { "epoch": 0.413677280666398, "grad_norm": 2.5664472579956055, "learning_rate": 2.971537518725317e-05, "loss": 4.2924, "step": 15395 }, { "epoch": 0.4138116350933763, "grad_norm": 2.36448335647583, "learning_rate": 2.9708565981206595e-05, "loss": 4.3416, "step": 15400 }, { "epoch": 0.4139459895203547, "grad_norm": 2.3223648071289062, "learning_rate": 2.9701756775160017e-05, "loss": 4.3747, "step": 15405 }, { "epoch": 0.41408034394733306, "grad_norm": 2.1586551666259766, "learning_rate": 2.969494756911344e-05, "loss": 4.2933, "step": 15410 }, { "epoch": 0.4142146983743114, "grad_norm": 2.236051559448242, "learning_rate": 2.9688138363066868e-05, "loss": 4.3789, "step": 15415 }, { "epoch": 0.4143490528012898, "grad_norm": 2.394160509109497, "learning_rate": 2.9681329157020294e-05, "loss": 4.2693, "step": 15420 }, { "epoch": 0.41448340722826815, "grad_norm": 2.3833975791931152, "learning_rate": 2.9674519950973716e-05, "loss": 4.2282, "step": 15425 }, { "epoch": 0.41461776165524655, "grad_norm": 2.242784023284912, "learning_rate": 2.966771074492714e-05, "loss": 4.2533, "step": 15430 }, { "epoch": 0.4147521160822249, "grad_norm": 2.4441890716552734, "learning_rate": 2.966090153888057e-05, "loss": 4.2694, "step": 15435 }, { "epoch": 0.4148864705092033, "grad_norm": 2.5111019611358643, "learning_rate": 2.9654092332833993e-05, "loss": 4.2356, "step": 15440 }, { "epoch": 0.41502082493618164, "grad_norm": 2.2310760021209717, "learning_rate": 2.964728312678742e-05, "loss": 4.2776, "step": 15445 }, { "epoch": 0.41515517936316004, "grad_norm": 2.354158878326416, "learning_rate": 2.964047392074084e-05, "loss": 4.2804, "step": 15450 }, { "epoch": 0.4152895337901384, "grad_norm": 2.1626672744750977, "learning_rate": 2.963366471469427e-05, "loss": 4.2974, "step": 15455 }, { "epoch": 0.41542388821711673, "grad_norm": 2.473813056945801, "learning_rate": 2.9626855508647696e-05, "loss": 4.2955, "step": 15460 }, { "epoch": 0.41555824264409513, "grad_norm": 2.185945510864258, "learning_rate": 2.9620046302601118e-05, "loss": 4.1219, "step": 15465 }, { "epoch": 0.4156925970710735, "grad_norm": 2.626720428466797, "learning_rate": 2.961323709655454e-05, "loss": 4.1857, "step": 15470 }, { "epoch": 0.4158269514980519, "grad_norm": 2.3311195373535156, "learning_rate": 2.960642789050797e-05, "loss": 4.2611, "step": 15475 }, { "epoch": 0.4159613059250302, "grad_norm": 2.170579195022583, "learning_rate": 2.9599618684461395e-05, "loss": 4.3102, "step": 15480 }, { "epoch": 0.4160956603520086, "grad_norm": 2.331987142562866, "learning_rate": 2.9592809478414817e-05, "loss": 4.1262, "step": 15485 }, { "epoch": 0.41623001477898697, "grad_norm": 2.2168030738830566, "learning_rate": 2.9586000272368243e-05, "loss": 4.1843, "step": 15490 }, { "epoch": 0.4163643692059653, "grad_norm": 2.3450028896331787, "learning_rate": 2.9579191066321672e-05, "loss": 4.2369, "step": 15495 }, { "epoch": 0.4164987236329437, "grad_norm": 2.378307580947876, "learning_rate": 2.9572381860275094e-05, "loss": 4.223, "step": 15500 }, { "epoch": 0.41663307805992206, "grad_norm": 2.3677380084991455, "learning_rate": 2.956557265422852e-05, "loss": 4.1944, "step": 15505 }, { "epoch": 0.41676743248690046, "grad_norm": 2.76792573928833, "learning_rate": 2.9558763448181942e-05, "loss": 4.3932, "step": 15510 }, { "epoch": 0.4169017869138788, "grad_norm": 2.4063220024108887, "learning_rate": 2.9551954242135364e-05, "loss": 4.2682, "step": 15515 }, { "epoch": 0.4170361413408572, "grad_norm": 2.0933823585510254, "learning_rate": 2.9545145036088793e-05, "loss": 4.2798, "step": 15520 }, { "epoch": 0.41717049576783555, "grad_norm": 2.150895357131958, "learning_rate": 2.953833583004222e-05, "loss": 4.3057, "step": 15525 }, { "epoch": 0.4173048501948139, "grad_norm": 2.9398670196533203, "learning_rate": 2.953152662399564e-05, "loss": 4.2759, "step": 15530 }, { "epoch": 0.4174392046217923, "grad_norm": 2.4601643085479736, "learning_rate": 2.9524717417949067e-05, "loss": 4.0938, "step": 15535 }, { "epoch": 0.41757355904877064, "grad_norm": 2.6426565647125244, "learning_rate": 2.9517908211902496e-05, "loss": 4.2446, "step": 15540 }, { "epoch": 0.41770791347574904, "grad_norm": 2.5562407970428467, "learning_rate": 2.9511099005855918e-05, "loss": 4.2428, "step": 15545 }, { "epoch": 0.4178422679027274, "grad_norm": 2.6013388633728027, "learning_rate": 2.9504289799809344e-05, "loss": 4.2926, "step": 15550 }, { "epoch": 0.4179766223297058, "grad_norm": 2.298737049102783, "learning_rate": 2.9497480593762766e-05, "loss": 4.2539, "step": 15555 }, { "epoch": 0.4181109767566841, "grad_norm": 2.445235013961792, "learning_rate": 2.9490671387716195e-05, "loss": 4.2892, "step": 15560 }, { "epoch": 0.4182453311836625, "grad_norm": 2.329190492630005, "learning_rate": 2.948386218166962e-05, "loss": 4.2953, "step": 15565 }, { "epoch": 0.4183796856106409, "grad_norm": 2.494016170501709, "learning_rate": 2.9477052975623043e-05, "loss": 4.1849, "step": 15570 }, { "epoch": 0.4185140400376192, "grad_norm": 2.3938581943511963, "learning_rate": 2.9470243769576465e-05, "loss": 4.202, "step": 15575 }, { "epoch": 0.4186483944645976, "grad_norm": 2.4662394523620605, "learning_rate": 2.9463434563529894e-05, "loss": 4.2463, "step": 15580 }, { "epoch": 0.41878274889157596, "grad_norm": 2.4669694900512695, "learning_rate": 2.945662535748332e-05, "loss": 4.2475, "step": 15585 }, { "epoch": 0.41891710331855436, "grad_norm": 2.4376564025878906, "learning_rate": 2.9449816151436742e-05, "loss": 4.3352, "step": 15590 }, { "epoch": 0.4190514577455327, "grad_norm": 2.1778059005737305, "learning_rate": 2.9443006945390168e-05, "loss": 4.164, "step": 15595 }, { "epoch": 0.4191858121725111, "grad_norm": 2.559877395629883, "learning_rate": 2.9436197739343597e-05, "loss": 4.3062, "step": 15600 }, { "epoch": 0.41932016659948945, "grad_norm": 2.3988101482391357, "learning_rate": 2.942938853329702e-05, "loss": 4.3022, "step": 15605 }, { "epoch": 0.4194545210264678, "grad_norm": 2.220656394958496, "learning_rate": 2.9422579327250445e-05, "loss": 4.3416, "step": 15610 }, { "epoch": 0.4195888754534462, "grad_norm": 2.5570366382598877, "learning_rate": 2.9415770121203867e-05, "loss": 4.1994, "step": 15615 }, { "epoch": 0.41972322988042454, "grad_norm": 2.2814064025878906, "learning_rate": 2.9408960915157296e-05, "loss": 4.1622, "step": 15620 }, { "epoch": 0.41985758430740294, "grad_norm": 2.3180532455444336, "learning_rate": 2.940215170911072e-05, "loss": 4.2521, "step": 15625 }, { "epoch": 0.4199919387343813, "grad_norm": 2.4440035820007324, "learning_rate": 2.9395342503064144e-05, "loss": 4.2457, "step": 15630 }, { "epoch": 0.4201262931613597, "grad_norm": 2.3329436779022217, "learning_rate": 2.9388533297017567e-05, "loss": 4.2131, "step": 15635 }, { "epoch": 0.42026064758833803, "grad_norm": 2.373483896255493, "learning_rate": 2.9381724090970996e-05, "loss": 4.348, "step": 15640 }, { "epoch": 0.42039500201531643, "grad_norm": 2.350590944290161, "learning_rate": 2.937491488492442e-05, "loss": 4.2993, "step": 15645 }, { "epoch": 0.4205293564422948, "grad_norm": 2.516461133956909, "learning_rate": 2.9368105678877843e-05, "loss": 4.2517, "step": 15650 }, { "epoch": 0.4206637108692731, "grad_norm": 2.2071080207824707, "learning_rate": 2.936129647283127e-05, "loss": 4.2219, "step": 15655 }, { "epoch": 0.4207980652962515, "grad_norm": 2.14993953704834, "learning_rate": 2.9354487266784698e-05, "loss": 4.2534, "step": 15660 }, { "epoch": 0.42093241972322987, "grad_norm": 2.2577908039093018, "learning_rate": 2.934767806073812e-05, "loss": 4.2327, "step": 15665 }, { "epoch": 0.42106677415020827, "grad_norm": 2.4337992668151855, "learning_rate": 2.9340868854691546e-05, "loss": 4.1449, "step": 15670 }, { "epoch": 0.4212011285771866, "grad_norm": 2.216754913330078, "learning_rate": 2.933405964864497e-05, "loss": 4.3407, "step": 15675 }, { "epoch": 0.421335483004165, "grad_norm": 2.3895459175109863, "learning_rate": 2.932725044259839e-05, "loss": 4.3221, "step": 15680 }, { "epoch": 0.42146983743114336, "grad_norm": 2.391270399093628, "learning_rate": 2.932044123655182e-05, "loss": 4.2125, "step": 15685 }, { "epoch": 0.4216041918581217, "grad_norm": 2.336050271987915, "learning_rate": 2.9313632030505245e-05, "loss": 4.2911, "step": 15690 }, { "epoch": 0.4217385462851001, "grad_norm": 2.289990186691284, "learning_rate": 2.9306822824458668e-05, "loss": 4.2434, "step": 15695 }, { "epoch": 0.42187290071207845, "grad_norm": 2.1537325382232666, "learning_rate": 2.9300013618412093e-05, "loss": 4.2653, "step": 15700 }, { "epoch": 0.42200725513905685, "grad_norm": 2.3626420497894287, "learning_rate": 2.9293204412365522e-05, "loss": 4.2741, "step": 15705 }, { "epoch": 0.4221416095660352, "grad_norm": 2.5895607471466064, "learning_rate": 2.9286395206318945e-05, "loss": 4.2704, "step": 15710 }, { "epoch": 0.4222759639930136, "grad_norm": 2.508665084838867, "learning_rate": 2.927958600027237e-05, "loss": 4.3302, "step": 15715 }, { "epoch": 0.42241031841999194, "grad_norm": 2.4382503032684326, "learning_rate": 2.9272776794225793e-05, "loss": 4.3504, "step": 15720 }, { "epoch": 0.4225446728469703, "grad_norm": 2.6313557624816895, "learning_rate": 2.926596758817922e-05, "loss": 4.2784, "step": 15725 }, { "epoch": 0.4226790272739487, "grad_norm": 2.280480146408081, "learning_rate": 2.9259158382132644e-05, "loss": 4.3419, "step": 15730 }, { "epoch": 0.422813381700927, "grad_norm": 2.2819299697875977, "learning_rate": 2.925234917608607e-05, "loss": 4.3885, "step": 15735 }, { "epoch": 0.4229477361279054, "grad_norm": 2.3130624294281006, "learning_rate": 2.9245539970039492e-05, "loss": 4.2851, "step": 15740 }, { "epoch": 0.42308209055488377, "grad_norm": 2.536611795425415, "learning_rate": 2.923873076399292e-05, "loss": 4.147, "step": 15745 }, { "epoch": 0.42321644498186217, "grad_norm": 2.3833565711975098, "learning_rate": 2.9231921557946346e-05, "loss": 4.1647, "step": 15750 }, { "epoch": 0.4233507994088405, "grad_norm": 2.502312660217285, "learning_rate": 2.922511235189977e-05, "loss": 4.3536, "step": 15755 }, { "epoch": 0.4234851538358189, "grad_norm": 2.4071357250213623, "learning_rate": 2.9218303145853194e-05, "loss": 4.2648, "step": 15760 }, { "epoch": 0.42361950826279726, "grad_norm": 2.3981125354766846, "learning_rate": 2.9211493939806623e-05, "loss": 4.2304, "step": 15765 }, { "epoch": 0.4237538626897756, "grad_norm": 2.381065607070923, "learning_rate": 2.9204684733760046e-05, "loss": 4.3488, "step": 15770 }, { "epoch": 0.423888217116754, "grad_norm": 2.742440938949585, "learning_rate": 2.919787552771347e-05, "loss": 4.2627, "step": 15775 }, { "epoch": 0.42402257154373235, "grad_norm": 2.414639949798584, "learning_rate": 2.9191066321666894e-05, "loss": 4.1813, "step": 15780 }, { "epoch": 0.42415692597071075, "grad_norm": 2.519242286682129, "learning_rate": 2.9184257115620323e-05, "loss": 4.2614, "step": 15785 }, { "epoch": 0.4242912803976891, "grad_norm": 2.365382194519043, "learning_rate": 2.9177447909573745e-05, "loss": 4.2572, "step": 15790 }, { "epoch": 0.4244256348246675, "grad_norm": 2.306090831756592, "learning_rate": 2.917063870352717e-05, "loss": 4.2328, "step": 15795 }, { "epoch": 0.42455998925164584, "grad_norm": 2.2574098110198975, "learning_rate": 2.9163829497480593e-05, "loss": 4.2036, "step": 15800 }, { "epoch": 0.4246943436786242, "grad_norm": 2.481963634490967, "learning_rate": 2.9157020291434022e-05, "loss": 4.3622, "step": 15805 }, { "epoch": 0.4248286981056026, "grad_norm": 2.133134365081787, "learning_rate": 2.9150211085387448e-05, "loss": 4.2293, "step": 15810 }, { "epoch": 0.42496305253258093, "grad_norm": 2.2951953411102295, "learning_rate": 2.914340187934087e-05, "loss": 4.2183, "step": 15815 }, { "epoch": 0.42509740695955933, "grad_norm": 2.374598264694214, "learning_rate": 2.9136592673294296e-05, "loss": 4.1375, "step": 15820 }, { "epoch": 0.4252317613865377, "grad_norm": 2.261354684829712, "learning_rate": 2.9129783467247718e-05, "loss": 4.3236, "step": 15825 }, { "epoch": 0.4253661158135161, "grad_norm": 2.373946189880371, "learning_rate": 2.9122974261201147e-05, "loss": 4.1737, "step": 15830 }, { "epoch": 0.4255004702404944, "grad_norm": 2.2643213272094727, "learning_rate": 2.911616505515457e-05, "loss": 4.3299, "step": 15835 }, { "epoch": 0.42563482466747277, "grad_norm": 2.3239800930023193, "learning_rate": 2.9109355849107995e-05, "loss": 4.3079, "step": 15840 }, { "epoch": 0.42576917909445117, "grad_norm": 2.3317983150482178, "learning_rate": 2.9102546643061417e-05, "loss": 4.3979, "step": 15845 }, { "epoch": 0.4259035335214295, "grad_norm": 2.6195383071899414, "learning_rate": 2.9095737437014846e-05, "loss": 4.249, "step": 15850 }, { "epoch": 0.4260378879484079, "grad_norm": 2.2538535594940186, "learning_rate": 2.9088928230968272e-05, "loss": 4.2613, "step": 15855 }, { "epoch": 0.42617224237538626, "grad_norm": 2.4361369609832764, "learning_rate": 2.9082119024921694e-05, "loss": 4.0982, "step": 15860 }, { "epoch": 0.42630659680236466, "grad_norm": 2.3043746948242188, "learning_rate": 2.907530981887512e-05, "loss": 4.2159, "step": 15865 }, { "epoch": 0.426440951229343, "grad_norm": 2.6173923015594482, "learning_rate": 2.906850061282855e-05, "loss": 4.1797, "step": 15870 }, { "epoch": 0.4265753056563214, "grad_norm": 2.2595863342285156, "learning_rate": 2.906169140678197e-05, "loss": 4.2729, "step": 15875 }, { "epoch": 0.42670966008329975, "grad_norm": 2.6921050548553467, "learning_rate": 2.9054882200735393e-05, "loss": 4.1995, "step": 15880 }, { "epoch": 0.4268440145102781, "grad_norm": 2.3651561737060547, "learning_rate": 2.904807299468882e-05, "loss": 4.196, "step": 15885 }, { "epoch": 0.4269783689372565, "grad_norm": 2.7142176628112793, "learning_rate": 2.9041263788642248e-05, "loss": 4.1929, "step": 15890 }, { "epoch": 0.42711272336423484, "grad_norm": 2.3585288524627686, "learning_rate": 2.903445458259567e-05, "loss": 4.2118, "step": 15895 }, { "epoch": 0.42724707779121324, "grad_norm": 2.525831699371338, "learning_rate": 2.9027645376549096e-05, "loss": 4.2202, "step": 15900 }, { "epoch": 0.4273814322181916, "grad_norm": 2.475548505783081, "learning_rate": 2.9020836170502518e-05, "loss": 4.2349, "step": 15905 }, { "epoch": 0.42751578664517, "grad_norm": 2.3495709896087646, "learning_rate": 2.9014026964455947e-05, "loss": 4.3478, "step": 15910 }, { "epoch": 0.4276501410721483, "grad_norm": 2.495866298675537, "learning_rate": 2.9007217758409373e-05, "loss": 4.2731, "step": 15915 }, { "epoch": 0.42778449549912667, "grad_norm": 2.3341360092163086, "learning_rate": 2.9000408552362795e-05, "loss": 4.1796, "step": 15920 }, { "epoch": 0.42791884992610507, "grad_norm": 2.4594175815582275, "learning_rate": 2.899359934631622e-05, "loss": 4.2713, "step": 15925 }, { "epoch": 0.4280532043530834, "grad_norm": 2.548840284347534, "learning_rate": 2.898679014026965e-05, "loss": 4.2566, "step": 15930 }, { "epoch": 0.4281875587800618, "grad_norm": 2.6312308311462402, "learning_rate": 2.8979980934223072e-05, "loss": 4.2493, "step": 15935 }, { "epoch": 0.42832191320704016, "grad_norm": 2.3427841663360596, "learning_rate": 2.8973171728176494e-05, "loss": 4.1521, "step": 15940 }, { "epoch": 0.42845626763401856, "grad_norm": 2.225707769393921, "learning_rate": 2.896636252212992e-05, "loss": 4.2175, "step": 15945 }, { "epoch": 0.4285906220609969, "grad_norm": 2.355645179748535, "learning_rate": 2.895955331608335e-05, "loss": 4.2413, "step": 15950 }, { "epoch": 0.42872497648797525, "grad_norm": 2.465419054031372, "learning_rate": 2.895274411003677e-05, "loss": 4.3133, "step": 15955 }, { "epoch": 0.42885933091495365, "grad_norm": 2.4609131813049316, "learning_rate": 2.8945934903990197e-05, "loss": 4.1369, "step": 15960 }, { "epoch": 0.428993685341932, "grad_norm": 2.462458848953247, "learning_rate": 2.893912569794362e-05, "loss": 4.2616, "step": 15965 }, { "epoch": 0.4291280397689104, "grad_norm": 2.5317609310150146, "learning_rate": 2.8932316491897045e-05, "loss": 4.1, "step": 15970 }, { "epoch": 0.42926239419588874, "grad_norm": 2.3024344444274902, "learning_rate": 2.8925507285850474e-05, "loss": 4.3086, "step": 15975 }, { "epoch": 0.42939674862286714, "grad_norm": 2.417215585708618, "learning_rate": 2.8918698079803896e-05, "loss": 4.2283, "step": 15980 }, { "epoch": 0.4295311030498455, "grad_norm": 2.3241524696350098, "learning_rate": 2.891188887375732e-05, "loss": 4.2974, "step": 15985 }, { "epoch": 0.4296654574768239, "grad_norm": 2.4867942333221436, "learning_rate": 2.8905079667710744e-05, "loss": 4.1405, "step": 15990 }, { "epoch": 0.42979981190380223, "grad_norm": 2.3965344429016113, "learning_rate": 2.8898270461664173e-05, "loss": 4.2104, "step": 15995 }, { "epoch": 0.4299341663307806, "grad_norm": 2.107988119125366, "learning_rate": 2.8891461255617595e-05, "loss": 4.1407, "step": 16000 }, { "epoch": 0.430068520757759, "grad_norm": 2.2528016567230225, "learning_rate": 2.888465204957102e-05, "loss": 4.1995, "step": 16005 }, { "epoch": 0.4302028751847373, "grad_norm": 2.3667545318603516, "learning_rate": 2.8877842843524443e-05, "loss": 4.3516, "step": 16010 }, { "epoch": 0.4303372296117157, "grad_norm": 2.5429117679595947, "learning_rate": 2.8871033637477872e-05, "loss": 4.285, "step": 16015 }, { "epoch": 0.43047158403869407, "grad_norm": 2.8680622577667236, "learning_rate": 2.8864224431431298e-05, "loss": 4.2952, "step": 16020 }, { "epoch": 0.43060593846567247, "grad_norm": 2.4054863452911377, "learning_rate": 2.885741522538472e-05, "loss": 4.194, "step": 16025 }, { "epoch": 0.4307402928926508, "grad_norm": 2.44966459274292, "learning_rate": 2.8850606019338146e-05, "loss": 4.27, "step": 16030 }, { "epoch": 0.43087464731962916, "grad_norm": 2.4636518955230713, "learning_rate": 2.8843796813291575e-05, "loss": 4.3184, "step": 16035 }, { "epoch": 0.43100900174660756, "grad_norm": 2.4376299381256104, "learning_rate": 2.8836987607244997e-05, "loss": 4.1826, "step": 16040 }, { "epoch": 0.4311433561735859, "grad_norm": 2.610029458999634, "learning_rate": 2.883017840119842e-05, "loss": 4.1749, "step": 16045 }, { "epoch": 0.4312777106005643, "grad_norm": 2.3537282943725586, "learning_rate": 2.8823369195151845e-05, "loss": 4.1886, "step": 16050 }, { "epoch": 0.43141206502754265, "grad_norm": 2.2158799171447754, "learning_rate": 2.8816559989105274e-05, "loss": 4.1719, "step": 16055 }, { "epoch": 0.43154641945452105, "grad_norm": 2.2209975719451904, "learning_rate": 2.8809750783058697e-05, "loss": 4.2062, "step": 16060 }, { "epoch": 0.4316807738814994, "grad_norm": 2.4708924293518066, "learning_rate": 2.8802941577012122e-05, "loss": 4.2911, "step": 16065 }, { "epoch": 0.4318151283084778, "grad_norm": 2.4639086723327637, "learning_rate": 2.8796132370965544e-05, "loss": 4.2626, "step": 16070 }, { "epoch": 0.43194948273545614, "grad_norm": 2.3329756259918213, "learning_rate": 2.8789323164918974e-05, "loss": 4.1889, "step": 16075 }, { "epoch": 0.4320838371624345, "grad_norm": 2.6666438579559326, "learning_rate": 2.87825139588724e-05, "loss": 4.204, "step": 16080 }, { "epoch": 0.4322181915894129, "grad_norm": 2.5674540996551514, "learning_rate": 2.877570475282582e-05, "loss": 4.2896, "step": 16085 }, { "epoch": 0.4323525460163912, "grad_norm": 2.536684513092041, "learning_rate": 2.8768895546779244e-05, "loss": 4.2739, "step": 16090 }, { "epoch": 0.4324869004433696, "grad_norm": 2.595012903213501, "learning_rate": 2.8762086340732676e-05, "loss": 4.2995, "step": 16095 }, { "epoch": 0.43262125487034797, "grad_norm": 2.2966651916503906, "learning_rate": 2.87552771346861e-05, "loss": 4.2315, "step": 16100 }, { "epoch": 0.43275560929732637, "grad_norm": 2.257228374481201, "learning_rate": 2.874846792863952e-05, "loss": 4.2044, "step": 16105 }, { "epoch": 0.4328899637243047, "grad_norm": 2.4460296630859375, "learning_rate": 2.8741658722592946e-05, "loss": 4.2559, "step": 16110 }, { "epoch": 0.43302431815128306, "grad_norm": 2.3575544357299805, "learning_rate": 2.873484951654637e-05, "loss": 4.2435, "step": 16115 }, { "epoch": 0.43315867257826146, "grad_norm": 2.2947487831115723, "learning_rate": 2.8728040310499798e-05, "loss": 4.254, "step": 16120 }, { "epoch": 0.4332930270052398, "grad_norm": 2.544032096862793, "learning_rate": 2.8721231104453223e-05, "loss": 4.2012, "step": 16125 }, { "epoch": 0.4334273814322182, "grad_norm": 2.6002767086029053, "learning_rate": 2.8714421898406646e-05, "loss": 4.3138, "step": 16130 }, { "epoch": 0.43356173585919655, "grad_norm": 2.2768044471740723, "learning_rate": 2.870761269236007e-05, "loss": 4.2789, "step": 16135 }, { "epoch": 0.43369609028617495, "grad_norm": 2.584963798522949, "learning_rate": 2.87008034863135e-05, "loss": 4.4201, "step": 16140 }, { "epoch": 0.4338304447131533, "grad_norm": 2.327904224395752, "learning_rate": 2.8693994280266923e-05, "loss": 4.2532, "step": 16145 }, { "epoch": 0.43396479914013164, "grad_norm": 2.3140289783477783, "learning_rate": 2.8687185074220345e-05, "loss": 4.2682, "step": 16150 }, { "epoch": 0.43409915356711004, "grad_norm": 2.5426039695739746, "learning_rate": 2.868037586817377e-05, "loss": 4.2457, "step": 16155 }, { "epoch": 0.4342335079940884, "grad_norm": 2.2707035541534424, "learning_rate": 2.86735666621272e-05, "loss": 4.3181, "step": 16160 }, { "epoch": 0.4343678624210668, "grad_norm": 2.5963802337646484, "learning_rate": 2.8666757456080622e-05, "loss": 4.1863, "step": 16165 }, { "epoch": 0.43450221684804513, "grad_norm": 2.124098539352417, "learning_rate": 2.8659948250034047e-05, "loss": 4.2961, "step": 16170 }, { "epoch": 0.43463657127502353, "grad_norm": 2.386662483215332, "learning_rate": 2.865313904398747e-05, "loss": 4.165, "step": 16175 }, { "epoch": 0.4347709257020019, "grad_norm": 2.4544901847839355, "learning_rate": 2.86463298379409e-05, "loss": 4.1838, "step": 16180 }, { "epoch": 0.4349052801289803, "grad_norm": 2.22908878326416, "learning_rate": 2.8639520631894324e-05, "loss": 4.1704, "step": 16185 }, { "epoch": 0.4350396345559586, "grad_norm": 2.346069574356079, "learning_rate": 2.8632711425847747e-05, "loss": 4.2814, "step": 16190 }, { "epoch": 0.43517398898293697, "grad_norm": 2.26861310005188, "learning_rate": 2.862590221980117e-05, "loss": 4.2697, "step": 16195 }, { "epoch": 0.43530834340991537, "grad_norm": 2.4290709495544434, "learning_rate": 2.86190930137546e-05, "loss": 4.2381, "step": 16200 }, { "epoch": 0.4354426978368937, "grad_norm": 2.4715416431427, "learning_rate": 2.8612283807708024e-05, "loss": 4.2549, "step": 16205 }, { "epoch": 0.4355770522638721, "grad_norm": 2.6182286739349365, "learning_rate": 2.8605474601661446e-05, "loss": 4.3408, "step": 16210 }, { "epoch": 0.43571140669085046, "grad_norm": 2.336998701095581, "learning_rate": 2.859866539561487e-05, "loss": 4.248, "step": 16215 }, { "epoch": 0.43584576111782886, "grad_norm": 2.3119163513183594, "learning_rate": 2.85918561895683e-05, "loss": 4.3169, "step": 16220 }, { "epoch": 0.4359801155448072, "grad_norm": 2.2941818237304688, "learning_rate": 2.8585046983521723e-05, "loss": 4.1995, "step": 16225 }, { "epoch": 0.43611446997178555, "grad_norm": 2.4523491859436035, "learning_rate": 2.857823777747515e-05, "loss": 4.2168, "step": 16230 }, { "epoch": 0.43624882439876395, "grad_norm": 2.5257530212402344, "learning_rate": 2.857142857142857e-05, "loss": 4.236, "step": 16235 }, { "epoch": 0.4363831788257423, "grad_norm": 2.489412307739258, "learning_rate": 2.8564619365382e-05, "loss": 4.1993, "step": 16240 }, { "epoch": 0.4365175332527207, "grad_norm": 2.306957483291626, "learning_rate": 2.8557810159335426e-05, "loss": 4.1267, "step": 16245 }, { "epoch": 0.43665188767969904, "grad_norm": 2.3173604011535645, "learning_rate": 2.8551000953288848e-05, "loss": 4.29, "step": 16250 }, { "epoch": 0.43678624210667744, "grad_norm": 2.4237756729125977, "learning_rate": 2.854419174724227e-05, "loss": 4.2242, "step": 16255 }, { "epoch": 0.4369205965336558, "grad_norm": 2.4559576511383057, "learning_rate": 2.8537382541195696e-05, "loss": 4.1845, "step": 16260 }, { "epoch": 0.4370549509606341, "grad_norm": 2.5824778079986572, "learning_rate": 2.8530573335149125e-05, "loss": 4.1047, "step": 16265 }, { "epoch": 0.4371893053876125, "grad_norm": 2.972888231277466, "learning_rate": 2.8523764129102547e-05, "loss": 4.1979, "step": 16270 }, { "epoch": 0.43732365981459087, "grad_norm": 2.3336901664733887, "learning_rate": 2.8516954923055973e-05, "loss": 4.2603, "step": 16275 }, { "epoch": 0.43745801424156927, "grad_norm": 2.27392315864563, "learning_rate": 2.8510145717009395e-05, "loss": 4.2711, "step": 16280 }, { "epoch": 0.4375923686685476, "grad_norm": 2.2228550910949707, "learning_rate": 2.8503336510962824e-05, "loss": 4.1704, "step": 16285 }, { "epoch": 0.437726723095526, "grad_norm": 2.3461356163024902, "learning_rate": 2.849652730491625e-05, "loss": 4.1636, "step": 16290 }, { "epoch": 0.43786107752250436, "grad_norm": 2.279881715774536, "learning_rate": 2.8489718098869672e-05, "loss": 4.15, "step": 16295 }, { "epoch": 0.43799543194948276, "grad_norm": 2.360457181930542, "learning_rate": 2.8482908892823094e-05, "loss": 4.1969, "step": 16300 }, { "epoch": 0.4381297863764611, "grad_norm": 2.5320329666137695, "learning_rate": 2.8476099686776527e-05, "loss": 4.146, "step": 16305 }, { "epoch": 0.43826414080343945, "grad_norm": 2.245178699493408, "learning_rate": 2.846929048072995e-05, "loss": 4.3665, "step": 16310 }, { "epoch": 0.43839849523041785, "grad_norm": 2.297659397125244, "learning_rate": 2.846248127468337e-05, "loss": 4.2898, "step": 16315 }, { "epoch": 0.4385328496573962, "grad_norm": 2.2972676753997803, "learning_rate": 2.8455672068636797e-05, "loss": 4.2515, "step": 16320 }, { "epoch": 0.4386672040843746, "grad_norm": 2.63649582862854, "learning_rate": 2.8448862862590226e-05, "loss": 4.3153, "step": 16325 }, { "epoch": 0.43880155851135294, "grad_norm": 2.322937488555908, "learning_rate": 2.8442053656543648e-05, "loss": 4.1691, "step": 16330 }, { "epoch": 0.43893591293833134, "grad_norm": 2.166642904281616, "learning_rate": 2.8435244450497074e-05, "loss": 4.2791, "step": 16335 }, { "epoch": 0.4390702673653097, "grad_norm": 2.330350875854492, "learning_rate": 2.8428435244450496e-05, "loss": 4.3459, "step": 16340 }, { "epoch": 0.43920462179228803, "grad_norm": 2.468033790588379, "learning_rate": 2.8421626038403925e-05, "loss": 4.1909, "step": 16345 }, { "epoch": 0.43933897621926643, "grad_norm": 2.2964963912963867, "learning_rate": 2.841481683235735e-05, "loss": 4.2369, "step": 16350 }, { "epoch": 0.4394733306462448, "grad_norm": 2.519709825515747, "learning_rate": 2.8408007626310773e-05, "loss": 4.2757, "step": 16355 }, { "epoch": 0.4396076850732232, "grad_norm": 2.3391635417938232, "learning_rate": 2.8401198420264195e-05, "loss": 4.1615, "step": 16360 }, { "epoch": 0.4397420395002015, "grad_norm": 2.4118688106536865, "learning_rate": 2.8394389214217624e-05, "loss": 4.1488, "step": 16365 }, { "epoch": 0.4398763939271799, "grad_norm": 2.360112428665161, "learning_rate": 2.838758000817105e-05, "loss": 4.3131, "step": 16370 }, { "epoch": 0.44001074835415827, "grad_norm": 2.4447109699249268, "learning_rate": 2.8380770802124472e-05, "loss": 4.1939, "step": 16375 }, { "epoch": 0.44014510278113667, "grad_norm": 2.4136271476745605, "learning_rate": 2.8373961596077898e-05, "loss": 4.2756, "step": 16380 }, { "epoch": 0.440279457208115, "grad_norm": 2.199481964111328, "learning_rate": 2.8367152390031327e-05, "loss": 4.2668, "step": 16385 }, { "epoch": 0.44041381163509336, "grad_norm": 2.580078601837158, "learning_rate": 2.836034318398475e-05, "loss": 4.3542, "step": 16390 }, { "epoch": 0.44054816606207176, "grad_norm": 2.3920347690582275, "learning_rate": 2.8353533977938175e-05, "loss": 4.2211, "step": 16395 }, { "epoch": 0.4406825204890501, "grad_norm": 2.3929126262664795, "learning_rate": 2.8346724771891597e-05, "loss": 4.2629, "step": 16400 }, { "epoch": 0.4408168749160285, "grad_norm": 2.3883540630340576, "learning_rate": 2.833991556584502e-05, "loss": 4.1322, "step": 16405 }, { "epoch": 0.44095122934300685, "grad_norm": 2.2770512104034424, "learning_rate": 2.8333106359798452e-05, "loss": 4.0467, "step": 16410 }, { "epoch": 0.44108558376998525, "grad_norm": 2.3421852588653564, "learning_rate": 2.8326297153751874e-05, "loss": 4.2041, "step": 16415 }, { "epoch": 0.4412199381969636, "grad_norm": 2.461590528488159, "learning_rate": 2.8319487947705296e-05, "loss": 4.2816, "step": 16420 }, { "epoch": 0.44135429262394194, "grad_norm": 2.4068994522094727, "learning_rate": 2.8312678741658722e-05, "loss": 4.1461, "step": 16425 }, { "epoch": 0.44148864705092034, "grad_norm": 2.1470868587493896, "learning_rate": 2.830586953561215e-05, "loss": 4.1923, "step": 16430 }, { "epoch": 0.4416230014778987, "grad_norm": 2.259752035140991, "learning_rate": 2.8299060329565573e-05, "loss": 4.0845, "step": 16435 }, { "epoch": 0.4417573559048771, "grad_norm": 2.2395246028900146, "learning_rate": 2.8292251123519e-05, "loss": 4.2308, "step": 16440 }, { "epoch": 0.4418917103318554, "grad_norm": 2.5562379360198975, "learning_rate": 2.828544191747242e-05, "loss": 4.2582, "step": 16445 }, { "epoch": 0.4420260647588338, "grad_norm": 2.1990792751312256, "learning_rate": 2.827863271142585e-05, "loss": 4.0991, "step": 16450 }, { "epoch": 0.44216041918581217, "grad_norm": 2.6462621688842773, "learning_rate": 2.8271823505379276e-05, "loss": 4.219, "step": 16455 }, { "epoch": 0.4422947736127905, "grad_norm": 2.408582925796509, "learning_rate": 2.82650142993327e-05, "loss": 4.151, "step": 16460 }, { "epoch": 0.4424291280397689, "grad_norm": 2.3861005306243896, "learning_rate": 2.825820509328612e-05, "loss": 4.2187, "step": 16465 }, { "epoch": 0.44256348246674726, "grad_norm": 3.3405652046203613, "learning_rate": 2.825139588723955e-05, "loss": 4.2529, "step": 16470 }, { "epoch": 0.44269783689372566, "grad_norm": 2.3474910259246826, "learning_rate": 2.8244586681192975e-05, "loss": 4.1749, "step": 16475 }, { "epoch": 0.442832191320704, "grad_norm": 2.4719419479370117, "learning_rate": 2.8237777475146398e-05, "loss": 4.09, "step": 16480 }, { "epoch": 0.4429665457476824, "grad_norm": 2.3105177879333496, "learning_rate": 2.8230968269099823e-05, "loss": 4.2202, "step": 16485 }, { "epoch": 0.44310090017466075, "grad_norm": 2.320051431655884, "learning_rate": 2.8224159063053252e-05, "loss": 4.1952, "step": 16490 }, { "epoch": 0.44323525460163915, "grad_norm": 2.18603253364563, "learning_rate": 2.8217349857006675e-05, "loss": 4.2338, "step": 16495 }, { "epoch": 0.4433696090286175, "grad_norm": 2.512141704559326, "learning_rate": 2.82105406509601e-05, "loss": 4.2908, "step": 16500 }, { "epoch": 0.44350396345559584, "grad_norm": 2.251185178756714, "learning_rate": 2.8203731444913522e-05, "loss": 4.3063, "step": 16505 }, { "epoch": 0.44363831788257424, "grad_norm": 2.357961893081665, "learning_rate": 2.819692223886695e-05, "loss": 4.1761, "step": 16510 }, { "epoch": 0.4437726723095526, "grad_norm": 2.427928924560547, "learning_rate": 2.8190113032820377e-05, "loss": 4.2287, "step": 16515 }, { "epoch": 0.443907026736531, "grad_norm": 2.463430643081665, "learning_rate": 2.81833038267738e-05, "loss": 4.2661, "step": 16520 }, { "epoch": 0.44404138116350933, "grad_norm": 2.1994524002075195, "learning_rate": 2.8176494620727222e-05, "loss": 4.2353, "step": 16525 }, { "epoch": 0.44417573559048773, "grad_norm": 2.338409662246704, "learning_rate": 2.816968541468065e-05, "loss": 4.3711, "step": 16530 }, { "epoch": 0.4443100900174661, "grad_norm": 2.452298641204834, "learning_rate": 2.8162876208634076e-05, "loss": 4.1041, "step": 16535 }, { "epoch": 0.4444444444444444, "grad_norm": 2.5668511390686035, "learning_rate": 2.81560670025875e-05, "loss": 4.1343, "step": 16540 }, { "epoch": 0.4445787988714228, "grad_norm": 2.501283645629883, "learning_rate": 2.8149257796540924e-05, "loss": 4.252, "step": 16545 }, { "epoch": 0.44471315329840116, "grad_norm": 2.3662846088409424, "learning_rate": 2.8142448590494353e-05, "loss": 4.252, "step": 16550 }, { "epoch": 0.44484750772537957, "grad_norm": 2.4809865951538086, "learning_rate": 2.8135639384447776e-05, "loss": 4.1486, "step": 16555 }, { "epoch": 0.4449818621523579, "grad_norm": 2.655667543411255, "learning_rate": 2.81288301784012e-05, "loss": 4.159, "step": 16560 }, { "epoch": 0.4451162165793363, "grad_norm": 2.2696967124938965, "learning_rate": 2.8122020972354624e-05, "loss": 4.3239, "step": 16565 }, { "epoch": 0.44525057100631465, "grad_norm": 2.820187568664551, "learning_rate": 2.8115211766308046e-05, "loss": 4.369, "step": 16570 }, { "epoch": 0.445384925433293, "grad_norm": 2.1930160522460938, "learning_rate": 2.8108402560261475e-05, "loss": 4.1778, "step": 16575 }, { "epoch": 0.4455192798602714, "grad_norm": 2.3937129974365234, "learning_rate": 2.81015933542149e-05, "loss": 4.3096, "step": 16580 }, { "epoch": 0.44565363428724974, "grad_norm": 2.46380877494812, "learning_rate": 2.8094784148168323e-05, "loss": 4.2947, "step": 16585 }, { "epoch": 0.44578798871422814, "grad_norm": 2.473848581314087, "learning_rate": 2.808797494212175e-05, "loss": 4.1486, "step": 16590 }, { "epoch": 0.4459223431412065, "grad_norm": 2.1642918586730957, "learning_rate": 2.8081165736075178e-05, "loss": 4.3182, "step": 16595 }, { "epoch": 0.4460566975681849, "grad_norm": 2.41929030418396, "learning_rate": 2.80743565300286e-05, "loss": 4.2923, "step": 16600 }, { "epoch": 0.44619105199516323, "grad_norm": 2.5490310192108154, "learning_rate": 2.8067547323982025e-05, "loss": 4.2793, "step": 16605 }, { "epoch": 0.44632540642214164, "grad_norm": 2.5455315113067627, "learning_rate": 2.8060738117935448e-05, "loss": 4.1978, "step": 16610 }, { "epoch": 0.44645976084912, "grad_norm": 2.274678945541382, "learning_rate": 2.8053928911888877e-05, "loss": 4.2547, "step": 16615 }, { "epoch": 0.4465941152760983, "grad_norm": 2.721982955932617, "learning_rate": 2.8047119705842302e-05, "loss": 4.119, "step": 16620 }, { "epoch": 0.4467284697030767, "grad_norm": 2.5358617305755615, "learning_rate": 2.8040310499795725e-05, "loss": 4.2037, "step": 16625 }, { "epoch": 0.44686282413005507, "grad_norm": 2.323357582092285, "learning_rate": 2.8033501293749147e-05, "loss": 4.2736, "step": 16630 }, { "epoch": 0.44699717855703347, "grad_norm": 2.411027193069458, "learning_rate": 2.8026692087702576e-05, "loss": 4.1978, "step": 16635 }, { "epoch": 0.4471315329840118, "grad_norm": 2.6535913944244385, "learning_rate": 2.8019882881656e-05, "loss": 4.2786, "step": 16640 }, { "epoch": 0.4472658874109902, "grad_norm": 2.3581197261810303, "learning_rate": 2.8013073675609424e-05, "loss": 4.2367, "step": 16645 }, { "epoch": 0.44740024183796856, "grad_norm": 2.307725429534912, "learning_rate": 2.800626446956285e-05, "loss": 4.2442, "step": 16650 }, { "epoch": 0.4475345962649469, "grad_norm": 2.2293784618377686, "learning_rate": 2.799945526351628e-05, "loss": 4.2545, "step": 16655 }, { "epoch": 0.4476689506919253, "grad_norm": 2.4522855281829834, "learning_rate": 2.79926460574697e-05, "loss": 4.2684, "step": 16660 }, { "epoch": 0.44780330511890365, "grad_norm": 2.384861469268799, "learning_rate": 2.7985836851423127e-05, "loss": 4.2494, "step": 16665 }, { "epoch": 0.44793765954588205, "grad_norm": 2.419438123703003, "learning_rate": 2.797902764537655e-05, "loss": 4.2345, "step": 16670 }, { "epoch": 0.4480720139728604, "grad_norm": 2.312316417694092, "learning_rate": 2.7972218439329978e-05, "loss": 4.2121, "step": 16675 }, { "epoch": 0.4482063683998388, "grad_norm": 2.3352880477905273, "learning_rate": 2.79654092332834e-05, "loss": 4.1864, "step": 16680 }, { "epoch": 0.44834072282681714, "grad_norm": 2.4615979194641113, "learning_rate": 2.7958600027236826e-05, "loss": 4.2601, "step": 16685 }, { "epoch": 0.44847507725379554, "grad_norm": 2.1711671352386475, "learning_rate": 2.7951790821190248e-05, "loss": 4.2544, "step": 16690 }, { "epoch": 0.4486094316807739, "grad_norm": 2.54040789604187, "learning_rate": 2.7944981615143677e-05, "loss": 4.1784, "step": 16695 }, { "epoch": 0.44874378610775223, "grad_norm": 2.468158721923828, "learning_rate": 2.7938172409097103e-05, "loss": 4.2734, "step": 16700 }, { "epoch": 0.44887814053473063, "grad_norm": 2.4374277591705322, "learning_rate": 2.7931363203050525e-05, "loss": 4.1635, "step": 16705 }, { "epoch": 0.449012494961709, "grad_norm": 2.549758195877075, "learning_rate": 2.792455399700395e-05, "loss": 4.2302, "step": 16710 }, { "epoch": 0.4491468493886874, "grad_norm": 2.3844382762908936, "learning_rate": 2.7917744790957373e-05, "loss": 4.3163, "step": 16715 }, { "epoch": 0.4492812038156657, "grad_norm": 2.311689615249634, "learning_rate": 2.7910935584910802e-05, "loss": 4.226, "step": 16720 }, { "epoch": 0.4494155582426441, "grad_norm": 2.5290637016296387, "learning_rate": 2.7904126378864224e-05, "loss": 4.3294, "step": 16725 }, { "epoch": 0.44954991266962246, "grad_norm": 2.617156982421875, "learning_rate": 2.789731717281765e-05, "loss": 4.1743, "step": 16730 }, { "epoch": 0.4496842670966008, "grad_norm": 2.384774923324585, "learning_rate": 2.7890507966771072e-05, "loss": 4.2958, "step": 16735 }, { "epoch": 0.4498186215235792, "grad_norm": 2.4706966876983643, "learning_rate": 2.78836987607245e-05, "loss": 4.2683, "step": 16740 }, { "epoch": 0.44995297595055755, "grad_norm": 2.587946653366089, "learning_rate": 2.7876889554677927e-05, "loss": 4.3468, "step": 16745 }, { "epoch": 0.45008733037753595, "grad_norm": 2.2501325607299805, "learning_rate": 2.787008034863135e-05, "loss": 4.1242, "step": 16750 }, { "epoch": 0.4502216848045143, "grad_norm": 2.2695369720458984, "learning_rate": 2.7863271142584775e-05, "loss": 4.2609, "step": 16755 }, { "epoch": 0.4503560392314927, "grad_norm": 2.4646825790405273, "learning_rate": 2.7856461936538204e-05, "loss": 4.2798, "step": 16760 }, { "epoch": 0.45049039365847104, "grad_norm": 2.6789493560791016, "learning_rate": 2.7849652730491626e-05, "loss": 4.1272, "step": 16765 }, { "epoch": 0.4506247480854494, "grad_norm": 2.4618709087371826, "learning_rate": 2.7842843524445052e-05, "loss": 4.146, "step": 16770 }, { "epoch": 0.4507591025124278, "grad_norm": 2.187411069869995, "learning_rate": 2.7836034318398474e-05, "loss": 4.108, "step": 16775 }, { "epoch": 0.45089345693940613, "grad_norm": 2.58382248878479, "learning_rate": 2.7829225112351903e-05, "loss": 4.2531, "step": 16780 }, { "epoch": 0.45102781136638453, "grad_norm": 2.456743001937866, "learning_rate": 2.7822415906305325e-05, "loss": 4.3009, "step": 16785 }, { "epoch": 0.4511621657933629, "grad_norm": 2.5162689685821533, "learning_rate": 2.781560670025875e-05, "loss": 4.1872, "step": 16790 }, { "epoch": 0.4512965202203413, "grad_norm": 2.3018100261688232, "learning_rate": 2.7808797494212173e-05, "loss": 4.283, "step": 16795 }, { "epoch": 0.4514308746473196, "grad_norm": 2.4532432556152344, "learning_rate": 2.7801988288165602e-05, "loss": 4.2603, "step": 16800 }, { "epoch": 0.451565229074298, "grad_norm": 2.2642500400543213, "learning_rate": 2.7795179082119028e-05, "loss": 4.205, "step": 16805 }, { "epoch": 0.45169958350127637, "grad_norm": 2.441150426864624, "learning_rate": 2.778836987607245e-05, "loss": 4.1176, "step": 16810 }, { "epoch": 0.4518339379282547, "grad_norm": 2.5511395931243896, "learning_rate": 2.7781560670025876e-05, "loss": 4.3164, "step": 16815 }, { "epoch": 0.4519682923552331, "grad_norm": 2.7007107734680176, "learning_rate": 2.7774751463979305e-05, "loss": 4.1382, "step": 16820 }, { "epoch": 0.45210264678221146, "grad_norm": 2.3082969188690186, "learning_rate": 2.7767942257932727e-05, "loss": 4.1662, "step": 16825 }, { "epoch": 0.45223700120918986, "grad_norm": 2.1427457332611084, "learning_rate": 2.776113305188615e-05, "loss": 4.0684, "step": 16830 }, { "epoch": 0.4523713556361682, "grad_norm": 2.4540045261383057, "learning_rate": 2.7754323845839575e-05, "loss": 4.1479, "step": 16835 }, { "epoch": 0.4525057100631466, "grad_norm": 2.539919137954712, "learning_rate": 2.7747514639793004e-05, "loss": 4.309, "step": 16840 }, { "epoch": 0.45264006449012495, "grad_norm": 2.131465196609497, "learning_rate": 2.7740705433746427e-05, "loss": 4.2961, "step": 16845 }, { "epoch": 0.4527744189171033, "grad_norm": 2.4124083518981934, "learning_rate": 2.7733896227699852e-05, "loss": 4.1728, "step": 16850 }, { "epoch": 0.4529087733440817, "grad_norm": 2.4689693450927734, "learning_rate": 2.7727087021653274e-05, "loss": 4.2876, "step": 16855 }, { "epoch": 0.45304312777106004, "grad_norm": 2.43318510055542, "learning_rate": 2.77202778156067e-05, "loss": 4.214, "step": 16860 }, { "epoch": 0.45317748219803844, "grad_norm": 2.3449084758758545, "learning_rate": 2.771346860956013e-05, "loss": 4.277, "step": 16865 }, { "epoch": 0.4533118366250168, "grad_norm": 2.183117628097534, "learning_rate": 2.770665940351355e-05, "loss": 4.3097, "step": 16870 }, { "epoch": 0.4534461910519952, "grad_norm": 2.4677445888519287, "learning_rate": 2.7699850197466977e-05, "loss": 4.2961, "step": 16875 }, { "epoch": 0.45358054547897353, "grad_norm": 2.373764753341675, "learning_rate": 2.76930409914204e-05, "loss": 4.3081, "step": 16880 }, { "epoch": 0.4537148999059519, "grad_norm": 2.315349578857422, "learning_rate": 2.768623178537383e-05, "loss": 4.4613, "step": 16885 }, { "epoch": 0.4538492543329303, "grad_norm": 2.489366292953491, "learning_rate": 2.767942257932725e-05, "loss": 4.24, "step": 16890 }, { "epoch": 0.4539836087599086, "grad_norm": 2.3375909328460693, "learning_rate": 2.7672613373280676e-05, "loss": 4.3812, "step": 16895 }, { "epoch": 0.454117963186887, "grad_norm": 2.539177417755127, "learning_rate": 2.76658041672341e-05, "loss": 4.2191, "step": 16900 }, { "epoch": 0.45425231761386536, "grad_norm": 2.5202620029449463, "learning_rate": 2.7658994961187528e-05, "loss": 4.1013, "step": 16905 }, { "epoch": 0.45438667204084376, "grad_norm": 2.3986546993255615, "learning_rate": 2.7652185755140953e-05, "loss": 4.2712, "step": 16910 }, { "epoch": 0.4545210264678221, "grad_norm": 2.3960037231445312, "learning_rate": 2.7645376549094376e-05, "loss": 4.1948, "step": 16915 }, { "epoch": 0.4546553808948005, "grad_norm": 2.4452643394470215, "learning_rate": 2.76385673430478e-05, "loss": 4.2279, "step": 16920 }, { "epoch": 0.45478973532177885, "grad_norm": 2.552942991256714, "learning_rate": 2.763175813700123e-05, "loss": 4.2151, "step": 16925 }, { "epoch": 0.4549240897487572, "grad_norm": 2.4289443492889404, "learning_rate": 2.7624948930954653e-05, "loss": 4.1386, "step": 16930 }, { "epoch": 0.4550584441757356, "grad_norm": 2.380992889404297, "learning_rate": 2.7618139724908075e-05, "loss": 4.0512, "step": 16935 }, { "epoch": 0.45519279860271394, "grad_norm": 2.267054557800293, "learning_rate": 2.76113305188615e-05, "loss": 4.2075, "step": 16940 }, { "epoch": 0.45532715302969234, "grad_norm": 2.4519221782684326, "learning_rate": 2.760452131281493e-05, "loss": 4.0911, "step": 16945 }, { "epoch": 0.4554615074566707, "grad_norm": 2.26326060295105, "learning_rate": 2.7597712106768352e-05, "loss": 4.1349, "step": 16950 }, { "epoch": 0.4555958618836491, "grad_norm": 2.2596428394317627, "learning_rate": 2.7590902900721777e-05, "loss": 4.1746, "step": 16955 }, { "epoch": 0.45573021631062743, "grad_norm": 2.338775873184204, "learning_rate": 2.75840936946752e-05, "loss": 4.1893, "step": 16960 }, { "epoch": 0.4558645707376058, "grad_norm": 2.572000503540039, "learning_rate": 2.757728448862863e-05, "loss": 4.2582, "step": 16965 }, { "epoch": 0.4559989251645842, "grad_norm": 2.2636094093322754, "learning_rate": 2.7570475282582054e-05, "loss": 4.1843, "step": 16970 }, { "epoch": 0.4561332795915625, "grad_norm": 2.6545205116271973, "learning_rate": 2.7563666076535477e-05, "loss": 4.0825, "step": 16975 }, { "epoch": 0.4562676340185409, "grad_norm": 2.1999871730804443, "learning_rate": 2.7556856870488902e-05, "loss": 4.1855, "step": 16980 }, { "epoch": 0.45640198844551927, "grad_norm": 2.7572648525238037, "learning_rate": 2.755004766444233e-05, "loss": 4.3447, "step": 16985 }, { "epoch": 0.45653634287249767, "grad_norm": 2.615355968475342, "learning_rate": 2.7543238458395754e-05, "loss": 4.3807, "step": 16990 }, { "epoch": 0.456670697299476, "grad_norm": 2.6059889793395996, "learning_rate": 2.7536429252349176e-05, "loss": 4.3114, "step": 16995 }, { "epoch": 0.4568050517264544, "grad_norm": 2.7207038402557373, "learning_rate": 2.75296200463026e-05, "loss": 4.251, "step": 17000 }, { "epoch": 0.45693940615343276, "grad_norm": 2.539463996887207, "learning_rate": 2.7522810840256024e-05, "loss": 4.1167, "step": 17005 }, { "epoch": 0.4570737605804111, "grad_norm": 2.6501057147979736, "learning_rate": 2.7516001634209453e-05, "loss": 4.2057, "step": 17010 }, { "epoch": 0.4572081150073895, "grad_norm": 2.492187023162842, "learning_rate": 2.750919242816288e-05, "loss": 4.2154, "step": 17015 }, { "epoch": 0.45734246943436785, "grad_norm": 2.1854114532470703, "learning_rate": 2.75023832221163e-05, "loss": 4.2126, "step": 17020 }, { "epoch": 0.45747682386134625, "grad_norm": 2.345491409301758, "learning_rate": 2.7495574016069726e-05, "loss": 4.2608, "step": 17025 }, { "epoch": 0.4576111782883246, "grad_norm": 2.3118348121643066, "learning_rate": 2.7488764810023156e-05, "loss": 4.1381, "step": 17030 }, { "epoch": 0.457745532715303, "grad_norm": 2.4007415771484375, "learning_rate": 2.7481955603976578e-05, "loss": 4.193, "step": 17035 }, { "epoch": 0.45787988714228134, "grad_norm": 2.3045997619628906, "learning_rate": 2.747514639793e-05, "loss": 4.1246, "step": 17040 }, { "epoch": 0.4580142415692597, "grad_norm": 2.3691446781158447, "learning_rate": 2.7468337191883426e-05, "loss": 4.138, "step": 17045 }, { "epoch": 0.4581485959962381, "grad_norm": 2.2577271461486816, "learning_rate": 2.7461527985836855e-05, "loss": 4.2171, "step": 17050 }, { "epoch": 0.45828295042321643, "grad_norm": 2.394845962524414, "learning_rate": 2.7454718779790277e-05, "loss": 4.2636, "step": 17055 }, { "epoch": 0.45841730485019483, "grad_norm": 2.302999258041382, "learning_rate": 2.7447909573743703e-05, "loss": 4.2129, "step": 17060 }, { "epoch": 0.4585516592771732, "grad_norm": 2.6222727298736572, "learning_rate": 2.7441100367697125e-05, "loss": 4.1679, "step": 17065 }, { "epoch": 0.4586860137041516, "grad_norm": 2.294062852859497, "learning_rate": 2.7434291161650554e-05, "loss": 4.1649, "step": 17070 }, { "epoch": 0.4588203681311299, "grad_norm": 2.4459240436553955, "learning_rate": 2.742748195560398e-05, "loss": 4.171, "step": 17075 }, { "epoch": 0.45895472255810826, "grad_norm": 2.1085197925567627, "learning_rate": 2.7420672749557402e-05, "loss": 4.1963, "step": 17080 }, { "epoch": 0.45908907698508666, "grad_norm": 2.6606881618499756, "learning_rate": 2.7413863543510828e-05, "loss": 4.251, "step": 17085 }, { "epoch": 0.459223431412065, "grad_norm": 2.4329206943511963, "learning_rate": 2.7407054337464257e-05, "loss": 4.1332, "step": 17090 }, { "epoch": 0.4593577858390434, "grad_norm": 2.483705997467041, "learning_rate": 2.740024513141768e-05, "loss": 4.1251, "step": 17095 }, { "epoch": 0.45949214026602175, "grad_norm": 2.378059148788452, "learning_rate": 2.73934359253711e-05, "loss": 4.1439, "step": 17100 }, { "epoch": 0.45962649469300015, "grad_norm": 2.274507522583008, "learning_rate": 2.7386626719324527e-05, "loss": 4.2173, "step": 17105 }, { "epoch": 0.4597608491199785, "grad_norm": 2.444655179977417, "learning_rate": 2.7379817513277956e-05, "loss": 4.2779, "step": 17110 }, { "epoch": 0.4598952035469569, "grad_norm": 2.2635371685028076, "learning_rate": 2.7373008307231378e-05, "loss": 4.2555, "step": 17115 }, { "epoch": 0.46002955797393524, "grad_norm": 2.609412908554077, "learning_rate": 2.7366199101184804e-05, "loss": 4.4117, "step": 17120 }, { "epoch": 0.4601639124009136, "grad_norm": 2.393961191177368, "learning_rate": 2.7359389895138226e-05, "loss": 4.327, "step": 17125 }, { "epoch": 0.460298266827892, "grad_norm": 2.4917209148406982, "learning_rate": 2.7352580689091655e-05, "loss": 4.2426, "step": 17130 }, { "epoch": 0.46043262125487033, "grad_norm": 2.5080511569976807, "learning_rate": 2.734577148304508e-05, "loss": 4.1369, "step": 17135 }, { "epoch": 0.46056697568184873, "grad_norm": 2.3354811668395996, "learning_rate": 2.7338962276998503e-05, "loss": 4.166, "step": 17140 }, { "epoch": 0.4607013301088271, "grad_norm": 2.3345820903778076, "learning_rate": 2.7332153070951925e-05, "loss": 4.2162, "step": 17145 }, { "epoch": 0.4608356845358055, "grad_norm": 2.4131319522857666, "learning_rate": 2.732534386490535e-05, "loss": 4.2319, "step": 17150 }, { "epoch": 0.4609700389627838, "grad_norm": 2.32200026512146, "learning_rate": 2.731853465885878e-05, "loss": 4.2485, "step": 17155 }, { "epoch": 0.46110439338976217, "grad_norm": 2.574925422668457, "learning_rate": 2.7311725452812202e-05, "loss": 4.2692, "step": 17160 }, { "epoch": 0.46123874781674057, "grad_norm": 2.3081295490264893, "learning_rate": 2.7304916246765628e-05, "loss": 4.349, "step": 17165 }, { "epoch": 0.4613731022437189, "grad_norm": 2.494278907775879, "learning_rate": 2.729810704071905e-05, "loss": 4.2664, "step": 17170 }, { "epoch": 0.4615074566706973, "grad_norm": 2.282325029373169, "learning_rate": 2.729129783467248e-05, "loss": 4.2515, "step": 17175 }, { "epoch": 0.46164181109767566, "grad_norm": 2.6024065017700195, "learning_rate": 2.7284488628625905e-05, "loss": 4.1557, "step": 17180 }, { "epoch": 0.46177616552465406, "grad_norm": 2.3965132236480713, "learning_rate": 2.7277679422579327e-05, "loss": 4.3227, "step": 17185 }, { "epoch": 0.4619105199516324, "grad_norm": 2.4947686195373535, "learning_rate": 2.7270870216532753e-05, "loss": 4.2534, "step": 17190 }, { "epoch": 0.46204487437861075, "grad_norm": 2.54958438873291, "learning_rate": 2.7264061010486182e-05, "loss": 4.2254, "step": 17195 }, { "epoch": 0.46217922880558915, "grad_norm": 2.666404962539673, "learning_rate": 2.7257251804439604e-05, "loss": 4.0959, "step": 17200 }, { "epoch": 0.4623135832325675, "grad_norm": 2.3037772178649902, "learning_rate": 2.7250442598393026e-05, "loss": 4.0639, "step": 17205 }, { "epoch": 0.4624479376595459, "grad_norm": 2.432457685470581, "learning_rate": 2.7243633392346452e-05, "loss": 4.3129, "step": 17210 }, { "epoch": 0.46258229208652424, "grad_norm": 2.548823833465576, "learning_rate": 2.723682418629988e-05, "loss": 4.2745, "step": 17215 }, { "epoch": 0.46271664651350264, "grad_norm": 2.524909734725952, "learning_rate": 2.7230014980253303e-05, "loss": 4.2576, "step": 17220 }, { "epoch": 0.462851000940481, "grad_norm": 2.4421544075012207, "learning_rate": 2.722320577420673e-05, "loss": 4.2409, "step": 17225 }, { "epoch": 0.4629853553674594, "grad_norm": 2.226337432861328, "learning_rate": 2.721639656816015e-05, "loss": 4.1865, "step": 17230 }, { "epoch": 0.46311970979443773, "grad_norm": 2.6972339153289795, "learning_rate": 2.720958736211358e-05, "loss": 4.2183, "step": 17235 }, { "epoch": 0.4632540642214161, "grad_norm": 2.2679033279418945, "learning_rate": 2.7202778156067006e-05, "loss": 4.2592, "step": 17240 }, { "epoch": 0.4633884186483945, "grad_norm": 2.443800210952759, "learning_rate": 2.7195968950020428e-05, "loss": 4.2271, "step": 17245 }, { "epoch": 0.4635227730753728, "grad_norm": 2.4577763080596924, "learning_rate": 2.718915974397385e-05, "loss": 4.2984, "step": 17250 }, { "epoch": 0.4636571275023512, "grad_norm": 2.6686177253723145, "learning_rate": 2.7182350537927283e-05, "loss": 4.2698, "step": 17255 }, { "epoch": 0.46379148192932956, "grad_norm": 2.4595165252685547, "learning_rate": 2.7175541331880705e-05, "loss": 4.1804, "step": 17260 }, { "epoch": 0.46392583635630796, "grad_norm": 2.492065668106079, "learning_rate": 2.7168732125834128e-05, "loss": 4.1254, "step": 17265 }, { "epoch": 0.4640601907832863, "grad_norm": 2.230637311935425, "learning_rate": 2.7161922919787553e-05, "loss": 4.3272, "step": 17270 }, { "epoch": 0.46419454521026465, "grad_norm": 2.575864553451538, "learning_rate": 2.7155113713740982e-05, "loss": 4.0523, "step": 17275 }, { "epoch": 0.46432889963724305, "grad_norm": 2.1774120330810547, "learning_rate": 2.7148304507694405e-05, "loss": 4.2356, "step": 17280 }, { "epoch": 0.4644632540642214, "grad_norm": 2.5381484031677246, "learning_rate": 2.714149530164783e-05, "loss": 4.3091, "step": 17285 }, { "epoch": 0.4645976084911998, "grad_norm": 2.4964675903320312, "learning_rate": 2.7134686095601252e-05, "loss": 4.2254, "step": 17290 }, { "epoch": 0.46473196291817814, "grad_norm": 2.316100835800171, "learning_rate": 2.712787688955468e-05, "loss": 4.1697, "step": 17295 }, { "epoch": 0.46486631734515654, "grad_norm": 2.543409585952759, "learning_rate": 2.7121067683508107e-05, "loss": 4.4318, "step": 17300 }, { "epoch": 0.4650006717721349, "grad_norm": 2.597555637359619, "learning_rate": 2.711425847746153e-05, "loss": 4.2439, "step": 17305 }, { "epoch": 0.4651350261991133, "grad_norm": 2.7488832473754883, "learning_rate": 2.710744927141495e-05, "loss": 4.313, "step": 17310 }, { "epoch": 0.46526938062609163, "grad_norm": 2.3650803565979004, "learning_rate": 2.7100640065368377e-05, "loss": 4.1937, "step": 17315 }, { "epoch": 0.46540373505307, "grad_norm": 2.3278439044952393, "learning_rate": 2.7093830859321806e-05, "loss": 4.2019, "step": 17320 }, { "epoch": 0.4655380894800484, "grad_norm": 2.516564130783081, "learning_rate": 2.708702165327523e-05, "loss": 4.0634, "step": 17325 }, { "epoch": 0.4656724439070267, "grad_norm": 2.408878803253174, "learning_rate": 2.7080212447228654e-05, "loss": 4.1538, "step": 17330 }, { "epoch": 0.4658067983340051, "grad_norm": 2.5517773628234863, "learning_rate": 2.7073403241182077e-05, "loss": 4.1877, "step": 17335 }, { "epoch": 0.46594115276098347, "grad_norm": 2.3522565364837646, "learning_rate": 2.7066594035135506e-05, "loss": 4.215, "step": 17340 }, { "epoch": 0.46607550718796187, "grad_norm": 2.408596992492676, "learning_rate": 2.705978482908893e-05, "loss": 4.2429, "step": 17345 }, { "epoch": 0.4662098616149402, "grad_norm": 2.480827808380127, "learning_rate": 2.7052975623042354e-05, "loss": 4.2463, "step": 17350 }, { "epoch": 0.46634421604191856, "grad_norm": 2.461484432220459, "learning_rate": 2.7046166416995776e-05, "loss": 4.2163, "step": 17355 }, { "epoch": 0.46647857046889696, "grad_norm": 2.4052164554595947, "learning_rate": 2.7039357210949208e-05, "loss": 4.2415, "step": 17360 }, { "epoch": 0.4666129248958753, "grad_norm": 2.864283323287964, "learning_rate": 2.703254800490263e-05, "loss": 4.2927, "step": 17365 }, { "epoch": 0.4667472793228537, "grad_norm": 2.6016738414764404, "learning_rate": 2.7025738798856053e-05, "loss": 4.2187, "step": 17370 }, { "epoch": 0.46688163374983205, "grad_norm": 2.4745779037475586, "learning_rate": 2.701892959280948e-05, "loss": 4.2827, "step": 17375 }, { "epoch": 0.46701598817681045, "grad_norm": 2.3813042640686035, "learning_rate": 2.7012120386762907e-05, "loss": 4.2336, "step": 17380 }, { "epoch": 0.4671503426037888, "grad_norm": 2.422985792160034, "learning_rate": 2.700531118071633e-05, "loss": 4.2419, "step": 17385 }, { "epoch": 0.46728469703076714, "grad_norm": 2.4753432273864746, "learning_rate": 2.6998501974669755e-05, "loss": 4.0826, "step": 17390 }, { "epoch": 0.46741905145774554, "grad_norm": 2.1041274070739746, "learning_rate": 2.6991692768623178e-05, "loss": 4.191, "step": 17395 }, { "epoch": 0.4675534058847239, "grad_norm": 2.290698528289795, "learning_rate": 2.6984883562576607e-05, "loss": 4.1772, "step": 17400 }, { "epoch": 0.4676877603117023, "grad_norm": 2.7109265327453613, "learning_rate": 2.6978074356530032e-05, "loss": 4.1148, "step": 17405 }, { "epoch": 0.46782211473868063, "grad_norm": 2.367698907852173, "learning_rate": 2.6971265150483455e-05, "loss": 4.0795, "step": 17410 }, { "epoch": 0.46795646916565903, "grad_norm": 2.3059141635894775, "learning_rate": 2.6964455944436877e-05, "loss": 4.1232, "step": 17415 }, { "epoch": 0.4680908235926374, "grad_norm": 2.4251043796539307, "learning_rate": 2.6957646738390306e-05, "loss": 4.1203, "step": 17420 }, { "epoch": 0.4682251780196158, "grad_norm": 2.641751766204834, "learning_rate": 2.695083753234373e-05, "loss": 4.3078, "step": 17425 }, { "epoch": 0.4683595324465941, "grad_norm": 2.50394344329834, "learning_rate": 2.6944028326297154e-05, "loss": 4.1737, "step": 17430 }, { "epoch": 0.46849388687357246, "grad_norm": 2.2386152744293213, "learning_rate": 2.693721912025058e-05, "loss": 4.2443, "step": 17435 }, { "epoch": 0.46862824130055086, "grad_norm": 2.272341251373291, "learning_rate": 2.693040991420401e-05, "loss": 4.2314, "step": 17440 }, { "epoch": 0.4687625957275292, "grad_norm": 2.4722914695739746, "learning_rate": 2.692360070815743e-05, "loss": 4.2652, "step": 17445 }, { "epoch": 0.4688969501545076, "grad_norm": 2.5747063159942627, "learning_rate": 2.6916791502110857e-05, "loss": 4.2788, "step": 17450 }, { "epoch": 0.46903130458148595, "grad_norm": 2.4937143325805664, "learning_rate": 2.690998229606428e-05, "loss": 4.2571, "step": 17455 }, { "epoch": 0.46916565900846435, "grad_norm": 2.3794162273406982, "learning_rate": 2.69031730900177e-05, "loss": 4.2946, "step": 17460 }, { "epoch": 0.4693000134354427, "grad_norm": 2.4419944286346436, "learning_rate": 2.6896363883971133e-05, "loss": 4.1737, "step": 17465 }, { "epoch": 0.46943436786242104, "grad_norm": 2.308689594268799, "learning_rate": 2.6889554677924556e-05, "loss": 4.144, "step": 17470 }, { "epoch": 0.46956872228939944, "grad_norm": 2.5754618644714355, "learning_rate": 2.6882745471877978e-05, "loss": 4.1665, "step": 17475 }, { "epoch": 0.4697030767163778, "grad_norm": 2.61972975730896, "learning_rate": 2.6875936265831404e-05, "loss": 4.1958, "step": 17480 }, { "epoch": 0.4698374311433562, "grad_norm": 2.571610450744629, "learning_rate": 2.6869127059784833e-05, "loss": 4.2231, "step": 17485 }, { "epoch": 0.46997178557033453, "grad_norm": 2.424769163131714, "learning_rate": 2.6862317853738255e-05, "loss": 4.3764, "step": 17490 }, { "epoch": 0.47010613999731293, "grad_norm": 2.2183611392974854, "learning_rate": 2.685550864769168e-05, "loss": 4.2899, "step": 17495 }, { "epoch": 0.4702404944242913, "grad_norm": 2.213998317718506, "learning_rate": 2.6848699441645103e-05, "loss": 4.1956, "step": 17500 }, { "epoch": 0.4703748488512696, "grad_norm": 2.266749858856201, "learning_rate": 2.6841890235598532e-05, "loss": 4.2263, "step": 17505 }, { "epoch": 0.470509203278248, "grad_norm": 2.4420711994171143, "learning_rate": 2.6835081029551958e-05, "loss": 4.2332, "step": 17510 }, { "epoch": 0.47064355770522637, "grad_norm": 2.535036325454712, "learning_rate": 2.682827182350538e-05, "loss": 4.2994, "step": 17515 }, { "epoch": 0.47077791213220477, "grad_norm": 2.3297159671783447, "learning_rate": 2.6821462617458802e-05, "loss": 4.1712, "step": 17520 }, { "epoch": 0.4709122665591831, "grad_norm": 2.4511682987213135, "learning_rate": 2.681465341141223e-05, "loss": 4.2028, "step": 17525 }, { "epoch": 0.4710466209861615, "grad_norm": 2.225996732711792, "learning_rate": 2.6807844205365657e-05, "loss": 4.145, "step": 17530 }, { "epoch": 0.47118097541313986, "grad_norm": 2.2444803714752197, "learning_rate": 2.680103499931908e-05, "loss": 4.1656, "step": 17535 }, { "epoch": 0.47131532984011826, "grad_norm": 2.502364158630371, "learning_rate": 2.6794225793272505e-05, "loss": 4.1595, "step": 17540 }, { "epoch": 0.4714496842670966, "grad_norm": 2.463336229324341, "learning_rate": 2.6787416587225934e-05, "loss": 4.2751, "step": 17545 }, { "epoch": 0.47158403869407495, "grad_norm": 2.412121295928955, "learning_rate": 2.6780607381179356e-05, "loss": 4.181, "step": 17550 }, { "epoch": 0.47171839312105335, "grad_norm": 2.612711191177368, "learning_rate": 2.6773798175132782e-05, "loss": 4.1914, "step": 17555 }, { "epoch": 0.4718527475480317, "grad_norm": 2.5185494422912598, "learning_rate": 2.6766988969086204e-05, "loss": 4.1948, "step": 17560 }, { "epoch": 0.4719871019750101, "grad_norm": 2.2122249603271484, "learning_rate": 2.6760179763039633e-05, "loss": 4.2405, "step": 17565 }, { "epoch": 0.47212145640198844, "grad_norm": 2.373488187789917, "learning_rate": 2.6753370556993055e-05, "loss": 4.1455, "step": 17570 }, { "epoch": 0.47225581082896684, "grad_norm": 2.4546902179718018, "learning_rate": 2.674656135094648e-05, "loss": 4.1075, "step": 17575 }, { "epoch": 0.4723901652559452, "grad_norm": 2.311859130859375, "learning_rate": 2.6739752144899903e-05, "loss": 4.1556, "step": 17580 }, { "epoch": 0.4725245196829235, "grad_norm": 2.518477201461792, "learning_rate": 2.6732942938853332e-05, "loss": 4.0893, "step": 17585 }, { "epoch": 0.4726588741099019, "grad_norm": 2.4475951194763184, "learning_rate": 2.6726133732806758e-05, "loss": 4.2612, "step": 17590 }, { "epoch": 0.4727932285368803, "grad_norm": 2.544950246810913, "learning_rate": 2.671932452676018e-05, "loss": 4.1614, "step": 17595 }, { "epoch": 0.4729275829638587, "grad_norm": 2.3136157989501953, "learning_rate": 2.6712515320713606e-05, "loss": 4.2227, "step": 17600 }, { "epoch": 0.473061937390837, "grad_norm": 2.471916437149048, "learning_rate": 2.6705706114667028e-05, "loss": 4.2485, "step": 17605 }, { "epoch": 0.4731962918178154, "grad_norm": 2.614469289779663, "learning_rate": 2.6698896908620457e-05, "loss": 4.1375, "step": 17610 }, { "epoch": 0.47333064624479376, "grad_norm": 2.3315632343292236, "learning_rate": 2.6692087702573883e-05, "loss": 4.2119, "step": 17615 }, { "epoch": 0.47346500067177216, "grad_norm": 2.298759937286377, "learning_rate": 2.6685278496527305e-05, "loss": 4.2169, "step": 17620 }, { "epoch": 0.4735993550987505, "grad_norm": 2.42462158203125, "learning_rate": 2.6678469290480727e-05, "loss": 4.1998, "step": 17625 }, { "epoch": 0.47373370952572885, "grad_norm": 2.3904147148132324, "learning_rate": 2.6671660084434156e-05, "loss": 4.1437, "step": 17630 }, { "epoch": 0.47386806395270725, "grad_norm": 2.4099156856536865, "learning_rate": 2.6664850878387582e-05, "loss": 4.234, "step": 17635 }, { "epoch": 0.4740024183796856, "grad_norm": 2.592886447906494, "learning_rate": 2.6658041672341004e-05, "loss": 4.1939, "step": 17640 }, { "epoch": 0.474136772806664, "grad_norm": 2.1890766620635986, "learning_rate": 2.665123246629443e-05, "loss": 4.3098, "step": 17645 }, { "epoch": 0.47427112723364234, "grad_norm": 2.3893442153930664, "learning_rate": 2.664442326024786e-05, "loss": 4.2674, "step": 17650 }, { "epoch": 0.47440548166062074, "grad_norm": 2.4617128372192383, "learning_rate": 2.663761405420128e-05, "loss": 4.1032, "step": 17655 }, { "epoch": 0.4745398360875991, "grad_norm": 2.385512590408325, "learning_rate": 2.6630804848154707e-05, "loss": 4.2033, "step": 17660 }, { "epoch": 0.47467419051457743, "grad_norm": 2.579432487487793, "learning_rate": 2.662399564210813e-05, "loss": 4.1823, "step": 17665 }, { "epoch": 0.47480854494155583, "grad_norm": 2.691681385040283, "learning_rate": 2.661718643606156e-05, "loss": 4.2908, "step": 17670 }, { "epoch": 0.4749428993685342, "grad_norm": 2.675417423248291, "learning_rate": 2.661037723001498e-05, "loss": 4.3303, "step": 17675 }, { "epoch": 0.4750772537955126, "grad_norm": 2.2550888061523438, "learning_rate": 2.6603568023968406e-05, "loss": 4.2557, "step": 17680 }, { "epoch": 0.4752116082224909, "grad_norm": 2.2960779666900635, "learning_rate": 2.659675881792183e-05, "loss": 4.2482, "step": 17685 }, { "epoch": 0.4753459626494693, "grad_norm": 2.44120192527771, "learning_rate": 2.6589949611875258e-05, "loss": 4.3297, "step": 17690 }, { "epoch": 0.47548031707644767, "grad_norm": 2.276158332824707, "learning_rate": 2.6583140405828683e-05, "loss": 4.1763, "step": 17695 }, { "epoch": 0.475614671503426, "grad_norm": 2.441298007965088, "learning_rate": 2.6576331199782106e-05, "loss": 4.288, "step": 17700 }, { "epoch": 0.4757490259304044, "grad_norm": 2.1532068252563477, "learning_rate": 2.656952199373553e-05, "loss": 4.2026, "step": 17705 }, { "epoch": 0.47588338035738276, "grad_norm": 2.3710954189300537, "learning_rate": 2.656271278768896e-05, "loss": 4.235, "step": 17710 }, { "epoch": 0.47601773478436116, "grad_norm": 2.4574921131134033, "learning_rate": 2.6555903581642382e-05, "loss": 4.1355, "step": 17715 }, { "epoch": 0.4761520892113395, "grad_norm": 2.499556303024292, "learning_rate": 2.6549094375595808e-05, "loss": 4.2417, "step": 17720 }, { "epoch": 0.4762864436383179, "grad_norm": 2.3928616046905518, "learning_rate": 2.654228516954923e-05, "loss": 4.1453, "step": 17725 }, { "epoch": 0.47642079806529625, "grad_norm": 2.384659767150879, "learning_rate": 2.653547596350266e-05, "loss": 4.2524, "step": 17730 }, { "epoch": 0.47655515249227465, "grad_norm": 2.28247332572937, "learning_rate": 2.6528666757456082e-05, "loss": 4.0807, "step": 17735 }, { "epoch": 0.476689506919253, "grad_norm": 2.531473398208618, "learning_rate": 2.6521857551409507e-05, "loss": 4.2648, "step": 17740 }, { "epoch": 0.47682386134623134, "grad_norm": 2.4178764820098877, "learning_rate": 2.651504834536293e-05, "loss": 4.2422, "step": 17745 }, { "epoch": 0.47695821577320974, "grad_norm": 2.4847092628479004, "learning_rate": 2.6508239139316355e-05, "loss": 4.1256, "step": 17750 }, { "epoch": 0.4770925702001881, "grad_norm": 2.4517786502838135, "learning_rate": 2.6501429933269784e-05, "loss": 4.218, "step": 17755 }, { "epoch": 0.4772269246271665, "grad_norm": 2.4654595851898193, "learning_rate": 2.6494620727223207e-05, "loss": 4.1453, "step": 17760 }, { "epoch": 0.4773612790541448, "grad_norm": 2.3986763954162598, "learning_rate": 2.6487811521176632e-05, "loss": 4.1833, "step": 17765 }, { "epoch": 0.4774956334811232, "grad_norm": 2.4019174575805664, "learning_rate": 2.6481002315130055e-05, "loss": 4.2353, "step": 17770 }, { "epoch": 0.47762998790810157, "grad_norm": 2.45977783203125, "learning_rate": 2.6474193109083484e-05, "loss": 4.126, "step": 17775 }, { "epoch": 0.4777643423350799, "grad_norm": 2.6667075157165527, "learning_rate": 2.6467383903036906e-05, "loss": 4.1043, "step": 17780 }, { "epoch": 0.4778986967620583, "grad_norm": 2.2129361629486084, "learning_rate": 2.646057469699033e-05, "loss": 4.186, "step": 17785 }, { "epoch": 0.47803305118903666, "grad_norm": 2.371061325073242, "learning_rate": 2.6453765490943754e-05, "loss": 4.3292, "step": 17790 }, { "epoch": 0.47816740561601506, "grad_norm": 2.7223236560821533, "learning_rate": 2.6446956284897183e-05, "loss": 4.1094, "step": 17795 }, { "epoch": 0.4783017600429934, "grad_norm": 2.4327235221862793, "learning_rate": 2.644014707885061e-05, "loss": 4.1131, "step": 17800 }, { "epoch": 0.4784361144699718, "grad_norm": 2.59607195854187, "learning_rate": 2.643333787280403e-05, "loss": 4.1903, "step": 17805 }, { "epoch": 0.47857046889695015, "grad_norm": 2.2929842472076416, "learning_rate": 2.6426528666757456e-05, "loss": 4.1537, "step": 17810 }, { "epoch": 0.4787048233239285, "grad_norm": 2.3058764934539795, "learning_rate": 2.6419719460710885e-05, "loss": 4.1598, "step": 17815 }, { "epoch": 0.4788391777509069, "grad_norm": 2.4818291664123535, "learning_rate": 2.6412910254664308e-05, "loss": 4.3039, "step": 17820 }, { "epoch": 0.47897353217788524, "grad_norm": 2.560182809829712, "learning_rate": 2.6406101048617733e-05, "loss": 4.2255, "step": 17825 }, { "epoch": 0.47910788660486364, "grad_norm": 2.327904462814331, "learning_rate": 2.6399291842571156e-05, "loss": 4.2429, "step": 17830 }, { "epoch": 0.479242241031842, "grad_norm": 2.2578577995300293, "learning_rate": 2.6392482636524585e-05, "loss": 4.3041, "step": 17835 }, { "epoch": 0.4793765954588204, "grad_norm": 2.7132349014282227, "learning_rate": 2.6385673430478007e-05, "loss": 4.1051, "step": 17840 }, { "epoch": 0.47951094988579873, "grad_norm": 2.4492204189300537, "learning_rate": 2.6378864224431433e-05, "loss": 4.2183, "step": 17845 }, { "epoch": 0.47964530431277713, "grad_norm": 2.81508207321167, "learning_rate": 2.6372055018384855e-05, "loss": 4.1398, "step": 17850 }, { "epoch": 0.4797796587397555, "grad_norm": 2.25669002532959, "learning_rate": 2.6365245812338284e-05, "loss": 4.2405, "step": 17855 }, { "epoch": 0.4799140131667338, "grad_norm": 2.654933452606201, "learning_rate": 2.635843660629171e-05, "loss": 4.2503, "step": 17860 }, { "epoch": 0.4800483675937122, "grad_norm": 2.435884714126587, "learning_rate": 2.6351627400245132e-05, "loss": 4.1012, "step": 17865 }, { "epoch": 0.48018272202069057, "grad_norm": 2.656956672668457, "learning_rate": 2.6344818194198558e-05, "loss": 4.2217, "step": 17870 }, { "epoch": 0.48031707644766897, "grad_norm": 2.3096468448638916, "learning_rate": 2.6338008988151987e-05, "loss": 4.1609, "step": 17875 }, { "epoch": 0.4804514308746473, "grad_norm": 2.297157049179077, "learning_rate": 2.633119978210541e-05, "loss": 4.117, "step": 17880 }, { "epoch": 0.4805857853016257, "grad_norm": 2.4415862560272217, "learning_rate": 2.632439057605883e-05, "loss": 4.2962, "step": 17885 }, { "epoch": 0.48072013972860406, "grad_norm": 2.620678186416626, "learning_rate": 2.6317581370012257e-05, "loss": 4.2288, "step": 17890 }, { "epoch": 0.4808544941555824, "grad_norm": 2.480482578277588, "learning_rate": 2.631077216396568e-05, "loss": 4.1715, "step": 17895 }, { "epoch": 0.4809888485825608, "grad_norm": 2.3759844303131104, "learning_rate": 2.6303962957919108e-05, "loss": 4.2503, "step": 17900 }, { "epoch": 0.48112320300953915, "grad_norm": 2.4821155071258545, "learning_rate": 2.6297153751872534e-05, "loss": 4.1572, "step": 17905 }, { "epoch": 0.48125755743651755, "grad_norm": 2.5098817348480225, "learning_rate": 2.6290344545825956e-05, "loss": 4.1967, "step": 17910 }, { "epoch": 0.4813919118634959, "grad_norm": 2.459284782409668, "learning_rate": 2.628353533977938e-05, "loss": 4.1701, "step": 17915 }, { "epoch": 0.4815262662904743, "grad_norm": 2.27101469039917, "learning_rate": 2.627672613373281e-05, "loss": 4.1538, "step": 17920 }, { "epoch": 0.48166062071745264, "grad_norm": 2.3389089107513428, "learning_rate": 2.6269916927686233e-05, "loss": 4.1438, "step": 17925 }, { "epoch": 0.481794975144431, "grad_norm": 2.3425369262695312, "learning_rate": 2.626310772163966e-05, "loss": 4.1803, "step": 17930 }, { "epoch": 0.4819293295714094, "grad_norm": 2.4553651809692383, "learning_rate": 2.625629851559308e-05, "loss": 4.233, "step": 17935 }, { "epoch": 0.4820636839983877, "grad_norm": 2.6110517978668213, "learning_rate": 2.624948930954651e-05, "loss": 4.2309, "step": 17940 }, { "epoch": 0.4821980384253661, "grad_norm": 2.1823177337646484, "learning_rate": 2.6242680103499932e-05, "loss": 4.1027, "step": 17945 }, { "epoch": 0.48233239285234447, "grad_norm": 2.272648811340332, "learning_rate": 2.6235870897453358e-05, "loss": 4.2132, "step": 17950 }, { "epoch": 0.48246674727932287, "grad_norm": 2.4722824096679688, "learning_rate": 2.622906169140678e-05, "loss": 4.0944, "step": 17955 }, { "epoch": 0.4826011017063012, "grad_norm": 2.638125419616699, "learning_rate": 2.622225248536021e-05, "loss": 4.0836, "step": 17960 }, { "epoch": 0.4827354561332796, "grad_norm": 2.454495668411255, "learning_rate": 2.6215443279313635e-05, "loss": 4.2388, "step": 17965 }, { "epoch": 0.48286981056025796, "grad_norm": 2.429551362991333, "learning_rate": 2.6208634073267057e-05, "loss": 4.1451, "step": 17970 }, { "epoch": 0.4830041649872363, "grad_norm": 2.261261224746704, "learning_rate": 2.6201824867220483e-05, "loss": 4.0884, "step": 17975 }, { "epoch": 0.4831385194142147, "grad_norm": 2.5089619159698486, "learning_rate": 2.6195015661173912e-05, "loss": 4.2319, "step": 17980 }, { "epoch": 0.48327287384119305, "grad_norm": 2.2266359329223633, "learning_rate": 2.6188206455127334e-05, "loss": 4.1895, "step": 17985 }, { "epoch": 0.48340722826817145, "grad_norm": 2.4228975772857666, "learning_rate": 2.6181397249080756e-05, "loss": 4.302, "step": 17990 }, { "epoch": 0.4835415826951498, "grad_norm": 2.444434881210327, "learning_rate": 2.6174588043034182e-05, "loss": 4.132, "step": 17995 }, { "epoch": 0.4836759371221282, "grad_norm": 2.5518808364868164, "learning_rate": 2.616777883698761e-05, "loss": 4.3463, "step": 18000 }, { "epoch": 0.48381029154910654, "grad_norm": 2.2340524196624756, "learning_rate": 2.6160969630941033e-05, "loss": 4.1674, "step": 18005 }, { "epoch": 0.4839446459760849, "grad_norm": 2.3275156021118164, "learning_rate": 2.615416042489446e-05, "loss": 4.0952, "step": 18010 }, { "epoch": 0.4840790004030633, "grad_norm": 2.6701903343200684, "learning_rate": 2.614735121884788e-05, "loss": 4.1777, "step": 18015 }, { "epoch": 0.48421335483004163, "grad_norm": 2.301490545272827, "learning_rate": 2.614054201280131e-05, "loss": 4.23, "step": 18020 }, { "epoch": 0.48434770925702003, "grad_norm": 2.430833339691162, "learning_rate": 2.6133732806754736e-05, "loss": 4.28, "step": 18025 }, { "epoch": 0.4844820636839984, "grad_norm": 2.329726219177246, "learning_rate": 2.6126923600708158e-05, "loss": 4.1946, "step": 18030 }, { "epoch": 0.4846164181109768, "grad_norm": 2.191117525100708, "learning_rate": 2.6120114394661584e-05, "loss": 4.2334, "step": 18035 }, { "epoch": 0.4847507725379551, "grad_norm": 2.4942314624786377, "learning_rate": 2.6113305188615006e-05, "loss": 4.2172, "step": 18040 }, { "epoch": 0.4848851269649335, "grad_norm": 2.5436015129089355, "learning_rate": 2.6106495982568435e-05, "loss": 4.2046, "step": 18045 }, { "epoch": 0.48501948139191187, "grad_norm": 2.5554635524749756, "learning_rate": 2.6099686776521857e-05, "loss": 4.1776, "step": 18050 }, { "epoch": 0.4851538358188902, "grad_norm": 2.2278881072998047, "learning_rate": 2.6092877570475283e-05, "loss": 4.2295, "step": 18055 }, { "epoch": 0.4852881902458686, "grad_norm": 2.4933924674987793, "learning_rate": 2.6086068364428705e-05, "loss": 4.1942, "step": 18060 }, { "epoch": 0.48542254467284696, "grad_norm": 2.309387683868408, "learning_rate": 2.6079259158382134e-05, "loss": 4.2468, "step": 18065 }, { "epoch": 0.48555689909982536, "grad_norm": 2.7151975631713867, "learning_rate": 2.607244995233556e-05, "loss": 4.0826, "step": 18070 }, { "epoch": 0.4856912535268037, "grad_norm": 2.4385697841644287, "learning_rate": 2.6065640746288982e-05, "loss": 4.2234, "step": 18075 }, { "epoch": 0.4858256079537821, "grad_norm": 2.408919095993042, "learning_rate": 2.6058831540242408e-05, "loss": 4.2883, "step": 18080 }, { "epoch": 0.48595996238076045, "grad_norm": 2.4114458560943604, "learning_rate": 2.6052022334195837e-05, "loss": 4.2383, "step": 18085 }, { "epoch": 0.4860943168077388, "grad_norm": 2.50626802444458, "learning_rate": 2.604521312814926e-05, "loss": 4.1921, "step": 18090 }, { "epoch": 0.4862286712347172, "grad_norm": 2.3679516315460205, "learning_rate": 2.603840392210268e-05, "loss": 4.1849, "step": 18095 }, { "epoch": 0.48636302566169554, "grad_norm": 2.5321896076202393, "learning_rate": 2.6031594716056107e-05, "loss": 4.2015, "step": 18100 }, { "epoch": 0.48649738008867394, "grad_norm": 2.3566226959228516, "learning_rate": 2.6024785510009536e-05, "loss": 4.3387, "step": 18105 }, { "epoch": 0.4866317345156523, "grad_norm": 2.444742441177368, "learning_rate": 2.601797630396296e-05, "loss": 4.2679, "step": 18110 }, { "epoch": 0.4867660889426307, "grad_norm": 2.2895700931549072, "learning_rate": 2.6011167097916384e-05, "loss": 4.2327, "step": 18115 }, { "epoch": 0.486900443369609, "grad_norm": 2.533116340637207, "learning_rate": 2.6004357891869807e-05, "loss": 4.1546, "step": 18120 }, { "epoch": 0.48703479779658737, "grad_norm": 2.1641852855682373, "learning_rate": 2.5997548685823236e-05, "loss": 4.299, "step": 18125 }, { "epoch": 0.48716915222356577, "grad_norm": 2.540921688079834, "learning_rate": 2.599073947977666e-05, "loss": 4.2227, "step": 18130 }, { "epoch": 0.4873035066505441, "grad_norm": 2.612032651901245, "learning_rate": 2.5983930273730083e-05, "loss": 4.2153, "step": 18135 }, { "epoch": 0.4874378610775225, "grad_norm": 2.280641555786133, "learning_rate": 2.597712106768351e-05, "loss": 4.2552, "step": 18140 }, { "epoch": 0.48757221550450086, "grad_norm": 2.409398078918457, "learning_rate": 2.5970311861636938e-05, "loss": 4.2235, "step": 18145 }, { "epoch": 0.48770656993147926, "grad_norm": 2.3794333934783936, "learning_rate": 2.596350265559036e-05, "loss": 4.0942, "step": 18150 }, { "epoch": 0.4878409243584576, "grad_norm": 2.6261370182037354, "learning_rate": 2.5956693449543783e-05, "loss": 4.1852, "step": 18155 }, { "epoch": 0.487975278785436, "grad_norm": 2.2404489517211914, "learning_rate": 2.594988424349721e-05, "loss": 4.2908, "step": 18160 }, { "epoch": 0.48810963321241435, "grad_norm": 2.5504374504089355, "learning_rate": 2.5943075037450637e-05, "loss": 4.2369, "step": 18165 }, { "epoch": 0.4882439876393927, "grad_norm": 2.4524526596069336, "learning_rate": 2.593626583140406e-05, "loss": 4.269, "step": 18170 }, { "epoch": 0.4883783420663711, "grad_norm": 2.5140442848205566, "learning_rate": 2.5929456625357485e-05, "loss": 4.1981, "step": 18175 }, { "epoch": 0.48851269649334944, "grad_norm": 2.481140613555908, "learning_rate": 2.5922647419310908e-05, "loss": 4.248, "step": 18180 }, { "epoch": 0.48864705092032784, "grad_norm": 2.419713020324707, "learning_rate": 2.5915838213264337e-05, "loss": 4.111, "step": 18185 }, { "epoch": 0.4887814053473062, "grad_norm": 2.344277858734131, "learning_rate": 2.5909029007217762e-05, "loss": 4.2457, "step": 18190 }, { "epoch": 0.4889157597742846, "grad_norm": 2.5366199016571045, "learning_rate": 2.5902219801171185e-05, "loss": 4.171, "step": 18195 }, { "epoch": 0.48905011420126293, "grad_norm": 2.462343692779541, "learning_rate": 2.5895410595124607e-05, "loss": 4.1888, "step": 18200 }, { "epoch": 0.4891844686282413, "grad_norm": 2.4411468505859375, "learning_rate": 2.5888601389078033e-05, "loss": 4.2335, "step": 18205 }, { "epoch": 0.4893188230552197, "grad_norm": 2.454390048980713, "learning_rate": 2.588179218303146e-05, "loss": 4.1776, "step": 18210 }, { "epoch": 0.489453177482198, "grad_norm": 2.463078022003174, "learning_rate": 2.5874982976984884e-05, "loss": 4.1615, "step": 18215 }, { "epoch": 0.4895875319091764, "grad_norm": 2.545379161834717, "learning_rate": 2.586817377093831e-05, "loss": 4.1149, "step": 18220 }, { "epoch": 0.48972188633615477, "grad_norm": 2.4900882244110107, "learning_rate": 2.5861364564891732e-05, "loss": 4.1357, "step": 18225 }, { "epoch": 0.48985624076313317, "grad_norm": 2.1092634201049805, "learning_rate": 2.585455535884516e-05, "loss": 4.2677, "step": 18230 }, { "epoch": 0.4899905951901115, "grad_norm": 2.2814910411834717, "learning_rate": 2.5847746152798586e-05, "loss": 4.1796, "step": 18235 }, { "epoch": 0.49012494961708986, "grad_norm": 2.356308937072754, "learning_rate": 2.584093694675201e-05, "loss": 4.2341, "step": 18240 }, { "epoch": 0.49025930404406826, "grad_norm": 2.57517409324646, "learning_rate": 2.5834127740705434e-05, "loss": 4.1961, "step": 18245 }, { "epoch": 0.4903936584710466, "grad_norm": 2.4986705780029297, "learning_rate": 2.5827318534658863e-05, "loss": 4.0699, "step": 18250 }, { "epoch": 0.490528012898025, "grad_norm": 2.3548741340637207, "learning_rate": 2.5820509328612286e-05, "loss": 4.1076, "step": 18255 }, { "epoch": 0.49066236732500335, "grad_norm": 2.4799814224243164, "learning_rate": 2.5813700122565708e-05, "loss": 4.2076, "step": 18260 }, { "epoch": 0.49079672175198175, "grad_norm": 2.2077834606170654, "learning_rate": 2.5806890916519134e-05, "loss": 4.281, "step": 18265 }, { "epoch": 0.4909310761789601, "grad_norm": 2.4273972511291504, "learning_rate": 2.5800081710472563e-05, "loss": 4.0183, "step": 18270 }, { "epoch": 0.4910654306059385, "grad_norm": 2.2849979400634766, "learning_rate": 2.5793272504425985e-05, "loss": 4.3167, "step": 18275 }, { "epoch": 0.49119978503291684, "grad_norm": 2.0790834426879883, "learning_rate": 2.578646329837941e-05, "loss": 4.048, "step": 18280 }, { "epoch": 0.4913341394598952, "grad_norm": 2.6466872692108154, "learning_rate": 2.5779654092332833e-05, "loss": 4.228, "step": 18285 }, { "epoch": 0.4914684938868736, "grad_norm": 2.357388734817505, "learning_rate": 2.5772844886286262e-05, "loss": 4.1201, "step": 18290 }, { "epoch": 0.4916028483138519, "grad_norm": 2.4313485622406006, "learning_rate": 2.5766035680239688e-05, "loss": 4.035, "step": 18295 }, { "epoch": 0.4917372027408303, "grad_norm": 2.523571014404297, "learning_rate": 2.575922647419311e-05, "loss": 4.2451, "step": 18300 }, { "epoch": 0.49187155716780867, "grad_norm": 2.2592031955718994, "learning_rate": 2.5752417268146532e-05, "loss": 4.2008, "step": 18305 }, { "epoch": 0.49200591159478707, "grad_norm": 2.4525113105773926, "learning_rate": 2.5745608062099965e-05, "loss": 4.288, "step": 18310 }, { "epoch": 0.4921402660217654, "grad_norm": 2.269056558609009, "learning_rate": 2.5738798856053387e-05, "loss": 4.2353, "step": 18315 }, { "epoch": 0.49227462044874376, "grad_norm": 2.251535654067993, "learning_rate": 2.573198965000681e-05, "loss": 4.3064, "step": 18320 }, { "epoch": 0.49240897487572216, "grad_norm": 2.343065023422241, "learning_rate": 2.5725180443960235e-05, "loss": 4.1948, "step": 18325 }, { "epoch": 0.4925433293027005, "grad_norm": 2.4005606174468994, "learning_rate": 2.5718371237913664e-05, "loss": 4.0632, "step": 18330 }, { "epoch": 0.4926776837296789, "grad_norm": 2.308690309524536, "learning_rate": 2.5711562031867086e-05, "loss": 4.1645, "step": 18335 }, { "epoch": 0.49281203815665725, "grad_norm": 2.5850331783294678, "learning_rate": 2.5704752825820512e-05, "loss": 4.2443, "step": 18340 }, { "epoch": 0.49294639258363565, "grad_norm": 2.6231513023376465, "learning_rate": 2.5697943619773934e-05, "loss": 4.2387, "step": 18345 }, { "epoch": 0.493080747010614, "grad_norm": 2.2974085807800293, "learning_rate": 2.569113441372736e-05, "loss": 4.2754, "step": 18350 }, { "epoch": 0.4932151014375924, "grad_norm": 2.4447906017303467, "learning_rate": 2.568432520768079e-05, "loss": 4.1533, "step": 18355 }, { "epoch": 0.49334945586457074, "grad_norm": 2.3164422512054443, "learning_rate": 2.567751600163421e-05, "loss": 4.1415, "step": 18360 }, { "epoch": 0.4934838102915491, "grad_norm": 2.508737087249756, "learning_rate": 2.5670706795587633e-05, "loss": 4.0885, "step": 18365 }, { "epoch": 0.4936181647185275, "grad_norm": 2.281087875366211, "learning_rate": 2.566389758954106e-05, "loss": 4.2904, "step": 18370 }, { "epoch": 0.49375251914550583, "grad_norm": 2.570521831512451, "learning_rate": 2.5657088383494488e-05, "loss": 4.2599, "step": 18375 }, { "epoch": 0.49388687357248423, "grad_norm": 2.4338788986206055, "learning_rate": 2.565027917744791e-05, "loss": 4.1128, "step": 18380 }, { "epoch": 0.4940212279994626, "grad_norm": 2.37184739112854, "learning_rate": 2.5643469971401336e-05, "loss": 4.2131, "step": 18385 }, { "epoch": 0.494155582426441, "grad_norm": 2.2635748386383057, "learning_rate": 2.5636660765354758e-05, "loss": 4.3039, "step": 18390 }, { "epoch": 0.4942899368534193, "grad_norm": 2.269552707672119, "learning_rate": 2.5629851559308187e-05, "loss": 4.2044, "step": 18395 }, { "epoch": 0.49442429128039767, "grad_norm": 2.489553451538086, "learning_rate": 2.5623042353261613e-05, "loss": 4.1601, "step": 18400 }, { "epoch": 0.49455864570737607, "grad_norm": 2.3646981716156006, "learning_rate": 2.5616233147215035e-05, "loss": 4.111, "step": 18405 }, { "epoch": 0.4946930001343544, "grad_norm": 2.3927223682403564, "learning_rate": 2.5609423941168457e-05, "loss": 4.0381, "step": 18410 }, { "epoch": 0.4948273545613328, "grad_norm": 2.395575761795044, "learning_rate": 2.560261473512189e-05, "loss": 4.253, "step": 18415 }, { "epoch": 0.49496170898831116, "grad_norm": 2.612895965576172, "learning_rate": 2.5595805529075312e-05, "loss": 4.2215, "step": 18420 }, { "epoch": 0.49509606341528956, "grad_norm": 2.5365970134735107, "learning_rate": 2.5588996323028734e-05, "loss": 4.1465, "step": 18425 }, { "epoch": 0.4952304178422679, "grad_norm": 2.2703559398651123, "learning_rate": 2.558218711698216e-05, "loss": 4.2273, "step": 18430 }, { "epoch": 0.49536477226924625, "grad_norm": 2.5235514640808105, "learning_rate": 2.557537791093559e-05, "loss": 4.2287, "step": 18435 }, { "epoch": 0.49549912669622465, "grad_norm": 2.7003815174102783, "learning_rate": 2.556856870488901e-05, "loss": 4.2661, "step": 18440 }, { "epoch": 0.495633481123203, "grad_norm": 2.1436195373535156, "learning_rate": 2.5561759498842437e-05, "loss": 4.1953, "step": 18445 }, { "epoch": 0.4957678355501814, "grad_norm": 2.4063124656677246, "learning_rate": 2.555495029279586e-05, "loss": 4.1223, "step": 18450 }, { "epoch": 0.49590218997715974, "grad_norm": 2.580117702484131, "learning_rate": 2.5548141086749288e-05, "loss": 4.1311, "step": 18455 }, { "epoch": 0.49603654440413814, "grad_norm": 2.4798243045806885, "learning_rate": 2.5541331880702714e-05, "loss": 4.338, "step": 18460 }, { "epoch": 0.4961708988311165, "grad_norm": 2.1716935634613037, "learning_rate": 2.5534522674656136e-05, "loss": 4.1286, "step": 18465 }, { "epoch": 0.4963052532580949, "grad_norm": 2.449183464050293, "learning_rate": 2.552771346860956e-05, "loss": 4.1662, "step": 18470 }, { "epoch": 0.4964396076850732, "grad_norm": 2.6888210773468018, "learning_rate": 2.5520904262562988e-05, "loss": 4.1947, "step": 18475 }, { "epoch": 0.49657396211205157, "grad_norm": 2.248119831085205, "learning_rate": 2.5514095056516413e-05, "loss": 4.0818, "step": 18480 }, { "epoch": 0.49670831653902997, "grad_norm": 2.6109721660614014, "learning_rate": 2.5507285850469835e-05, "loss": 4.0692, "step": 18485 }, { "epoch": 0.4968426709660083, "grad_norm": 2.658236503601074, "learning_rate": 2.550047664442326e-05, "loss": 4.2491, "step": 18490 }, { "epoch": 0.4969770253929867, "grad_norm": 2.3914830684661865, "learning_rate": 2.5493667438376683e-05, "loss": 4.1663, "step": 18495 }, { "epoch": 0.49711137981996506, "grad_norm": 2.273757219314575, "learning_rate": 2.5486858232330112e-05, "loss": 4.2471, "step": 18500 }, { "epoch": 0.49724573424694346, "grad_norm": 2.586331844329834, "learning_rate": 2.5480049026283538e-05, "loss": 4.0336, "step": 18505 }, { "epoch": 0.4973800886739218, "grad_norm": 2.536170244216919, "learning_rate": 2.547323982023696e-05, "loss": 4.2369, "step": 18510 }, { "epoch": 0.49751444310090015, "grad_norm": 2.3158907890319824, "learning_rate": 2.5466430614190383e-05, "loss": 4.2748, "step": 18515 }, { "epoch": 0.49764879752787855, "grad_norm": 2.5497050285339355, "learning_rate": 2.545962140814381e-05, "loss": 4.0997, "step": 18520 }, { "epoch": 0.4977831519548569, "grad_norm": 2.5585503578186035, "learning_rate": 2.5452812202097237e-05, "loss": 4.2408, "step": 18525 }, { "epoch": 0.4979175063818353, "grad_norm": 2.3359177112579346, "learning_rate": 2.544600299605066e-05, "loss": 4.1353, "step": 18530 }, { "epoch": 0.49805186080881364, "grad_norm": 2.433411121368408, "learning_rate": 2.5439193790004085e-05, "loss": 4.2434, "step": 18535 }, { "epoch": 0.49818621523579204, "grad_norm": 2.677987575531006, "learning_rate": 2.5432384583957514e-05, "loss": 4.2788, "step": 18540 }, { "epoch": 0.4983205696627704, "grad_norm": 2.7603280544281006, "learning_rate": 2.5425575377910937e-05, "loss": 4.1544, "step": 18545 }, { "epoch": 0.49845492408974873, "grad_norm": 2.78523588180542, "learning_rate": 2.5418766171864362e-05, "loss": 4.2633, "step": 18550 }, { "epoch": 0.49858927851672713, "grad_norm": 2.3146603107452393, "learning_rate": 2.5411956965817785e-05, "loss": 4.1985, "step": 18555 }, { "epoch": 0.4987236329437055, "grad_norm": 2.600654125213623, "learning_rate": 2.5405147759771214e-05, "loss": 4.2182, "step": 18560 }, { "epoch": 0.4988579873706839, "grad_norm": 2.6447434425354004, "learning_rate": 2.539833855372464e-05, "loss": 4.2255, "step": 18565 }, { "epoch": 0.4989923417976622, "grad_norm": 2.43923020362854, "learning_rate": 2.539152934767806e-05, "loss": 4.2077, "step": 18570 }, { "epoch": 0.4991266962246406, "grad_norm": 2.505922794342041, "learning_rate": 2.5384720141631484e-05, "loss": 4.2201, "step": 18575 }, { "epoch": 0.49926105065161897, "grad_norm": 2.757676362991333, "learning_rate": 2.5377910935584913e-05, "loss": 4.1282, "step": 18580 }, { "epoch": 0.49939540507859737, "grad_norm": 2.5847113132476807, "learning_rate": 2.537110172953834e-05, "loss": 4.2183, "step": 18585 }, { "epoch": 0.4995297595055757, "grad_norm": 2.4257121086120605, "learning_rate": 2.536429252349176e-05, "loss": 4.2264, "step": 18590 }, { "epoch": 0.49966411393255405, "grad_norm": 2.2203452587127686, "learning_rate": 2.5357483317445186e-05, "loss": 4.174, "step": 18595 }, { "epoch": 0.49979846835953246, "grad_norm": 2.455432415008545, "learning_rate": 2.5350674111398615e-05, "loss": 4.2194, "step": 18600 }, { "epoch": 0.4999328227865108, "grad_norm": 2.7966620922088623, "learning_rate": 2.5343864905352038e-05, "loss": 4.3535, "step": 18605 }, { "epoch": 0.5000671772134891, "grad_norm": 2.2308266162872314, "learning_rate": 2.5337055699305463e-05, "loss": 4.1993, "step": 18610 }, { "epoch": 0.5002015316404675, "grad_norm": 2.414926052093506, "learning_rate": 2.5330246493258886e-05, "loss": 4.2803, "step": 18615 }, { "epoch": 0.500335886067446, "grad_norm": 2.612109899520874, "learning_rate": 2.5323437287212315e-05, "loss": 4.1166, "step": 18620 }, { "epoch": 0.5004702404944243, "grad_norm": 2.3976144790649414, "learning_rate": 2.5316628081165737e-05, "loss": 4.1845, "step": 18625 }, { "epoch": 0.5006045949214026, "grad_norm": 2.6069955825805664, "learning_rate": 2.5309818875119163e-05, "loss": 4.3571, "step": 18630 }, { "epoch": 0.500738949348381, "grad_norm": 2.329179525375366, "learning_rate": 2.5303009669072585e-05, "loss": 4.3201, "step": 18635 }, { "epoch": 0.5008733037753594, "grad_norm": 2.615586042404175, "learning_rate": 2.529620046302601e-05, "loss": 4.2336, "step": 18640 }, { "epoch": 0.5010076582023377, "grad_norm": 2.347893714904785, "learning_rate": 2.528939125697944e-05, "loss": 4.0614, "step": 18645 }, { "epoch": 0.5011420126293161, "grad_norm": 2.32383131980896, "learning_rate": 2.5282582050932862e-05, "loss": 4.1241, "step": 18650 }, { "epoch": 0.5012763670562945, "grad_norm": 2.35555362701416, "learning_rate": 2.5275772844886287e-05, "loss": 4.3148, "step": 18655 }, { "epoch": 0.5014107214832729, "grad_norm": 2.240527391433716, "learning_rate": 2.526896363883971e-05, "loss": 4.1477, "step": 18660 }, { "epoch": 0.5015450759102512, "grad_norm": 2.566396951675415, "learning_rate": 2.526215443279314e-05, "loss": 4.2485, "step": 18665 }, { "epoch": 0.5016794303372296, "grad_norm": 2.304514169692993, "learning_rate": 2.5255345226746564e-05, "loss": 4.3448, "step": 18670 }, { "epoch": 0.501813784764208, "grad_norm": 2.4402577877044678, "learning_rate": 2.5248536020699987e-05, "loss": 4.1485, "step": 18675 }, { "epoch": 0.5019481391911863, "grad_norm": 2.29703688621521, "learning_rate": 2.524172681465341e-05, "loss": 4.2909, "step": 18680 }, { "epoch": 0.5020824936181647, "grad_norm": 2.25199294090271, "learning_rate": 2.5234917608606838e-05, "loss": 4.1789, "step": 18685 }, { "epoch": 0.5022168480451431, "grad_norm": 2.413407802581787, "learning_rate": 2.5228108402560264e-05, "loss": 4.2036, "step": 18690 }, { "epoch": 0.5023512024721215, "grad_norm": 2.395397901535034, "learning_rate": 2.5221299196513686e-05, "loss": 4.169, "step": 18695 }, { "epoch": 0.5024855568990998, "grad_norm": 2.479898691177368, "learning_rate": 2.521448999046711e-05, "loss": 4.175, "step": 18700 }, { "epoch": 0.5026199113260782, "grad_norm": 2.5617711544036865, "learning_rate": 2.520768078442054e-05, "loss": 4.2006, "step": 18705 }, { "epoch": 0.5027542657530566, "grad_norm": 2.299938440322876, "learning_rate": 2.5200871578373963e-05, "loss": 4.2693, "step": 18710 }, { "epoch": 0.5028886201800349, "grad_norm": 2.3865394592285156, "learning_rate": 2.519406237232739e-05, "loss": 4.026, "step": 18715 }, { "epoch": 0.5030229746070133, "grad_norm": 2.438220262527466, "learning_rate": 2.518725316628081e-05, "loss": 4.3007, "step": 18720 }, { "epoch": 0.5031573290339917, "grad_norm": 2.450645685195923, "learning_rate": 2.518044396023424e-05, "loss": 4.2856, "step": 18725 }, { "epoch": 0.5032916834609701, "grad_norm": 2.4057185649871826, "learning_rate": 2.5173634754187662e-05, "loss": 4.2396, "step": 18730 }, { "epoch": 0.5034260378879484, "grad_norm": 2.6239986419677734, "learning_rate": 2.5166825548141088e-05, "loss": 4.1156, "step": 18735 }, { "epoch": 0.5035603923149268, "grad_norm": 2.380948781967163, "learning_rate": 2.516001634209451e-05, "loss": 4.2586, "step": 18740 }, { "epoch": 0.5036947467419052, "grad_norm": 2.5175178050994873, "learning_rate": 2.515320713604794e-05, "loss": 4.2468, "step": 18745 }, { "epoch": 0.5038291011688835, "grad_norm": 2.7464826107025146, "learning_rate": 2.5146397930001365e-05, "loss": 4.1266, "step": 18750 }, { "epoch": 0.5039634555958619, "grad_norm": 2.3505687713623047, "learning_rate": 2.5139588723954787e-05, "loss": 4.2056, "step": 18755 }, { "epoch": 0.5040978100228403, "grad_norm": 2.288691759109497, "learning_rate": 2.5132779517908213e-05, "loss": 4.2492, "step": 18760 }, { "epoch": 0.5042321644498187, "grad_norm": 2.4170656204223633, "learning_rate": 2.5125970311861642e-05, "loss": 4.2391, "step": 18765 }, { "epoch": 0.504366518876797, "grad_norm": 2.63655686378479, "learning_rate": 2.5119161105815064e-05, "loss": 4.2176, "step": 18770 }, { "epoch": 0.5045008733037754, "grad_norm": 2.398902177810669, "learning_rate": 2.511235189976849e-05, "loss": 4.1487, "step": 18775 }, { "epoch": 0.5046352277307538, "grad_norm": 2.8057374954223633, "learning_rate": 2.5105542693721912e-05, "loss": 4.2147, "step": 18780 }, { "epoch": 0.504769582157732, "grad_norm": 2.477985143661499, "learning_rate": 2.5098733487675334e-05, "loss": 4.1204, "step": 18785 }, { "epoch": 0.5049039365847104, "grad_norm": 2.428417205810547, "learning_rate": 2.5091924281628763e-05, "loss": 4.2479, "step": 18790 }, { "epoch": 0.5050382910116888, "grad_norm": 2.491898536682129, "learning_rate": 2.508511507558219e-05, "loss": 4.1498, "step": 18795 }, { "epoch": 0.5051726454386672, "grad_norm": 2.533215045928955, "learning_rate": 2.507830586953561e-05, "loss": 4.0924, "step": 18800 }, { "epoch": 0.5053069998656455, "grad_norm": 2.603253126144409, "learning_rate": 2.5071496663489037e-05, "loss": 4.148, "step": 18805 }, { "epoch": 0.5054413542926239, "grad_norm": 2.3862900733947754, "learning_rate": 2.5064687457442466e-05, "loss": 4.1716, "step": 18810 }, { "epoch": 0.5055757087196023, "grad_norm": 2.451064109802246, "learning_rate": 2.5057878251395888e-05, "loss": 4.2225, "step": 18815 }, { "epoch": 0.5057100631465807, "grad_norm": 2.4568440914154053, "learning_rate": 2.5051069045349314e-05, "loss": 4.1678, "step": 18820 }, { "epoch": 0.505844417573559, "grad_norm": 2.6371846199035645, "learning_rate": 2.5044259839302736e-05, "loss": 4.121, "step": 18825 }, { "epoch": 0.5059787720005374, "grad_norm": 2.446119546890259, "learning_rate": 2.5037450633256165e-05, "loss": 4.2146, "step": 18830 }, { "epoch": 0.5061131264275158, "grad_norm": 2.512579917907715, "learning_rate": 2.5030641427209587e-05, "loss": 4.071, "step": 18835 }, { "epoch": 0.5062474808544941, "grad_norm": 2.5288357734680176, "learning_rate": 2.5023832221163013e-05, "loss": 4.2804, "step": 18840 }, { "epoch": 0.5063818352814725, "grad_norm": 2.5771141052246094, "learning_rate": 2.5017023015116435e-05, "loss": 4.1704, "step": 18845 }, { "epoch": 0.5065161897084509, "grad_norm": 2.3051512241363525, "learning_rate": 2.5010213809069864e-05, "loss": 4.1872, "step": 18850 }, { "epoch": 0.5066505441354293, "grad_norm": 2.387204885482788, "learning_rate": 2.500340460302329e-05, "loss": 4.1235, "step": 18855 }, { "epoch": 0.5067848985624076, "grad_norm": 2.467358112335205, "learning_rate": 2.4996595396976712e-05, "loss": 4.2603, "step": 18860 }, { "epoch": 0.506919252989386, "grad_norm": 2.3400490283966064, "learning_rate": 2.4989786190930138e-05, "loss": 4.0972, "step": 18865 }, { "epoch": 0.5070536074163644, "grad_norm": 2.454592227935791, "learning_rate": 2.4982976984883564e-05, "loss": 4.3898, "step": 18870 }, { "epoch": 0.5071879618433427, "grad_norm": 2.404630422592163, "learning_rate": 2.497616777883699e-05, "loss": 4.205, "step": 18875 }, { "epoch": 0.5073223162703211, "grad_norm": 2.2266414165496826, "learning_rate": 2.4969358572790415e-05, "loss": 4.2474, "step": 18880 }, { "epoch": 0.5074566706972995, "grad_norm": 2.4259536266326904, "learning_rate": 2.496254936674384e-05, "loss": 4.1152, "step": 18885 }, { "epoch": 0.5075910251242779, "grad_norm": 2.758829116821289, "learning_rate": 2.4955740160697263e-05, "loss": 4.2244, "step": 18890 }, { "epoch": 0.5077253795512562, "grad_norm": 2.3042891025543213, "learning_rate": 2.494893095465069e-05, "loss": 4.1847, "step": 18895 }, { "epoch": 0.5078597339782346, "grad_norm": 2.343984603881836, "learning_rate": 2.4942121748604114e-05, "loss": 4.084, "step": 18900 }, { "epoch": 0.507994088405213, "grad_norm": 2.493533134460449, "learning_rate": 2.493531254255754e-05, "loss": 4.1229, "step": 18905 }, { "epoch": 0.5081284428321913, "grad_norm": 2.4453001022338867, "learning_rate": 2.4928503336510962e-05, "loss": 4.2356, "step": 18910 }, { "epoch": 0.5082627972591697, "grad_norm": 2.4494309425354004, "learning_rate": 2.492169413046439e-05, "loss": 4.1724, "step": 18915 }, { "epoch": 0.5083971516861481, "grad_norm": 2.5660715103149414, "learning_rate": 2.4914884924417813e-05, "loss": 4.1469, "step": 18920 }, { "epoch": 0.5085315061131265, "grad_norm": 2.352365255355835, "learning_rate": 2.490807571837124e-05, "loss": 4.1633, "step": 18925 }, { "epoch": 0.5086658605401048, "grad_norm": 2.2898879051208496, "learning_rate": 2.4901266512324665e-05, "loss": 4.2138, "step": 18930 }, { "epoch": 0.5088002149670832, "grad_norm": 2.643263101577759, "learning_rate": 2.4894457306278087e-05, "loss": 4.1248, "step": 18935 }, { "epoch": 0.5089345693940616, "grad_norm": 2.4025118350982666, "learning_rate": 2.4887648100231513e-05, "loss": 4.2399, "step": 18940 }, { "epoch": 0.5090689238210399, "grad_norm": 2.0745902061462402, "learning_rate": 2.488083889418494e-05, "loss": 4.257, "step": 18945 }, { "epoch": 0.5092032782480183, "grad_norm": 2.488585948944092, "learning_rate": 2.4874029688138364e-05, "loss": 4.2229, "step": 18950 }, { "epoch": 0.5093376326749967, "grad_norm": 2.2216603755950928, "learning_rate": 2.486722048209179e-05, "loss": 4.2164, "step": 18955 }, { "epoch": 0.509471987101975, "grad_norm": 2.3368899822235107, "learning_rate": 2.4860411276045215e-05, "loss": 4.2855, "step": 18960 }, { "epoch": 0.5096063415289533, "grad_norm": 2.5822532176971436, "learning_rate": 2.4853602069998638e-05, "loss": 4.1867, "step": 18965 }, { "epoch": 0.5097406959559317, "grad_norm": 2.4659416675567627, "learning_rate": 2.4846792863952063e-05, "loss": 4.2239, "step": 18970 }, { "epoch": 0.5098750503829101, "grad_norm": 2.497880458831787, "learning_rate": 2.483998365790549e-05, "loss": 4.1616, "step": 18975 }, { "epoch": 0.5100094048098884, "grad_norm": 2.593658447265625, "learning_rate": 2.4833174451858915e-05, "loss": 4.1589, "step": 18980 }, { "epoch": 0.5101437592368668, "grad_norm": 2.472313404083252, "learning_rate": 2.482636524581234e-05, "loss": 4.2971, "step": 18985 }, { "epoch": 0.5102781136638452, "grad_norm": 2.56112003326416, "learning_rate": 2.4819556039765766e-05, "loss": 4.1221, "step": 18990 }, { "epoch": 0.5104124680908236, "grad_norm": 2.4737329483032227, "learning_rate": 2.4812746833719188e-05, "loss": 4.2118, "step": 18995 }, { "epoch": 0.5105468225178019, "grad_norm": 2.2520573139190674, "learning_rate": 2.4805937627672614e-05, "loss": 4.1486, "step": 19000 }, { "epoch": 0.5106811769447803, "grad_norm": 2.6526594161987305, "learning_rate": 2.479912842162604e-05, "loss": 4.1834, "step": 19005 }, { "epoch": 0.5108155313717587, "grad_norm": 2.4082844257354736, "learning_rate": 2.4792319215579465e-05, "loss": 4.2263, "step": 19010 }, { "epoch": 0.510949885798737, "grad_norm": 2.490719795227051, "learning_rate": 2.4785510009532887e-05, "loss": 4.1295, "step": 19015 }, { "epoch": 0.5110842402257154, "grad_norm": 2.6129188537597656, "learning_rate": 2.4778700803486316e-05, "loss": 4.1738, "step": 19020 }, { "epoch": 0.5112185946526938, "grad_norm": 2.576753854751587, "learning_rate": 2.477189159743974e-05, "loss": 4.1368, "step": 19025 }, { "epoch": 0.5113529490796722, "grad_norm": 2.5014922618865967, "learning_rate": 2.4765082391393164e-05, "loss": 4.2416, "step": 19030 }, { "epoch": 0.5114873035066505, "grad_norm": 2.293637275695801, "learning_rate": 2.475827318534659e-05, "loss": 4.2046, "step": 19035 }, { "epoch": 0.5116216579336289, "grad_norm": 2.6398375034332275, "learning_rate": 2.4751463979300016e-05, "loss": 4.1795, "step": 19040 }, { "epoch": 0.5117560123606073, "grad_norm": 2.442321538925171, "learning_rate": 2.4744654773253438e-05, "loss": 4.304, "step": 19045 }, { "epoch": 0.5118903667875857, "grad_norm": 2.187950849533081, "learning_rate": 2.4737845567206867e-05, "loss": 4.2276, "step": 19050 }, { "epoch": 0.512024721214564, "grad_norm": 2.5990514755249023, "learning_rate": 2.473103636116029e-05, "loss": 4.3029, "step": 19055 }, { "epoch": 0.5121590756415424, "grad_norm": 2.5766282081604004, "learning_rate": 2.4724227155113715e-05, "loss": 4.1229, "step": 19060 }, { "epoch": 0.5122934300685208, "grad_norm": 2.4270172119140625, "learning_rate": 2.471741794906714e-05, "loss": 4.154, "step": 19065 }, { "epoch": 0.5124277844954991, "grad_norm": 2.4832682609558105, "learning_rate": 2.4710608743020566e-05, "loss": 4.2012, "step": 19070 }, { "epoch": 0.5125621389224775, "grad_norm": 2.432749032974243, "learning_rate": 2.470379953697399e-05, "loss": 4.2145, "step": 19075 }, { "epoch": 0.5126964933494559, "grad_norm": 2.4161741733551025, "learning_rate": 2.4696990330927418e-05, "loss": 4.2185, "step": 19080 }, { "epoch": 0.5128308477764343, "grad_norm": 2.3733983039855957, "learning_rate": 2.469018112488084e-05, "loss": 4.1963, "step": 19085 }, { "epoch": 0.5129652022034126, "grad_norm": 2.3789846897125244, "learning_rate": 2.4683371918834265e-05, "loss": 4.1089, "step": 19090 }, { "epoch": 0.513099556630391, "grad_norm": 2.6909961700439453, "learning_rate": 2.467656271278769e-05, "loss": 4.2658, "step": 19095 }, { "epoch": 0.5132339110573694, "grad_norm": 2.537989854812622, "learning_rate": 2.4669753506741113e-05, "loss": 4.1815, "step": 19100 }, { "epoch": 0.5133682654843477, "grad_norm": 2.4563934803009033, "learning_rate": 2.466294430069454e-05, "loss": 4.1817, "step": 19105 }, { "epoch": 0.5135026199113261, "grad_norm": 2.511692762374878, "learning_rate": 2.4656135094647965e-05, "loss": 4.2143, "step": 19110 }, { "epoch": 0.5136369743383045, "grad_norm": 2.3812544345855713, "learning_rate": 2.464932588860139e-05, "loss": 4.1468, "step": 19115 }, { "epoch": 0.5137713287652829, "grad_norm": 2.327343702316284, "learning_rate": 2.4642516682554813e-05, "loss": 4.2647, "step": 19120 }, { "epoch": 0.5139056831922612, "grad_norm": 2.407703399658203, "learning_rate": 2.463570747650824e-05, "loss": 4.0866, "step": 19125 }, { "epoch": 0.5140400376192396, "grad_norm": 2.5500471591949463, "learning_rate": 2.4628898270461664e-05, "loss": 4.1409, "step": 19130 }, { "epoch": 0.514174392046218, "grad_norm": 2.407898187637329, "learning_rate": 2.462208906441509e-05, "loss": 4.2394, "step": 19135 }, { "epoch": 0.5143087464731962, "grad_norm": 2.4540274143218994, "learning_rate": 2.4615279858368515e-05, "loss": 4.2458, "step": 19140 }, { "epoch": 0.5144431009001746, "grad_norm": 2.365863800048828, "learning_rate": 2.460847065232194e-05, "loss": 4.1183, "step": 19145 }, { "epoch": 0.514577455327153, "grad_norm": 2.2912471294403076, "learning_rate": 2.4601661446275363e-05, "loss": 4.2602, "step": 19150 }, { "epoch": 0.5147118097541314, "grad_norm": 2.261556386947632, "learning_rate": 2.4594852240228792e-05, "loss": 4.226, "step": 19155 }, { "epoch": 0.5148461641811097, "grad_norm": 2.3950469493865967, "learning_rate": 2.4588043034182215e-05, "loss": 4.2662, "step": 19160 }, { "epoch": 0.5149805186080881, "grad_norm": 2.227627992630005, "learning_rate": 2.458123382813564e-05, "loss": 4.0405, "step": 19165 }, { "epoch": 0.5151148730350665, "grad_norm": 2.258918523788452, "learning_rate": 2.4574424622089066e-05, "loss": 4.1317, "step": 19170 }, { "epoch": 0.5152492274620448, "grad_norm": 2.4608702659606934, "learning_rate": 2.456761541604249e-05, "loss": 4.128, "step": 19175 }, { "epoch": 0.5153835818890232, "grad_norm": 2.6952149868011475, "learning_rate": 2.4560806209995914e-05, "loss": 4.1355, "step": 19180 }, { "epoch": 0.5155179363160016, "grad_norm": 2.3763391971588135, "learning_rate": 2.4553997003949343e-05, "loss": 4.1548, "step": 19185 }, { "epoch": 0.51565229074298, "grad_norm": 2.4071786403656006, "learning_rate": 2.4547187797902765e-05, "loss": 4.0729, "step": 19190 }, { "epoch": 0.5157866451699583, "grad_norm": 2.690171003341675, "learning_rate": 2.454037859185619e-05, "loss": 4.1408, "step": 19195 }, { "epoch": 0.5159209995969367, "grad_norm": 2.3048369884490967, "learning_rate": 2.4533569385809616e-05, "loss": 4.1109, "step": 19200 }, { "epoch": 0.5160553540239151, "grad_norm": 2.2279212474823, "learning_rate": 2.4526760179763042e-05, "loss": 4.2529, "step": 19205 }, { "epoch": 0.5161897084508934, "grad_norm": 2.5764353275299072, "learning_rate": 2.4519950973716464e-05, "loss": 4.2532, "step": 19210 }, { "epoch": 0.5163240628778718, "grad_norm": 2.549027442932129, "learning_rate": 2.4513141767669893e-05, "loss": 4.0419, "step": 19215 }, { "epoch": 0.5164584173048502, "grad_norm": 2.3325693607330322, "learning_rate": 2.4506332561623316e-05, "loss": 4.2411, "step": 19220 }, { "epoch": 0.5165927717318286, "grad_norm": 2.813455820083618, "learning_rate": 2.449952335557674e-05, "loss": 4.1887, "step": 19225 }, { "epoch": 0.5167271261588069, "grad_norm": 2.2589588165283203, "learning_rate": 2.4492714149530167e-05, "loss": 4.1948, "step": 19230 }, { "epoch": 0.5168614805857853, "grad_norm": 2.4710195064544678, "learning_rate": 2.448590494348359e-05, "loss": 4.1208, "step": 19235 }, { "epoch": 0.5169958350127637, "grad_norm": 2.4131791591644287, "learning_rate": 2.4479095737437015e-05, "loss": 3.9861, "step": 19240 }, { "epoch": 0.5171301894397421, "grad_norm": 2.5135104656219482, "learning_rate": 2.447228653139044e-05, "loss": 4.1288, "step": 19245 }, { "epoch": 0.5172645438667204, "grad_norm": 2.712293863296509, "learning_rate": 2.4465477325343866e-05, "loss": 4.2116, "step": 19250 }, { "epoch": 0.5173988982936988, "grad_norm": 2.577848434448242, "learning_rate": 2.445866811929729e-05, "loss": 4.2711, "step": 19255 }, { "epoch": 0.5175332527206772, "grad_norm": 2.5128767490386963, "learning_rate": 2.4451858913250717e-05, "loss": 4.2365, "step": 19260 }, { "epoch": 0.5176676071476555, "grad_norm": 2.6061699390411377, "learning_rate": 2.444504970720414e-05, "loss": 4.1478, "step": 19265 }, { "epoch": 0.5178019615746339, "grad_norm": 2.9753358364105225, "learning_rate": 2.4438240501157565e-05, "loss": 4.1677, "step": 19270 }, { "epoch": 0.5179363160016123, "grad_norm": 2.3526861667633057, "learning_rate": 2.443143129511099e-05, "loss": 4.1839, "step": 19275 }, { "epoch": 0.5180706704285907, "grad_norm": 2.3954553604125977, "learning_rate": 2.4424622089064417e-05, "loss": 4.2778, "step": 19280 }, { "epoch": 0.518205024855569, "grad_norm": 2.2088117599487305, "learning_rate": 2.441781288301784e-05, "loss": 4.1926, "step": 19285 }, { "epoch": 0.5183393792825474, "grad_norm": 2.542858600616455, "learning_rate": 2.4411003676971268e-05, "loss": 4.2399, "step": 19290 }, { "epoch": 0.5184737337095258, "grad_norm": 2.441488265991211, "learning_rate": 2.440419447092469e-05, "loss": 4.2247, "step": 19295 }, { "epoch": 0.518608088136504, "grad_norm": 2.5491998195648193, "learning_rate": 2.4397385264878116e-05, "loss": 4.3001, "step": 19300 }, { "epoch": 0.5187424425634825, "grad_norm": 2.3421573638916016, "learning_rate": 2.439057605883154e-05, "loss": 4.0951, "step": 19305 }, { "epoch": 0.5188767969904609, "grad_norm": 2.2438547611236572, "learning_rate": 2.4383766852784967e-05, "loss": 4.1919, "step": 19310 }, { "epoch": 0.5190111514174393, "grad_norm": 2.3466169834136963, "learning_rate": 2.437695764673839e-05, "loss": 4.1804, "step": 19315 }, { "epoch": 0.5191455058444175, "grad_norm": 2.467222213745117, "learning_rate": 2.437014844069182e-05, "loss": 4.1472, "step": 19320 }, { "epoch": 0.5192798602713959, "grad_norm": 2.591048002243042, "learning_rate": 2.436333923464524e-05, "loss": 4.0082, "step": 19325 }, { "epoch": 0.5194142146983743, "grad_norm": 2.5330843925476074, "learning_rate": 2.4356530028598667e-05, "loss": 4.1657, "step": 19330 }, { "epoch": 0.5195485691253526, "grad_norm": 2.4015862941741943, "learning_rate": 2.4349720822552092e-05, "loss": 4.2106, "step": 19335 }, { "epoch": 0.519682923552331, "grad_norm": 2.364415407180786, "learning_rate": 2.4342911616505518e-05, "loss": 4.1525, "step": 19340 }, { "epoch": 0.5198172779793094, "grad_norm": 2.3948943614959717, "learning_rate": 2.433610241045894e-05, "loss": 4.254, "step": 19345 }, { "epoch": 0.5199516324062878, "grad_norm": 2.387593984603882, "learning_rate": 2.432929320441237e-05, "loss": 4.2196, "step": 19350 }, { "epoch": 0.5200859868332661, "grad_norm": 2.355522632598877, "learning_rate": 2.432248399836579e-05, "loss": 4.118, "step": 19355 }, { "epoch": 0.5202203412602445, "grad_norm": 2.610097646713257, "learning_rate": 2.4315674792319217e-05, "loss": 4.2102, "step": 19360 }, { "epoch": 0.5203546956872229, "grad_norm": 2.479344367980957, "learning_rate": 2.4308865586272643e-05, "loss": 4.1395, "step": 19365 }, { "epoch": 0.5204890501142012, "grad_norm": 2.5003228187561035, "learning_rate": 2.430205638022607e-05, "loss": 4.1125, "step": 19370 }, { "epoch": 0.5206234045411796, "grad_norm": 2.2299089431762695, "learning_rate": 2.429524717417949e-05, "loss": 4.1119, "step": 19375 }, { "epoch": 0.520757758968158, "grad_norm": 2.498774528503418, "learning_rate": 2.4288437968132916e-05, "loss": 4.155, "step": 19380 }, { "epoch": 0.5208921133951364, "grad_norm": 2.692408800125122, "learning_rate": 2.4281628762086342e-05, "loss": 4.0259, "step": 19385 }, { "epoch": 0.5210264678221147, "grad_norm": 2.306915760040283, "learning_rate": 2.4274819556039764e-05, "loss": 4.1222, "step": 19390 }, { "epoch": 0.5211608222490931, "grad_norm": 2.2933619022369385, "learning_rate": 2.4268010349993193e-05, "loss": 4.1561, "step": 19395 }, { "epoch": 0.5212951766760715, "grad_norm": 2.6175570487976074, "learning_rate": 2.4261201143946616e-05, "loss": 4.152, "step": 19400 }, { "epoch": 0.5214295311030498, "grad_norm": 2.5819554328918457, "learning_rate": 2.425439193790004e-05, "loss": 4.2009, "step": 19405 }, { "epoch": 0.5215638855300282, "grad_norm": 2.2931675910949707, "learning_rate": 2.4247582731853467e-05, "loss": 4.1642, "step": 19410 }, { "epoch": 0.5216982399570066, "grad_norm": 2.3909666538238525, "learning_rate": 2.4240773525806893e-05, "loss": 4.189, "step": 19415 }, { "epoch": 0.521832594383985, "grad_norm": 2.3716351985931396, "learning_rate": 2.4233964319760315e-05, "loss": 4.2543, "step": 19420 }, { "epoch": 0.5219669488109633, "grad_norm": 2.3029158115386963, "learning_rate": 2.4227155113713744e-05, "loss": 4.2417, "step": 19425 }, { "epoch": 0.5221013032379417, "grad_norm": 2.3708932399749756, "learning_rate": 2.4220345907667166e-05, "loss": 4.2177, "step": 19430 }, { "epoch": 0.5222356576649201, "grad_norm": 2.4914543628692627, "learning_rate": 2.4213536701620592e-05, "loss": 4.0838, "step": 19435 }, { "epoch": 0.5223700120918985, "grad_norm": 2.5556857585906982, "learning_rate": 2.4206727495574017e-05, "loss": 4.1052, "step": 19440 }, { "epoch": 0.5225043665188768, "grad_norm": 2.434607744216919, "learning_rate": 2.4199918289527443e-05, "loss": 4.1205, "step": 19445 }, { "epoch": 0.5226387209458552, "grad_norm": 2.6894402503967285, "learning_rate": 2.4193109083480865e-05, "loss": 4.1588, "step": 19450 }, { "epoch": 0.5227730753728336, "grad_norm": 2.4294652938842773, "learning_rate": 2.4186299877434294e-05, "loss": 4.2142, "step": 19455 }, { "epoch": 0.5229074297998119, "grad_norm": 2.4246666431427, "learning_rate": 2.4179490671387717e-05, "loss": 4.223, "step": 19460 }, { "epoch": 0.5230417842267903, "grad_norm": 2.605745792388916, "learning_rate": 2.4172681465341142e-05, "loss": 4.1628, "step": 19465 }, { "epoch": 0.5231761386537687, "grad_norm": 2.401439905166626, "learning_rate": 2.4165872259294568e-05, "loss": 4.1364, "step": 19470 }, { "epoch": 0.5233104930807471, "grad_norm": 2.5756683349609375, "learning_rate": 2.4159063053247994e-05, "loss": 4.1874, "step": 19475 }, { "epoch": 0.5234448475077254, "grad_norm": 2.373011589050293, "learning_rate": 2.4152253847201416e-05, "loss": 4.1208, "step": 19480 }, { "epoch": 0.5235792019347038, "grad_norm": 2.520259380340576, "learning_rate": 2.4145444641154845e-05, "loss": 4.1642, "step": 19485 }, { "epoch": 0.5237135563616822, "grad_norm": 2.32365345954895, "learning_rate": 2.4138635435108267e-05, "loss": 4.1664, "step": 19490 }, { "epoch": 0.5238479107886604, "grad_norm": 2.2045445442199707, "learning_rate": 2.4131826229061693e-05, "loss": 4.0915, "step": 19495 }, { "epoch": 0.5239822652156388, "grad_norm": 2.259805679321289, "learning_rate": 2.412501702301512e-05, "loss": 4.1102, "step": 19500 }, { "epoch": 0.5241166196426172, "grad_norm": 2.488907814025879, "learning_rate": 2.4118207816968544e-05, "loss": 4.0722, "step": 19505 }, { "epoch": 0.5242509740695956, "grad_norm": 2.419842481613159, "learning_rate": 2.4111398610921966e-05, "loss": 4.1472, "step": 19510 }, { "epoch": 0.5243853284965739, "grad_norm": 2.644954204559326, "learning_rate": 2.4104589404875396e-05, "loss": 4.1447, "step": 19515 }, { "epoch": 0.5245196829235523, "grad_norm": 2.2284414768218994, "learning_rate": 2.4097780198828818e-05, "loss": 4.1575, "step": 19520 }, { "epoch": 0.5246540373505307, "grad_norm": 2.508227825164795, "learning_rate": 2.4090970992782243e-05, "loss": 4.1627, "step": 19525 }, { "epoch": 0.524788391777509, "grad_norm": 2.3828859329223633, "learning_rate": 2.408416178673567e-05, "loss": 4.2278, "step": 19530 }, { "epoch": 0.5249227462044874, "grad_norm": 2.3429534435272217, "learning_rate": 2.407735258068909e-05, "loss": 4.1168, "step": 19535 }, { "epoch": 0.5250571006314658, "grad_norm": 2.59507417678833, "learning_rate": 2.4070543374642517e-05, "loss": 4.2073, "step": 19540 }, { "epoch": 0.5251914550584442, "grad_norm": 2.602620840072632, "learning_rate": 2.4063734168595943e-05, "loss": 4.2225, "step": 19545 }, { "epoch": 0.5253258094854225, "grad_norm": 2.5905463695526123, "learning_rate": 2.405692496254937e-05, "loss": 4.2048, "step": 19550 }, { "epoch": 0.5254601639124009, "grad_norm": 2.5372214317321777, "learning_rate": 2.405011575650279e-05, "loss": 4.1774, "step": 19555 }, { "epoch": 0.5255945183393793, "grad_norm": 2.317359447479248, "learning_rate": 2.404330655045622e-05, "loss": 4.1496, "step": 19560 }, { "epoch": 0.5257288727663576, "grad_norm": 2.5819942951202393, "learning_rate": 2.4036497344409642e-05, "loss": 4.0662, "step": 19565 }, { "epoch": 0.525863227193336, "grad_norm": 2.517676830291748, "learning_rate": 2.4029688138363068e-05, "loss": 4.1158, "step": 19570 }, { "epoch": 0.5259975816203144, "grad_norm": 2.4072768688201904, "learning_rate": 2.4022878932316493e-05, "loss": 4.1718, "step": 19575 }, { "epoch": 0.5261319360472928, "grad_norm": 2.226813793182373, "learning_rate": 2.401606972626992e-05, "loss": 4.2765, "step": 19580 }, { "epoch": 0.5262662904742711, "grad_norm": 2.7242965698242188, "learning_rate": 2.400926052022334e-05, "loss": 4.187, "step": 19585 }, { "epoch": 0.5264006449012495, "grad_norm": 2.587934970855713, "learning_rate": 2.400245131417677e-05, "loss": 4.1164, "step": 19590 }, { "epoch": 0.5265349993282279, "grad_norm": 2.553853750228882, "learning_rate": 2.3995642108130192e-05, "loss": 4.1806, "step": 19595 }, { "epoch": 0.5266693537552062, "grad_norm": 2.2665791511535645, "learning_rate": 2.3988832902083618e-05, "loss": 4.1183, "step": 19600 }, { "epoch": 0.5268037081821846, "grad_norm": 2.559373617172241, "learning_rate": 2.3982023696037044e-05, "loss": 4.2135, "step": 19605 }, { "epoch": 0.526938062609163, "grad_norm": 2.413282632827759, "learning_rate": 2.397521448999047e-05, "loss": 4.1154, "step": 19610 }, { "epoch": 0.5270724170361414, "grad_norm": 2.336554765701294, "learning_rate": 2.3968405283943892e-05, "loss": 4.209, "step": 19615 }, { "epoch": 0.5272067714631197, "grad_norm": 2.4473876953125, "learning_rate": 2.396159607789732e-05, "loss": 4.2153, "step": 19620 }, { "epoch": 0.5273411258900981, "grad_norm": 2.460981607437134, "learning_rate": 2.3954786871850743e-05, "loss": 4.1904, "step": 19625 }, { "epoch": 0.5274754803170765, "grad_norm": 2.3463668823242188, "learning_rate": 2.394797766580417e-05, "loss": 4.2273, "step": 19630 }, { "epoch": 0.5276098347440548, "grad_norm": 2.411773681640625, "learning_rate": 2.3941168459757594e-05, "loss": 4.0936, "step": 19635 }, { "epoch": 0.5277441891710332, "grad_norm": 2.409456253051758, "learning_rate": 2.393435925371102e-05, "loss": 4.0884, "step": 19640 }, { "epoch": 0.5278785435980116, "grad_norm": 2.3401482105255127, "learning_rate": 2.3927550047664442e-05, "loss": 4.0523, "step": 19645 }, { "epoch": 0.52801289802499, "grad_norm": 2.3576653003692627, "learning_rate": 2.392074084161787e-05, "loss": 4.1577, "step": 19650 }, { "epoch": 0.5281472524519683, "grad_norm": 2.2831578254699707, "learning_rate": 2.3913931635571294e-05, "loss": 4.0053, "step": 19655 }, { "epoch": 0.5282816068789467, "grad_norm": 2.438647985458374, "learning_rate": 2.390712242952472e-05, "loss": 4.1787, "step": 19660 }, { "epoch": 0.528415961305925, "grad_norm": 2.5329291820526123, "learning_rate": 2.3900313223478145e-05, "loss": 4.1798, "step": 19665 }, { "epoch": 0.5285503157329035, "grad_norm": 2.2505908012390137, "learning_rate": 2.389350401743157e-05, "loss": 4.1231, "step": 19670 }, { "epoch": 0.5286846701598817, "grad_norm": 2.3395848274230957, "learning_rate": 2.3886694811384993e-05, "loss": 4.0006, "step": 19675 }, { "epoch": 0.5288190245868601, "grad_norm": 2.4946913719177246, "learning_rate": 2.387988560533842e-05, "loss": 4.0995, "step": 19680 }, { "epoch": 0.5289533790138385, "grad_norm": 2.4786462783813477, "learning_rate": 2.3873076399291844e-05, "loss": 4.0761, "step": 19685 }, { "epoch": 0.5290877334408168, "grad_norm": 2.446791410446167, "learning_rate": 2.3866267193245266e-05, "loss": 4.2139, "step": 19690 }, { "epoch": 0.5292220878677952, "grad_norm": 2.624887228012085, "learning_rate": 2.3859457987198695e-05, "loss": 4.1317, "step": 19695 }, { "epoch": 0.5293564422947736, "grad_norm": 2.192580461502075, "learning_rate": 2.3852648781152118e-05, "loss": 4.2087, "step": 19700 }, { "epoch": 0.529490796721752, "grad_norm": 2.3577182292938232, "learning_rate": 2.3845839575105543e-05, "loss": 4.187, "step": 19705 }, { "epoch": 0.5296251511487303, "grad_norm": 2.6276166439056396, "learning_rate": 2.383903036905897e-05, "loss": 4.3099, "step": 19710 }, { "epoch": 0.5297595055757087, "grad_norm": 2.3528847694396973, "learning_rate": 2.3832221163012395e-05, "loss": 4.3097, "step": 19715 }, { "epoch": 0.5298938600026871, "grad_norm": 2.4681408405303955, "learning_rate": 2.3825411956965817e-05, "loss": 4.2341, "step": 19720 }, { "epoch": 0.5300282144296654, "grad_norm": 2.308509111404419, "learning_rate": 2.3818602750919246e-05, "loss": 4.2227, "step": 19725 }, { "epoch": 0.5301625688566438, "grad_norm": 2.575002670288086, "learning_rate": 2.3811793544872668e-05, "loss": 4.0359, "step": 19730 }, { "epoch": 0.5302969232836222, "grad_norm": 2.4957404136657715, "learning_rate": 2.3804984338826094e-05, "loss": 4.1513, "step": 19735 }, { "epoch": 0.5304312777106006, "grad_norm": 2.232130527496338, "learning_rate": 2.379817513277952e-05, "loss": 4.1318, "step": 19740 }, { "epoch": 0.5305656321375789, "grad_norm": 2.394183397293091, "learning_rate": 2.3791365926732945e-05, "loss": 4.1482, "step": 19745 }, { "epoch": 0.5306999865645573, "grad_norm": 2.6029720306396484, "learning_rate": 2.3784556720686368e-05, "loss": 4.2619, "step": 19750 }, { "epoch": 0.5308343409915357, "grad_norm": 2.2995316982269287, "learning_rate": 2.3777747514639793e-05, "loss": 4.2579, "step": 19755 }, { "epoch": 0.530968695418514, "grad_norm": 2.455338954925537, "learning_rate": 2.377093830859322e-05, "loss": 4.1183, "step": 19760 }, { "epoch": 0.5311030498454924, "grad_norm": 2.4550743103027344, "learning_rate": 2.3764129102546645e-05, "loss": 4.1018, "step": 19765 }, { "epoch": 0.5312374042724708, "grad_norm": 2.2408323287963867, "learning_rate": 2.375731989650007e-05, "loss": 4.1946, "step": 19770 }, { "epoch": 0.5313717586994492, "grad_norm": 2.5385196208953857, "learning_rate": 2.3750510690453496e-05, "loss": 4.1251, "step": 19775 }, { "epoch": 0.5315061131264275, "grad_norm": 2.33528208732605, "learning_rate": 2.3743701484406918e-05, "loss": 4.157, "step": 19780 }, { "epoch": 0.5316404675534059, "grad_norm": 2.416646718978882, "learning_rate": 2.3736892278360344e-05, "loss": 4.047, "step": 19785 }, { "epoch": 0.5317748219803843, "grad_norm": 2.622180223464966, "learning_rate": 2.373008307231377e-05, "loss": 4.2253, "step": 19790 }, { "epoch": 0.5319091764073626, "grad_norm": 2.2783236503601074, "learning_rate": 2.3723273866267195e-05, "loss": 4.1883, "step": 19795 }, { "epoch": 0.532043530834341, "grad_norm": 2.503242254257202, "learning_rate": 2.371646466022062e-05, "loss": 4.2299, "step": 19800 }, { "epoch": 0.5321778852613194, "grad_norm": 2.4925386905670166, "learning_rate": 2.3709655454174046e-05, "loss": 4.1227, "step": 19805 }, { "epoch": 0.5323122396882978, "grad_norm": 2.6601483821868896, "learning_rate": 2.370284624812747e-05, "loss": 4.1279, "step": 19810 }, { "epoch": 0.5324465941152761, "grad_norm": 2.529906749725342, "learning_rate": 2.3696037042080894e-05, "loss": 4.1269, "step": 19815 }, { "epoch": 0.5325809485422545, "grad_norm": 2.488236665725708, "learning_rate": 2.368922783603432e-05, "loss": 4.0333, "step": 19820 }, { "epoch": 0.5327153029692329, "grad_norm": 2.481128454208374, "learning_rate": 2.3682418629987742e-05, "loss": 4.0917, "step": 19825 }, { "epoch": 0.5328496573962112, "grad_norm": 2.5271215438842773, "learning_rate": 2.367560942394117e-05, "loss": 4.0599, "step": 19830 }, { "epoch": 0.5329840118231896, "grad_norm": 2.4280197620391846, "learning_rate": 2.3668800217894594e-05, "loss": 4.1872, "step": 19835 }, { "epoch": 0.533118366250168, "grad_norm": 2.4330058097839355, "learning_rate": 2.366199101184802e-05, "loss": 4.221, "step": 19840 }, { "epoch": 0.5332527206771464, "grad_norm": 2.532562017440796, "learning_rate": 2.3655181805801445e-05, "loss": 4.0234, "step": 19845 }, { "epoch": 0.5333870751041246, "grad_norm": 2.502290725708008, "learning_rate": 2.364837259975487e-05, "loss": 4.1461, "step": 19850 }, { "epoch": 0.533521429531103, "grad_norm": 2.227602005004883, "learning_rate": 2.3641563393708293e-05, "loss": 4.1852, "step": 19855 }, { "epoch": 0.5336557839580814, "grad_norm": 2.3136398792266846, "learning_rate": 2.363475418766172e-05, "loss": 4.1688, "step": 19860 }, { "epoch": 0.5337901383850598, "grad_norm": 2.6027657985687256, "learning_rate": 2.3627944981615144e-05, "loss": 4.0856, "step": 19865 }, { "epoch": 0.5339244928120381, "grad_norm": 2.760267972946167, "learning_rate": 2.362113577556857e-05, "loss": 4.1458, "step": 19870 }, { "epoch": 0.5340588472390165, "grad_norm": 2.481855630874634, "learning_rate": 2.3614326569521995e-05, "loss": 4.2034, "step": 19875 }, { "epoch": 0.5341932016659949, "grad_norm": 2.400932550430298, "learning_rate": 2.360751736347542e-05, "loss": 4.1213, "step": 19880 }, { "epoch": 0.5343275560929732, "grad_norm": 2.3457772731781006, "learning_rate": 2.3600708157428843e-05, "loss": 4.1471, "step": 19885 }, { "epoch": 0.5344619105199516, "grad_norm": 2.9542276859283447, "learning_rate": 2.359389895138227e-05, "loss": 4.1602, "step": 19890 }, { "epoch": 0.53459626494693, "grad_norm": 2.4567620754241943, "learning_rate": 2.3587089745335695e-05, "loss": 4.1741, "step": 19895 }, { "epoch": 0.5347306193739084, "grad_norm": 2.6443021297454834, "learning_rate": 2.358028053928912e-05, "loss": 4.2107, "step": 19900 }, { "epoch": 0.5348649738008867, "grad_norm": 2.4837722778320312, "learning_rate": 2.3573471333242546e-05, "loss": 4.1673, "step": 19905 }, { "epoch": 0.5349993282278651, "grad_norm": 2.377662181854248, "learning_rate": 2.356666212719597e-05, "loss": 4.1024, "step": 19910 }, { "epoch": 0.5351336826548435, "grad_norm": 2.6272366046905518, "learning_rate": 2.3559852921149394e-05, "loss": 4.2115, "step": 19915 }, { "epoch": 0.5352680370818218, "grad_norm": 3.010737895965576, "learning_rate": 2.355304371510282e-05, "loss": 4.2124, "step": 19920 }, { "epoch": 0.5354023915088002, "grad_norm": 2.346762180328369, "learning_rate": 2.3546234509056245e-05, "loss": 4.1673, "step": 19925 }, { "epoch": 0.5355367459357786, "grad_norm": 2.6834068298339844, "learning_rate": 2.353942530300967e-05, "loss": 4.2344, "step": 19930 }, { "epoch": 0.535671100362757, "grad_norm": 2.4434335231781006, "learning_rate": 2.3532616096963097e-05, "loss": 4.2325, "step": 19935 }, { "epoch": 0.5358054547897353, "grad_norm": 2.7561428546905518, "learning_rate": 2.3525806890916522e-05, "loss": 4.205, "step": 19940 }, { "epoch": 0.5359398092167137, "grad_norm": 2.382530927658081, "learning_rate": 2.3518997684869944e-05, "loss": 4.2802, "step": 19945 }, { "epoch": 0.5360741636436921, "grad_norm": 2.6401822566986084, "learning_rate": 2.351218847882337e-05, "loss": 4.2178, "step": 19950 }, { "epoch": 0.5362085180706704, "grad_norm": 2.2574403285980225, "learning_rate": 2.3505379272776796e-05, "loss": 4.2785, "step": 19955 }, { "epoch": 0.5363428724976488, "grad_norm": 2.3282604217529297, "learning_rate": 2.349857006673022e-05, "loss": 4.1487, "step": 19960 }, { "epoch": 0.5364772269246272, "grad_norm": 2.201552391052246, "learning_rate": 2.3491760860683644e-05, "loss": 4.2107, "step": 19965 }, { "epoch": 0.5366115813516056, "grad_norm": 2.5604617595672607, "learning_rate": 2.3484951654637073e-05, "loss": 4.0575, "step": 19970 }, { "epoch": 0.5367459357785839, "grad_norm": 2.2823944091796875, "learning_rate": 2.3478142448590495e-05, "loss": 4.1463, "step": 19975 }, { "epoch": 0.5368802902055623, "grad_norm": 2.5601329803466797, "learning_rate": 2.347133324254392e-05, "loss": 4.1585, "step": 19980 }, { "epoch": 0.5370146446325407, "grad_norm": 2.380958318710327, "learning_rate": 2.3464524036497346e-05, "loss": 4.3339, "step": 19985 }, { "epoch": 0.537148999059519, "grad_norm": 2.4014508724212646, "learning_rate": 2.345771483045077e-05, "loss": 4.1118, "step": 19990 }, { "epoch": 0.5372833534864974, "grad_norm": 2.4354753494262695, "learning_rate": 2.3450905624404194e-05, "loss": 4.1837, "step": 19995 }, { "epoch": 0.5374177079134758, "grad_norm": 2.4942574501037598, "learning_rate": 2.344409641835762e-05, "loss": 4.2234, "step": 20000 }, { "epoch": 0.5375520623404542, "grad_norm": 2.3543336391448975, "learning_rate": 2.3437287212311046e-05, "loss": 4.1467, "step": 20005 }, { "epoch": 0.5376864167674325, "grad_norm": 2.5599191188812256, "learning_rate": 2.343047800626447e-05, "loss": 4.1367, "step": 20010 }, { "epoch": 0.5378207711944109, "grad_norm": 2.486945390701294, "learning_rate": 2.3423668800217897e-05, "loss": 4.0903, "step": 20015 }, { "epoch": 0.5379551256213893, "grad_norm": 2.5304880142211914, "learning_rate": 2.341685959417132e-05, "loss": 4.2478, "step": 20020 }, { "epoch": 0.5380894800483675, "grad_norm": 2.5263166427612305, "learning_rate": 2.3410050388124745e-05, "loss": 4.127, "step": 20025 }, { "epoch": 0.5382238344753459, "grad_norm": 2.4813787937164307, "learning_rate": 2.340324118207817e-05, "loss": 4.1907, "step": 20030 }, { "epoch": 0.5383581889023243, "grad_norm": 2.518040418624878, "learning_rate": 2.3396431976031596e-05, "loss": 4.1916, "step": 20035 }, { "epoch": 0.5384925433293027, "grad_norm": 2.369868516921997, "learning_rate": 2.3389622769985022e-05, "loss": 4.2327, "step": 20040 }, { "epoch": 0.538626897756281, "grad_norm": 2.5046117305755615, "learning_rate": 2.3382813563938447e-05, "loss": 4.149, "step": 20045 }, { "epoch": 0.5387612521832594, "grad_norm": 2.341040849685669, "learning_rate": 2.337600435789187e-05, "loss": 4.1431, "step": 20050 }, { "epoch": 0.5388956066102378, "grad_norm": 2.3161439895629883, "learning_rate": 2.3369195151845295e-05, "loss": 4.2159, "step": 20055 }, { "epoch": 0.5390299610372161, "grad_norm": 2.426114559173584, "learning_rate": 2.336238594579872e-05, "loss": 4.0949, "step": 20060 }, { "epoch": 0.5391643154641945, "grad_norm": 2.4937424659729004, "learning_rate": 2.3355576739752147e-05, "loss": 4.1788, "step": 20065 }, { "epoch": 0.5392986698911729, "grad_norm": 2.465135335922241, "learning_rate": 2.334876753370557e-05, "loss": 4.2443, "step": 20070 }, { "epoch": 0.5394330243181513, "grad_norm": 2.316635847091675, "learning_rate": 2.3341958327658998e-05, "loss": 4.0954, "step": 20075 }, { "epoch": 0.5395673787451296, "grad_norm": 2.580977201461792, "learning_rate": 2.333514912161242e-05, "loss": 4.2978, "step": 20080 }, { "epoch": 0.539701733172108, "grad_norm": 2.176295757293701, "learning_rate": 2.3328339915565846e-05, "loss": 4.1919, "step": 20085 }, { "epoch": 0.5398360875990864, "grad_norm": 2.401799440383911, "learning_rate": 2.332153070951927e-05, "loss": 4.2586, "step": 20090 }, { "epoch": 0.5399704420260648, "grad_norm": 2.4833359718322754, "learning_rate": 2.3314721503472697e-05, "loss": 4.16, "step": 20095 }, { "epoch": 0.5401047964530431, "grad_norm": 2.337731122970581, "learning_rate": 2.330791229742612e-05, "loss": 4.0634, "step": 20100 }, { "epoch": 0.5402391508800215, "grad_norm": 2.3508505821228027, "learning_rate": 2.330110309137955e-05, "loss": 4.169, "step": 20105 }, { "epoch": 0.5403735053069999, "grad_norm": 2.610184907913208, "learning_rate": 2.329429388533297e-05, "loss": 4.2276, "step": 20110 }, { "epoch": 0.5405078597339782, "grad_norm": 2.68168044090271, "learning_rate": 2.3287484679286396e-05, "loss": 4.2143, "step": 20115 }, { "epoch": 0.5406422141609566, "grad_norm": 2.3111865520477295, "learning_rate": 2.3280675473239822e-05, "loss": 4.169, "step": 20120 }, { "epoch": 0.540776568587935, "grad_norm": 2.2827577590942383, "learning_rate": 2.3273866267193244e-05, "loss": 4.1083, "step": 20125 }, { "epoch": 0.5409109230149134, "grad_norm": 2.402019500732422, "learning_rate": 2.326705706114667e-05, "loss": 4.2195, "step": 20130 }, { "epoch": 0.5410452774418917, "grad_norm": 2.4111995697021484, "learning_rate": 2.3260247855100096e-05, "loss": 4.2482, "step": 20135 }, { "epoch": 0.5411796318688701, "grad_norm": 2.2512338161468506, "learning_rate": 2.325343864905352e-05, "loss": 3.9686, "step": 20140 }, { "epoch": 0.5413139862958485, "grad_norm": 2.5896291732788086, "learning_rate": 2.3246629443006947e-05, "loss": 4.263, "step": 20145 }, { "epoch": 0.5414483407228268, "grad_norm": 2.5307767391204834, "learning_rate": 2.3239820236960373e-05, "loss": 4.1731, "step": 20150 }, { "epoch": 0.5415826951498052, "grad_norm": 2.5398061275482178, "learning_rate": 2.3233011030913795e-05, "loss": 4.2506, "step": 20155 }, { "epoch": 0.5417170495767836, "grad_norm": 2.365388870239258, "learning_rate": 2.322620182486722e-05, "loss": 4.2236, "step": 20160 }, { "epoch": 0.541851404003762, "grad_norm": 2.2030937671661377, "learning_rate": 2.3219392618820646e-05, "loss": 4.2017, "step": 20165 }, { "epoch": 0.5419857584307403, "grad_norm": 2.5959572792053223, "learning_rate": 2.3212583412774072e-05, "loss": 4.1611, "step": 20170 }, { "epoch": 0.5421201128577187, "grad_norm": 2.372133255004883, "learning_rate": 2.3205774206727494e-05, "loss": 4.0889, "step": 20175 }, { "epoch": 0.5422544672846971, "grad_norm": 2.4216184616088867, "learning_rate": 2.3198965000680923e-05, "loss": 4.1612, "step": 20180 }, { "epoch": 0.5423888217116754, "grad_norm": 2.2745449542999268, "learning_rate": 2.3192155794634346e-05, "loss": 4.1637, "step": 20185 }, { "epoch": 0.5425231761386538, "grad_norm": 2.36331844329834, "learning_rate": 2.318534658858777e-05, "loss": 4.2324, "step": 20190 }, { "epoch": 0.5426575305656322, "grad_norm": 2.3993477821350098, "learning_rate": 2.3178537382541197e-05, "loss": 4.0434, "step": 20195 }, { "epoch": 0.5427918849926106, "grad_norm": 2.526160717010498, "learning_rate": 2.3171728176494622e-05, "loss": 4.2902, "step": 20200 }, { "epoch": 0.5429262394195888, "grad_norm": 2.3316924571990967, "learning_rate": 2.3164918970448045e-05, "loss": 4.2886, "step": 20205 }, { "epoch": 0.5430605938465672, "grad_norm": 2.6067090034484863, "learning_rate": 2.3158109764401474e-05, "loss": 4.016, "step": 20210 }, { "epoch": 0.5431949482735456, "grad_norm": 2.7873034477233887, "learning_rate": 2.3151300558354896e-05, "loss": 4.2331, "step": 20215 }, { "epoch": 0.5433293027005239, "grad_norm": 2.609191417694092, "learning_rate": 2.3144491352308322e-05, "loss": 4.0894, "step": 20220 }, { "epoch": 0.5434636571275023, "grad_norm": 2.416948080062866, "learning_rate": 2.3137682146261747e-05, "loss": 4.2224, "step": 20225 }, { "epoch": 0.5435980115544807, "grad_norm": 2.4412498474121094, "learning_rate": 2.3130872940215173e-05, "loss": 4.2322, "step": 20230 }, { "epoch": 0.5437323659814591, "grad_norm": 2.6170599460601807, "learning_rate": 2.3124063734168595e-05, "loss": 4.2063, "step": 20235 }, { "epoch": 0.5438667204084374, "grad_norm": 2.6210646629333496, "learning_rate": 2.3117254528122024e-05, "loss": 4.2434, "step": 20240 }, { "epoch": 0.5440010748354158, "grad_norm": 2.3776369094848633, "learning_rate": 2.3110445322075447e-05, "loss": 4.0863, "step": 20245 }, { "epoch": 0.5441354292623942, "grad_norm": 2.761976957321167, "learning_rate": 2.3103636116028872e-05, "loss": 4.2419, "step": 20250 }, { "epoch": 0.5442697836893725, "grad_norm": 2.256260633468628, "learning_rate": 2.3096826909982298e-05, "loss": 4.1232, "step": 20255 }, { "epoch": 0.5444041381163509, "grad_norm": 2.4522786140441895, "learning_rate": 2.3090017703935724e-05, "loss": 4.1971, "step": 20260 }, { "epoch": 0.5445384925433293, "grad_norm": 2.531339168548584, "learning_rate": 2.3083208497889146e-05, "loss": 4.357, "step": 20265 }, { "epoch": 0.5446728469703077, "grad_norm": 2.354278087615967, "learning_rate": 2.307639929184257e-05, "loss": 4.1372, "step": 20270 }, { "epoch": 0.544807201397286, "grad_norm": 2.4236373901367188, "learning_rate": 2.3069590085795997e-05, "loss": 4.1985, "step": 20275 }, { "epoch": 0.5449415558242644, "grad_norm": 2.2392358779907227, "learning_rate": 2.306278087974942e-05, "loss": 4.1053, "step": 20280 }, { "epoch": 0.5450759102512428, "grad_norm": 2.3322393894195557, "learning_rate": 2.305597167370285e-05, "loss": 4.0994, "step": 20285 }, { "epoch": 0.5452102646782212, "grad_norm": 2.534773588180542, "learning_rate": 2.304916246765627e-05, "loss": 4.2588, "step": 20290 }, { "epoch": 0.5453446191051995, "grad_norm": 2.353281259536743, "learning_rate": 2.3042353261609696e-05, "loss": 4.0934, "step": 20295 }, { "epoch": 0.5454789735321779, "grad_norm": 2.494717597961426, "learning_rate": 2.3035544055563122e-05, "loss": 4.003, "step": 20300 }, { "epoch": 0.5456133279591563, "grad_norm": 2.321584463119507, "learning_rate": 2.3028734849516548e-05, "loss": 4.1906, "step": 20305 }, { "epoch": 0.5457476823861346, "grad_norm": 2.26697039604187, "learning_rate": 2.302192564346997e-05, "loss": 4.2283, "step": 20310 }, { "epoch": 0.545882036813113, "grad_norm": 2.3440897464752197, "learning_rate": 2.30151164374234e-05, "loss": 4.1798, "step": 20315 }, { "epoch": 0.5460163912400914, "grad_norm": 2.5112297534942627, "learning_rate": 2.300830723137682e-05, "loss": 4.2309, "step": 20320 }, { "epoch": 0.5461507456670698, "grad_norm": 2.456052303314209, "learning_rate": 2.3001498025330247e-05, "loss": 4.1208, "step": 20325 }, { "epoch": 0.5462851000940481, "grad_norm": 2.561737298965454, "learning_rate": 2.2994688819283673e-05, "loss": 4.1376, "step": 20330 }, { "epoch": 0.5464194545210265, "grad_norm": 2.1906509399414062, "learning_rate": 2.2987879613237098e-05, "loss": 4.1033, "step": 20335 }, { "epoch": 0.5465538089480049, "grad_norm": 2.2807629108428955, "learning_rate": 2.298107040719052e-05, "loss": 4.0566, "step": 20340 }, { "epoch": 0.5466881633749832, "grad_norm": 2.4163055419921875, "learning_rate": 2.297426120114395e-05, "loss": 4.1012, "step": 20345 }, { "epoch": 0.5468225178019616, "grad_norm": 2.231922149658203, "learning_rate": 2.2967451995097372e-05, "loss": 4.0911, "step": 20350 }, { "epoch": 0.54695687222894, "grad_norm": 2.2921791076660156, "learning_rate": 2.2960642789050798e-05, "loss": 4.1309, "step": 20355 }, { "epoch": 0.5470912266559184, "grad_norm": 2.6215689182281494, "learning_rate": 2.2953833583004223e-05, "loss": 4.1754, "step": 20360 }, { "epoch": 0.5472255810828967, "grad_norm": 2.6653294563293457, "learning_rate": 2.294702437695765e-05, "loss": 4.018, "step": 20365 }, { "epoch": 0.547359935509875, "grad_norm": 2.459768295288086, "learning_rate": 2.294021517091107e-05, "loss": 4.2054, "step": 20370 }, { "epoch": 0.5474942899368535, "grad_norm": 2.4907214641571045, "learning_rate": 2.29334059648645e-05, "loss": 4.2036, "step": 20375 }, { "epoch": 0.5476286443638317, "grad_norm": 2.1773276329040527, "learning_rate": 2.2926596758817922e-05, "loss": 4.1489, "step": 20380 }, { "epoch": 0.5477629987908101, "grad_norm": 2.4334163665771484, "learning_rate": 2.2919787552771348e-05, "loss": 4.1849, "step": 20385 }, { "epoch": 0.5478973532177885, "grad_norm": 2.4763669967651367, "learning_rate": 2.2912978346724774e-05, "loss": 4.2323, "step": 20390 }, { "epoch": 0.5480317076447669, "grad_norm": 2.412395715713501, "learning_rate": 2.29061691406782e-05, "loss": 4.1651, "step": 20395 }, { "epoch": 0.5481660620717452, "grad_norm": 2.480158805847168, "learning_rate": 2.289935993463162e-05, "loss": 4.2449, "step": 20400 }, { "epoch": 0.5483004164987236, "grad_norm": 2.4406051635742188, "learning_rate": 2.289255072858505e-05, "loss": 4.1259, "step": 20405 }, { "epoch": 0.548434770925702, "grad_norm": 2.395028829574585, "learning_rate": 2.2885741522538473e-05, "loss": 4.049, "step": 20410 }, { "epoch": 0.5485691253526803, "grad_norm": 2.440051794052124, "learning_rate": 2.28789323164919e-05, "loss": 4.1243, "step": 20415 }, { "epoch": 0.5487034797796587, "grad_norm": 2.453970193862915, "learning_rate": 2.2872123110445324e-05, "loss": 4.2168, "step": 20420 }, { "epoch": 0.5488378342066371, "grad_norm": 2.291794538497925, "learning_rate": 2.2865313904398747e-05, "loss": 4.0539, "step": 20425 }, { "epoch": 0.5489721886336155, "grad_norm": 2.299351692199707, "learning_rate": 2.2858504698352172e-05, "loss": 4.1295, "step": 20430 }, { "epoch": 0.5491065430605938, "grad_norm": 2.4439659118652344, "learning_rate": 2.2851695492305598e-05, "loss": 4.2213, "step": 20435 }, { "epoch": 0.5492408974875722, "grad_norm": 2.3744044303894043, "learning_rate": 2.2844886286259024e-05, "loss": 4.2061, "step": 20440 }, { "epoch": 0.5493752519145506, "grad_norm": 2.4056203365325928, "learning_rate": 2.2838077080212446e-05, "loss": 4.3279, "step": 20445 }, { "epoch": 0.5495096063415289, "grad_norm": 2.2639098167419434, "learning_rate": 2.2831267874165875e-05, "loss": 4.1631, "step": 20450 }, { "epoch": 0.5496439607685073, "grad_norm": 2.667806625366211, "learning_rate": 2.2824458668119297e-05, "loss": 4.2885, "step": 20455 }, { "epoch": 0.5497783151954857, "grad_norm": 2.5146920680999756, "learning_rate": 2.2817649462072723e-05, "loss": 4.2166, "step": 20460 }, { "epoch": 0.5499126696224641, "grad_norm": 2.5367133617401123, "learning_rate": 2.281084025602615e-05, "loss": 4.1183, "step": 20465 }, { "epoch": 0.5500470240494424, "grad_norm": 2.7820565700531006, "learning_rate": 2.2804031049979574e-05, "loss": 4.312, "step": 20470 }, { "epoch": 0.5501813784764208, "grad_norm": 2.483693838119507, "learning_rate": 2.2797221843932996e-05, "loss": 4.0924, "step": 20475 }, { "epoch": 0.5503157329033992, "grad_norm": 2.498946189880371, "learning_rate": 2.2790412637886425e-05, "loss": 4.0793, "step": 20480 }, { "epoch": 0.5504500873303776, "grad_norm": 2.5973119735717773, "learning_rate": 2.2783603431839848e-05, "loss": 4.1266, "step": 20485 }, { "epoch": 0.5505844417573559, "grad_norm": 2.579442024230957, "learning_rate": 2.2776794225793273e-05, "loss": 4.139, "step": 20490 }, { "epoch": 0.5507187961843343, "grad_norm": 2.523515224456787, "learning_rate": 2.27699850197467e-05, "loss": 4.0474, "step": 20495 }, { "epoch": 0.5508531506113127, "grad_norm": 2.3674824237823486, "learning_rate": 2.2763175813700125e-05, "loss": 4.1653, "step": 20500 }, { "epoch": 0.550987505038291, "grad_norm": 2.652372360229492, "learning_rate": 2.2756366607653547e-05, "loss": 4.1785, "step": 20505 }, { "epoch": 0.5511218594652694, "grad_norm": 2.6511383056640625, "learning_rate": 2.2749557401606976e-05, "loss": 4.2264, "step": 20510 }, { "epoch": 0.5512562138922478, "grad_norm": 2.3554482460021973, "learning_rate": 2.2742748195560398e-05, "loss": 3.9877, "step": 20515 }, { "epoch": 0.5513905683192262, "grad_norm": 2.494297504425049, "learning_rate": 2.2735938989513824e-05, "loss": 4.1563, "step": 20520 }, { "epoch": 0.5515249227462045, "grad_norm": 2.714960813522339, "learning_rate": 2.272912978346725e-05, "loss": 4.2001, "step": 20525 }, { "epoch": 0.5516592771731829, "grad_norm": 2.6100189685821533, "learning_rate": 2.2722320577420675e-05, "loss": 4.2379, "step": 20530 }, { "epoch": 0.5517936316001613, "grad_norm": 2.5327558517456055, "learning_rate": 2.2715511371374097e-05, "loss": 4.2828, "step": 20535 }, { "epoch": 0.5519279860271395, "grad_norm": 2.411360740661621, "learning_rate": 2.2708702165327527e-05, "loss": 4.1847, "step": 20540 }, { "epoch": 0.552062340454118, "grad_norm": 2.309910297393799, "learning_rate": 2.270189295928095e-05, "loss": 4.0581, "step": 20545 }, { "epoch": 0.5521966948810964, "grad_norm": 2.232516050338745, "learning_rate": 2.2695083753234374e-05, "loss": 4.1728, "step": 20550 }, { "epoch": 0.5523310493080748, "grad_norm": 2.402663230895996, "learning_rate": 2.26882745471878e-05, "loss": 4.0971, "step": 20555 }, { "epoch": 0.552465403735053, "grad_norm": 2.5849103927612305, "learning_rate": 2.2681465341141226e-05, "loss": 4.2701, "step": 20560 }, { "epoch": 0.5525997581620314, "grad_norm": 2.499608278274536, "learning_rate": 2.2674656135094648e-05, "loss": 4.1197, "step": 20565 }, { "epoch": 0.5527341125890098, "grad_norm": 2.7025771141052246, "learning_rate": 2.2667846929048074e-05, "loss": 4.1194, "step": 20570 }, { "epoch": 0.5528684670159881, "grad_norm": 2.3658978939056396, "learning_rate": 2.26610377230015e-05, "loss": 4.2911, "step": 20575 }, { "epoch": 0.5530028214429665, "grad_norm": 2.522081136703491, "learning_rate": 2.265422851695492e-05, "loss": 4.0739, "step": 20580 }, { "epoch": 0.5531371758699449, "grad_norm": 2.35756516456604, "learning_rate": 2.264741931090835e-05, "loss": 4.1436, "step": 20585 }, { "epoch": 0.5532715302969233, "grad_norm": 2.6146721839904785, "learning_rate": 2.2640610104861773e-05, "loss": 4.1043, "step": 20590 }, { "epoch": 0.5534058847239016, "grad_norm": 2.3395774364471436, "learning_rate": 2.26338008988152e-05, "loss": 4.1637, "step": 20595 }, { "epoch": 0.55354023915088, "grad_norm": 2.4828941822052, "learning_rate": 2.2626991692768624e-05, "loss": 4.034, "step": 20600 }, { "epoch": 0.5536745935778584, "grad_norm": 2.860846519470215, "learning_rate": 2.262018248672205e-05, "loss": 4.2536, "step": 20605 }, { "epoch": 0.5538089480048367, "grad_norm": 2.4971113204956055, "learning_rate": 2.2613373280675472e-05, "loss": 4.1792, "step": 20610 }, { "epoch": 0.5539433024318151, "grad_norm": 2.5421531200408936, "learning_rate": 2.26065640746289e-05, "loss": 4.2742, "step": 20615 }, { "epoch": 0.5540776568587935, "grad_norm": 2.6608033180236816, "learning_rate": 2.2599754868582323e-05, "loss": 4.1716, "step": 20620 }, { "epoch": 0.5542120112857719, "grad_norm": 2.200185775756836, "learning_rate": 2.259294566253575e-05, "loss": 4.2262, "step": 20625 }, { "epoch": 0.5543463657127502, "grad_norm": 2.2931649684906006, "learning_rate": 2.2586136456489175e-05, "loss": 4.1396, "step": 20630 }, { "epoch": 0.5544807201397286, "grad_norm": 2.406660318374634, "learning_rate": 2.25793272504426e-05, "loss": 4.1707, "step": 20635 }, { "epoch": 0.554615074566707, "grad_norm": 2.3451435565948486, "learning_rate": 2.2572518044396023e-05, "loss": 4.1712, "step": 20640 }, { "epoch": 0.5547494289936853, "grad_norm": 2.387694835662842, "learning_rate": 2.2565708838349452e-05, "loss": 4.2478, "step": 20645 }, { "epoch": 0.5548837834206637, "grad_norm": 2.3927502632141113, "learning_rate": 2.2558899632302874e-05, "loss": 4.1262, "step": 20650 }, { "epoch": 0.5550181378476421, "grad_norm": 2.4236385822296143, "learning_rate": 2.25520904262563e-05, "loss": 4.1831, "step": 20655 }, { "epoch": 0.5551524922746205, "grad_norm": 2.401318311691284, "learning_rate": 2.2545281220209725e-05, "loss": 4.0151, "step": 20660 }, { "epoch": 0.5552868467015988, "grad_norm": 2.52939510345459, "learning_rate": 2.253847201416315e-05, "loss": 4.151, "step": 20665 }, { "epoch": 0.5554212011285772, "grad_norm": 2.6336584091186523, "learning_rate": 2.2531662808116573e-05, "loss": 4.1574, "step": 20670 }, { "epoch": 0.5555555555555556, "grad_norm": 2.317826271057129, "learning_rate": 2.2524853602070002e-05, "loss": 4.1583, "step": 20675 }, { "epoch": 0.5556899099825339, "grad_norm": 2.409836530685425, "learning_rate": 2.2518044396023425e-05, "loss": 4.2743, "step": 20680 }, { "epoch": 0.5558242644095123, "grad_norm": 2.292531728744507, "learning_rate": 2.251123518997685e-05, "loss": 4.1753, "step": 20685 }, { "epoch": 0.5559586188364907, "grad_norm": 2.4504222869873047, "learning_rate": 2.2504425983930276e-05, "loss": 4.1164, "step": 20690 }, { "epoch": 0.5560929732634691, "grad_norm": 2.6997151374816895, "learning_rate": 2.24976167778837e-05, "loss": 4.1654, "step": 20695 }, { "epoch": 0.5562273276904474, "grad_norm": 2.6537537574768066, "learning_rate": 2.2490807571837124e-05, "loss": 4.1708, "step": 20700 }, { "epoch": 0.5563616821174258, "grad_norm": 2.4333767890930176, "learning_rate": 2.248399836579055e-05, "loss": 4.0793, "step": 20705 }, { "epoch": 0.5564960365444042, "grad_norm": 2.375190496444702, "learning_rate": 2.2477189159743975e-05, "loss": 4.1256, "step": 20710 }, { "epoch": 0.5566303909713826, "grad_norm": 2.3570146560668945, "learning_rate": 2.24703799536974e-05, "loss": 4.1686, "step": 20715 }, { "epoch": 0.5567647453983608, "grad_norm": 2.606595754623413, "learning_rate": 2.2463570747650826e-05, "loss": 4.2137, "step": 20720 }, { "epoch": 0.5568990998253392, "grad_norm": 2.5160701274871826, "learning_rate": 2.245676154160425e-05, "loss": 4.111, "step": 20725 }, { "epoch": 0.5570334542523177, "grad_norm": 2.5048515796661377, "learning_rate": 2.2449952335557674e-05, "loss": 4.0975, "step": 20730 }, { "epoch": 0.5571678086792959, "grad_norm": 2.754521369934082, "learning_rate": 2.24431431295111e-05, "loss": 4.0853, "step": 20735 }, { "epoch": 0.5573021631062743, "grad_norm": 2.6441848278045654, "learning_rate": 2.2436333923464526e-05, "loss": 4.1549, "step": 20740 }, { "epoch": 0.5574365175332527, "grad_norm": 2.5024492740631104, "learning_rate": 2.2429524717417948e-05, "loss": 4.1744, "step": 20745 }, { "epoch": 0.5575708719602311, "grad_norm": 2.540158987045288, "learning_rate": 2.2422715511371377e-05, "loss": 4.2186, "step": 20750 }, { "epoch": 0.5577052263872094, "grad_norm": 2.559013843536377, "learning_rate": 2.24159063053248e-05, "loss": 4.2427, "step": 20755 }, { "epoch": 0.5578395808141878, "grad_norm": 2.4841814041137695, "learning_rate": 2.2409097099278225e-05, "loss": 4.2206, "step": 20760 }, { "epoch": 0.5579739352411662, "grad_norm": 2.331387519836426, "learning_rate": 2.240228789323165e-05, "loss": 4.1377, "step": 20765 }, { "epoch": 0.5581082896681445, "grad_norm": 2.410034656524658, "learning_rate": 2.2395478687185076e-05, "loss": 4.1734, "step": 20770 }, { "epoch": 0.5582426440951229, "grad_norm": 2.638117551803589, "learning_rate": 2.23886694811385e-05, "loss": 4.2508, "step": 20775 }, { "epoch": 0.5583769985221013, "grad_norm": 2.524665355682373, "learning_rate": 2.2381860275091928e-05, "loss": 4.0133, "step": 20780 }, { "epoch": 0.5585113529490797, "grad_norm": 2.251084327697754, "learning_rate": 2.237505106904535e-05, "loss": 4.1248, "step": 20785 }, { "epoch": 0.558645707376058, "grad_norm": 2.410855293273926, "learning_rate": 2.2368241862998776e-05, "loss": 4.1253, "step": 20790 }, { "epoch": 0.5587800618030364, "grad_norm": 2.6031699180603027, "learning_rate": 2.23614326569522e-05, "loss": 4.0467, "step": 20795 }, { "epoch": 0.5589144162300148, "grad_norm": 2.643695831298828, "learning_rate": 2.2354623450905627e-05, "loss": 4.2206, "step": 20800 }, { "epoch": 0.5590487706569931, "grad_norm": 2.3928866386413574, "learning_rate": 2.234781424485905e-05, "loss": 4.1707, "step": 20805 }, { "epoch": 0.5591831250839715, "grad_norm": 2.354193925857544, "learning_rate": 2.2341005038812475e-05, "loss": 4.1936, "step": 20810 }, { "epoch": 0.5593174795109499, "grad_norm": 2.660057544708252, "learning_rate": 2.23341958327659e-05, "loss": 4.2323, "step": 20815 }, { "epoch": 0.5594518339379283, "grad_norm": 2.5935213565826416, "learning_rate": 2.2327386626719326e-05, "loss": 4.1615, "step": 20820 }, { "epoch": 0.5595861883649066, "grad_norm": 2.332077741622925, "learning_rate": 2.2320577420672752e-05, "loss": 4.2397, "step": 20825 }, { "epoch": 0.559720542791885, "grad_norm": 2.9610326290130615, "learning_rate": 2.2313768214626177e-05, "loss": 4.174, "step": 20830 }, { "epoch": 0.5598548972188634, "grad_norm": 2.5245041847229004, "learning_rate": 2.23069590085796e-05, "loss": 4.1049, "step": 20835 }, { "epoch": 0.5599892516458417, "grad_norm": 2.5319952964782715, "learning_rate": 2.2300149802533025e-05, "loss": 4.2179, "step": 20840 }, { "epoch": 0.5601236060728201, "grad_norm": 2.4502251148223877, "learning_rate": 2.229334059648645e-05, "loss": 4.2045, "step": 20845 }, { "epoch": 0.5602579604997985, "grad_norm": 2.5761771202087402, "learning_rate": 2.2286531390439877e-05, "loss": 4.1342, "step": 20850 }, { "epoch": 0.5603923149267769, "grad_norm": 2.6918392181396484, "learning_rate": 2.2279722184393302e-05, "loss": 4.3097, "step": 20855 }, { "epoch": 0.5605266693537552, "grad_norm": 2.4351468086242676, "learning_rate": 2.2272912978346728e-05, "loss": 4.1196, "step": 20860 }, { "epoch": 0.5606610237807336, "grad_norm": 2.493361711502075, "learning_rate": 2.226610377230015e-05, "loss": 4.3482, "step": 20865 }, { "epoch": 0.560795378207712, "grad_norm": 2.2544288635253906, "learning_rate": 2.2259294566253576e-05, "loss": 4.1034, "step": 20870 }, { "epoch": 0.5609297326346903, "grad_norm": 2.2137484550476074, "learning_rate": 2.2252485360207e-05, "loss": 4.1159, "step": 20875 }, { "epoch": 0.5610640870616687, "grad_norm": 2.253361225128174, "learning_rate": 2.2245676154160424e-05, "loss": 4.1537, "step": 20880 }, { "epoch": 0.5611984414886471, "grad_norm": 2.312361478805542, "learning_rate": 2.2238866948113853e-05, "loss": 4.1809, "step": 20885 }, { "epoch": 0.5613327959156255, "grad_norm": 2.561953067779541, "learning_rate": 2.2232057742067275e-05, "loss": 4.1555, "step": 20890 }, { "epoch": 0.5614671503426037, "grad_norm": 2.6642825603485107, "learning_rate": 2.22252485360207e-05, "loss": 4.2089, "step": 20895 }, { "epoch": 0.5616015047695821, "grad_norm": 2.364079475402832, "learning_rate": 2.2218439329974126e-05, "loss": 4.1105, "step": 20900 }, { "epoch": 0.5617358591965605, "grad_norm": 2.522148609161377, "learning_rate": 2.2211630123927552e-05, "loss": 4.153, "step": 20905 }, { "epoch": 0.561870213623539, "grad_norm": 2.6086487770080566, "learning_rate": 2.2204820917880974e-05, "loss": 4.1529, "step": 20910 }, { "epoch": 0.5620045680505172, "grad_norm": 2.3919057846069336, "learning_rate": 2.21980117118344e-05, "loss": 4.0552, "step": 20915 }, { "epoch": 0.5621389224774956, "grad_norm": 2.891925811767578, "learning_rate": 2.2191202505787826e-05, "loss": 4.2503, "step": 20920 }, { "epoch": 0.562273276904474, "grad_norm": 2.4272701740264893, "learning_rate": 2.218439329974125e-05, "loss": 4.0537, "step": 20925 }, { "epoch": 0.5624076313314523, "grad_norm": 2.4619650840759277, "learning_rate": 2.2177584093694677e-05, "loss": 4.1305, "step": 20930 }, { "epoch": 0.5625419857584307, "grad_norm": 2.7332234382629395, "learning_rate": 2.2170774887648103e-05, "loss": 4.0587, "step": 20935 }, { "epoch": 0.5626763401854091, "grad_norm": 2.4257700443267822, "learning_rate": 2.2163965681601525e-05, "loss": 4.1599, "step": 20940 }, { "epoch": 0.5628106946123875, "grad_norm": 2.581960916519165, "learning_rate": 2.215715647555495e-05, "loss": 4.1741, "step": 20945 }, { "epoch": 0.5629450490393658, "grad_norm": 2.698566198348999, "learning_rate": 2.2150347269508376e-05, "loss": 4.0987, "step": 20950 }, { "epoch": 0.5630794034663442, "grad_norm": 2.712111711502075, "learning_rate": 2.2143538063461802e-05, "loss": 4.1611, "step": 20955 }, { "epoch": 0.5632137578933226, "grad_norm": 2.419236183166504, "learning_rate": 2.2136728857415228e-05, "loss": 4.141, "step": 20960 }, { "epoch": 0.5633481123203009, "grad_norm": 3.135730504989624, "learning_rate": 2.2129919651368653e-05, "loss": 4.0541, "step": 20965 }, { "epoch": 0.5634824667472793, "grad_norm": 2.21832537651062, "learning_rate": 2.2123110445322075e-05, "loss": 4.1552, "step": 20970 }, { "epoch": 0.5636168211742577, "grad_norm": 2.5601906776428223, "learning_rate": 2.21163012392755e-05, "loss": 3.9808, "step": 20975 }, { "epoch": 0.5637511756012361, "grad_norm": 2.39929461479187, "learning_rate": 2.2109492033228927e-05, "loss": 4.1359, "step": 20980 }, { "epoch": 0.5638855300282144, "grad_norm": 2.49480938911438, "learning_rate": 2.2102682827182352e-05, "loss": 4.1742, "step": 20985 }, { "epoch": 0.5640198844551928, "grad_norm": 2.5368010997772217, "learning_rate": 2.2095873621135778e-05, "loss": 4.2245, "step": 20990 }, { "epoch": 0.5641542388821712, "grad_norm": 2.842209815979004, "learning_rate": 2.2089064415089204e-05, "loss": 4.1006, "step": 20995 }, { "epoch": 0.5642885933091495, "grad_norm": 2.4134883880615234, "learning_rate": 2.2082255209042626e-05, "loss": 4.1595, "step": 21000 }, { "epoch": 0.5644229477361279, "grad_norm": 2.6063039302825928, "learning_rate": 2.207544600299605e-05, "loss": 4.0428, "step": 21005 }, { "epoch": 0.5645573021631063, "grad_norm": 2.3195695877075195, "learning_rate": 2.2068636796949477e-05, "loss": 4.0183, "step": 21010 }, { "epoch": 0.5646916565900847, "grad_norm": 2.7520480155944824, "learning_rate": 2.20618275909029e-05, "loss": 4.101, "step": 21015 }, { "epoch": 0.564826011017063, "grad_norm": 2.5005245208740234, "learning_rate": 2.2055018384856325e-05, "loss": 4.1274, "step": 21020 }, { "epoch": 0.5649603654440414, "grad_norm": 2.555042028427124, "learning_rate": 2.204820917880975e-05, "loss": 4.1382, "step": 21025 }, { "epoch": 0.5650947198710198, "grad_norm": 2.6153619289398193, "learning_rate": 2.2041399972763177e-05, "loss": 4.2286, "step": 21030 }, { "epoch": 0.5652290742979981, "grad_norm": 2.40244197845459, "learning_rate": 2.2034590766716602e-05, "loss": 4.1739, "step": 21035 }, { "epoch": 0.5653634287249765, "grad_norm": 2.350398063659668, "learning_rate": 2.2027781560670028e-05, "loss": 4.2636, "step": 21040 }, { "epoch": 0.5654977831519549, "grad_norm": 2.4911649227142334, "learning_rate": 2.202097235462345e-05, "loss": 4.1035, "step": 21045 }, { "epoch": 0.5656321375789333, "grad_norm": 2.304189443588257, "learning_rate": 2.2014163148576876e-05, "loss": 4.1321, "step": 21050 }, { "epoch": 0.5657664920059116, "grad_norm": 2.6927993297576904, "learning_rate": 2.20073539425303e-05, "loss": 4.1029, "step": 21055 }, { "epoch": 0.56590084643289, "grad_norm": 2.429262638092041, "learning_rate": 2.2000544736483727e-05, "loss": 4.1829, "step": 21060 }, { "epoch": 0.5660352008598684, "grad_norm": 2.62589955329895, "learning_rate": 2.1993735530437153e-05, "loss": 4.1843, "step": 21065 }, { "epoch": 0.5661695552868466, "grad_norm": 2.417908191680908, "learning_rate": 2.198692632439058e-05, "loss": 4.2379, "step": 21070 }, { "epoch": 0.566303909713825, "grad_norm": 2.3710758686065674, "learning_rate": 2.1980117118344e-05, "loss": 4.1463, "step": 21075 }, { "epoch": 0.5664382641408034, "grad_norm": 2.3683390617370605, "learning_rate": 2.1973307912297426e-05, "loss": 4.1374, "step": 21080 }, { "epoch": 0.5665726185677818, "grad_norm": 2.4248833656311035, "learning_rate": 2.1966498706250852e-05, "loss": 4.2737, "step": 21085 }, { "epoch": 0.5667069729947601, "grad_norm": 2.4917361736297607, "learning_rate": 2.1959689500204278e-05, "loss": 4.1595, "step": 21090 }, { "epoch": 0.5668413274217385, "grad_norm": 2.4692296981811523, "learning_rate": 2.19528802941577e-05, "loss": 4.0705, "step": 21095 }, { "epoch": 0.5669756818487169, "grad_norm": 2.393446922302246, "learning_rate": 2.194607108811113e-05, "loss": 4.1911, "step": 21100 }, { "epoch": 0.5671100362756953, "grad_norm": 2.706038475036621, "learning_rate": 2.193926188206455e-05, "loss": 4.1224, "step": 21105 }, { "epoch": 0.5672443907026736, "grad_norm": 2.3723690509796143, "learning_rate": 2.1932452676017977e-05, "loss": 4.0568, "step": 21110 }, { "epoch": 0.567378745129652, "grad_norm": 2.3602070808410645, "learning_rate": 2.1925643469971403e-05, "loss": 4.0901, "step": 21115 }, { "epoch": 0.5675130995566304, "grad_norm": 2.5478780269622803, "learning_rate": 2.1918834263924828e-05, "loss": 4.1488, "step": 21120 }, { "epoch": 0.5676474539836087, "grad_norm": 2.8540353775024414, "learning_rate": 2.191202505787825e-05, "loss": 4.1776, "step": 21125 }, { "epoch": 0.5677818084105871, "grad_norm": 2.3678102493286133, "learning_rate": 2.190521585183168e-05, "loss": 4.2047, "step": 21130 }, { "epoch": 0.5679161628375655, "grad_norm": 2.411693572998047, "learning_rate": 2.1898406645785102e-05, "loss": 4.1799, "step": 21135 }, { "epoch": 0.5680505172645439, "grad_norm": 2.441145181655884, "learning_rate": 2.1891597439738527e-05, "loss": 3.9997, "step": 21140 }, { "epoch": 0.5681848716915222, "grad_norm": 2.348498582839966, "learning_rate": 2.1884788233691953e-05, "loss": 4.1109, "step": 21145 }, { "epoch": 0.5683192261185006, "grad_norm": 2.433619499206543, "learning_rate": 2.187797902764538e-05, "loss": 4.0969, "step": 21150 }, { "epoch": 0.568453580545479, "grad_norm": 2.2475550174713135, "learning_rate": 2.18711698215988e-05, "loss": 4.0951, "step": 21155 }, { "epoch": 0.5685879349724573, "grad_norm": 2.471480131149292, "learning_rate": 2.186436061555223e-05, "loss": 4.086, "step": 21160 }, { "epoch": 0.5687222893994357, "grad_norm": 2.2577619552612305, "learning_rate": 2.1857551409505652e-05, "loss": 4.1726, "step": 21165 }, { "epoch": 0.5688566438264141, "grad_norm": 2.572582483291626, "learning_rate": 2.1850742203459078e-05, "loss": 4.2507, "step": 21170 }, { "epoch": 0.5689909982533925, "grad_norm": 2.219261884689331, "learning_rate": 2.1843932997412504e-05, "loss": 4.1053, "step": 21175 }, { "epoch": 0.5691253526803708, "grad_norm": 2.6091339588165283, "learning_rate": 2.1837123791365926e-05, "loss": 4.2243, "step": 21180 }, { "epoch": 0.5692597071073492, "grad_norm": 2.366713047027588, "learning_rate": 2.183031458531935e-05, "loss": 4.348, "step": 21185 }, { "epoch": 0.5693940615343276, "grad_norm": 2.5276148319244385, "learning_rate": 2.1823505379272777e-05, "loss": 4.1545, "step": 21190 }, { "epoch": 0.5695284159613059, "grad_norm": 2.3242762088775635, "learning_rate": 2.1816696173226203e-05, "loss": 4.1163, "step": 21195 }, { "epoch": 0.5696627703882843, "grad_norm": 2.544060468673706, "learning_rate": 2.1809886967179625e-05, "loss": 4.1426, "step": 21200 }, { "epoch": 0.5697971248152627, "grad_norm": 2.455115795135498, "learning_rate": 2.1803077761133054e-05, "loss": 3.987, "step": 21205 }, { "epoch": 0.5699314792422411, "grad_norm": 2.310176134109497, "learning_rate": 2.1796268555086477e-05, "loss": 4.1611, "step": 21210 }, { "epoch": 0.5700658336692194, "grad_norm": 2.234252691268921, "learning_rate": 2.1789459349039902e-05, "loss": 4.2473, "step": 21215 }, { "epoch": 0.5702001880961978, "grad_norm": 2.49185848236084, "learning_rate": 2.1782650142993328e-05, "loss": 4.1715, "step": 21220 }, { "epoch": 0.5703345425231762, "grad_norm": 2.4109110832214355, "learning_rate": 2.1775840936946754e-05, "loss": 4.224, "step": 21225 }, { "epoch": 0.5704688969501545, "grad_norm": 2.588263988494873, "learning_rate": 2.1769031730900176e-05, "loss": 4.201, "step": 21230 }, { "epoch": 0.5706032513771329, "grad_norm": 2.3941123485565186, "learning_rate": 2.1762222524853605e-05, "loss": 4.1412, "step": 21235 }, { "epoch": 0.5707376058041113, "grad_norm": 2.6348400115966797, "learning_rate": 2.1755413318807027e-05, "loss": 4.0545, "step": 21240 }, { "epoch": 0.5708719602310897, "grad_norm": 2.5924599170684814, "learning_rate": 2.1748604112760453e-05, "loss": 4.1349, "step": 21245 }, { "epoch": 0.571006314658068, "grad_norm": 2.41534686088562, "learning_rate": 2.174179490671388e-05, "loss": 4.1472, "step": 21250 }, { "epoch": 0.5711406690850463, "grad_norm": 2.1966664791107178, "learning_rate": 2.1734985700667304e-05, "loss": 4.1537, "step": 21255 }, { "epoch": 0.5712750235120247, "grad_norm": 2.8363473415374756, "learning_rate": 2.1728176494620726e-05, "loss": 3.9646, "step": 21260 }, { "epoch": 0.571409377939003, "grad_norm": 2.4740140438079834, "learning_rate": 2.1721367288574155e-05, "loss": 4.1995, "step": 21265 }, { "epoch": 0.5715437323659814, "grad_norm": 2.234431743621826, "learning_rate": 2.1714558082527578e-05, "loss": 4.183, "step": 21270 }, { "epoch": 0.5716780867929598, "grad_norm": 2.312194347381592, "learning_rate": 2.1707748876481003e-05, "loss": 4.1702, "step": 21275 }, { "epoch": 0.5718124412199382, "grad_norm": 2.1881096363067627, "learning_rate": 2.170093967043443e-05, "loss": 4.1251, "step": 21280 }, { "epoch": 0.5719467956469165, "grad_norm": 2.6155736446380615, "learning_rate": 2.1694130464387855e-05, "loss": 4.1716, "step": 21285 }, { "epoch": 0.5720811500738949, "grad_norm": 2.5879318714141846, "learning_rate": 2.1687321258341277e-05, "loss": 4.1118, "step": 21290 }, { "epoch": 0.5722155045008733, "grad_norm": 2.5080370903015137, "learning_rate": 2.1680512052294706e-05, "loss": 4.2132, "step": 21295 }, { "epoch": 0.5723498589278516, "grad_norm": 2.4968149662017822, "learning_rate": 2.1673702846248128e-05, "loss": 4.1445, "step": 21300 }, { "epoch": 0.57248421335483, "grad_norm": 2.4467763900756836, "learning_rate": 2.1666893640201554e-05, "loss": 4.1272, "step": 21305 }, { "epoch": 0.5726185677818084, "grad_norm": 2.5330915451049805, "learning_rate": 2.166008443415498e-05, "loss": 4.0701, "step": 21310 }, { "epoch": 0.5727529222087868, "grad_norm": 2.406627655029297, "learning_rate": 2.1653275228108402e-05, "loss": 4.0937, "step": 21315 }, { "epoch": 0.5728872766357651, "grad_norm": 2.6535279750823975, "learning_rate": 2.1646466022061827e-05, "loss": 4.0587, "step": 21320 }, { "epoch": 0.5730216310627435, "grad_norm": 2.5053555965423584, "learning_rate": 2.1639656816015253e-05, "loss": 4.2499, "step": 21325 }, { "epoch": 0.5731559854897219, "grad_norm": 2.459890127182007, "learning_rate": 2.163284760996868e-05, "loss": 4.3215, "step": 21330 }, { "epoch": 0.5732903399167003, "grad_norm": 2.328902244567871, "learning_rate": 2.16260384039221e-05, "loss": 4.0986, "step": 21335 }, { "epoch": 0.5734246943436786, "grad_norm": 2.621497631072998, "learning_rate": 2.161922919787553e-05, "loss": 4.1851, "step": 21340 }, { "epoch": 0.573559048770657, "grad_norm": 2.6003329753875732, "learning_rate": 2.1612419991828952e-05, "loss": 4.2634, "step": 21345 }, { "epoch": 0.5736934031976354, "grad_norm": 2.21806263923645, "learning_rate": 2.1605610785782378e-05, "loss": 4.1281, "step": 21350 }, { "epoch": 0.5738277576246137, "grad_norm": 2.319343328475952, "learning_rate": 2.1598801579735804e-05, "loss": 4.1196, "step": 21355 }, { "epoch": 0.5739621120515921, "grad_norm": 2.5140366554260254, "learning_rate": 2.159199237368923e-05, "loss": 4.2326, "step": 21360 }, { "epoch": 0.5740964664785705, "grad_norm": 2.5040814876556396, "learning_rate": 2.158518316764265e-05, "loss": 4.1909, "step": 21365 }, { "epoch": 0.5742308209055489, "grad_norm": 2.4275403022766113, "learning_rate": 2.157837396159608e-05, "loss": 4.1706, "step": 21370 }, { "epoch": 0.5743651753325272, "grad_norm": 2.3612277507781982, "learning_rate": 2.1571564755549503e-05, "loss": 4.1657, "step": 21375 }, { "epoch": 0.5744995297595056, "grad_norm": 2.4991815090179443, "learning_rate": 2.156475554950293e-05, "loss": 4.1876, "step": 21380 }, { "epoch": 0.574633884186484, "grad_norm": 2.522857904434204, "learning_rate": 2.1557946343456354e-05, "loss": 4.0154, "step": 21385 }, { "epoch": 0.5747682386134623, "grad_norm": 2.778536081314087, "learning_rate": 2.155113713740978e-05, "loss": 4.127, "step": 21390 }, { "epoch": 0.5749025930404407, "grad_norm": 2.424699544906616, "learning_rate": 2.1544327931363202e-05, "loss": 4.079, "step": 21395 }, { "epoch": 0.5750369474674191, "grad_norm": 2.8420650959014893, "learning_rate": 2.153751872531663e-05, "loss": 4.1867, "step": 21400 }, { "epoch": 0.5751713018943975, "grad_norm": 2.427560567855835, "learning_rate": 2.1530709519270053e-05, "loss": 4.1642, "step": 21405 }, { "epoch": 0.5753056563213758, "grad_norm": 2.263148784637451, "learning_rate": 2.152390031322348e-05, "loss": 4.0444, "step": 21410 }, { "epoch": 0.5754400107483542, "grad_norm": 2.7196755409240723, "learning_rate": 2.1517091107176905e-05, "loss": 4.0274, "step": 21415 }, { "epoch": 0.5755743651753326, "grad_norm": 2.6666336059570312, "learning_rate": 2.151028190113033e-05, "loss": 4.2947, "step": 21420 }, { "epoch": 0.5757087196023108, "grad_norm": 2.3433547019958496, "learning_rate": 2.1503472695083753e-05, "loss": 4.1682, "step": 21425 }, { "epoch": 0.5758430740292892, "grad_norm": 2.3195765018463135, "learning_rate": 2.1496663489037182e-05, "loss": 4.1648, "step": 21430 }, { "epoch": 0.5759774284562676, "grad_norm": 2.3919289112091064, "learning_rate": 2.1489854282990604e-05, "loss": 4.141, "step": 21435 }, { "epoch": 0.576111782883246, "grad_norm": 2.2470927238464355, "learning_rate": 2.148304507694403e-05, "loss": 4.232, "step": 21440 }, { "epoch": 0.5762461373102243, "grad_norm": 2.4836103916168213, "learning_rate": 2.1476235870897455e-05, "loss": 4.0608, "step": 21445 }, { "epoch": 0.5763804917372027, "grad_norm": 2.297057867050171, "learning_rate": 2.146942666485088e-05, "loss": 4.0939, "step": 21450 }, { "epoch": 0.5765148461641811, "grad_norm": 2.409677028656006, "learning_rate": 2.1462617458804303e-05, "loss": 4.2906, "step": 21455 }, { "epoch": 0.5766492005911594, "grad_norm": 2.8025062084198, "learning_rate": 2.145580825275773e-05, "loss": 4.1385, "step": 21460 }, { "epoch": 0.5767835550181378, "grad_norm": 2.5799732208251953, "learning_rate": 2.1448999046711155e-05, "loss": 4.247, "step": 21465 }, { "epoch": 0.5769179094451162, "grad_norm": 2.610898494720459, "learning_rate": 2.1442189840664577e-05, "loss": 4.1302, "step": 21470 }, { "epoch": 0.5770522638720946, "grad_norm": 2.526172399520874, "learning_rate": 2.1435380634618006e-05, "loss": 4.1503, "step": 21475 }, { "epoch": 0.5771866182990729, "grad_norm": 2.4924721717834473, "learning_rate": 2.1428571428571428e-05, "loss": 4.1262, "step": 21480 }, { "epoch": 0.5773209727260513, "grad_norm": 2.412170171737671, "learning_rate": 2.1421762222524854e-05, "loss": 4.1073, "step": 21485 }, { "epoch": 0.5774553271530297, "grad_norm": 2.571408271789551, "learning_rate": 2.141495301647828e-05, "loss": 4.154, "step": 21490 }, { "epoch": 0.577589681580008, "grad_norm": 2.2729480266571045, "learning_rate": 2.1408143810431705e-05, "loss": 4.1942, "step": 21495 }, { "epoch": 0.5777240360069864, "grad_norm": 2.198112964630127, "learning_rate": 2.1401334604385127e-05, "loss": 4.1168, "step": 21500 }, { "epoch": 0.5778583904339648, "grad_norm": 2.3941686153411865, "learning_rate": 2.1394525398338556e-05, "loss": 4.0659, "step": 21505 }, { "epoch": 0.5779927448609432, "grad_norm": 2.497990846633911, "learning_rate": 2.138771619229198e-05, "loss": 4.1463, "step": 21510 }, { "epoch": 0.5781270992879215, "grad_norm": 2.365633249282837, "learning_rate": 2.1380906986245404e-05, "loss": 4.1456, "step": 21515 }, { "epoch": 0.5782614537148999, "grad_norm": 2.716989755630493, "learning_rate": 2.137409778019883e-05, "loss": 4.1191, "step": 21520 }, { "epoch": 0.5783958081418783, "grad_norm": 2.3200552463531494, "learning_rate": 2.1367288574152256e-05, "loss": 4.1264, "step": 21525 }, { "epoch": 0.5785301625688567, "grad_norm": 2.610867738723755, "learning_rate": 2.1360479368105678e-05, "loss": 4.1158, "step": 21530 }, { "epoch": 0.578664516995835, "grad_norm": 2.740169048309326, "learning_rate": 2.1353670162059107e-05, "loss": 4.1211, "step": 21535 }, { "epoch": 0.5787988714228134, "grad_norm": 2.4921469688415527, "learning_rate": 2.134686095601253e-05, "loss": 4.06, "step": 21540 }, { "epoch": 0.5789332258497918, "grad_norm": 2.4875035285949707, "learning_rate": 2.1340051749965955e-05, "loss": 4.1232, "step": 21545 }, { "epoch": 0.5790675802767701, "grad_norm": 2.601644992828369, "learning_rate": 2.133324254391938e-05, "loss": 4.1255, "step": 21550 }, { "epoch": 0.5792019347037485, "grad_norm": 2.507610559463501, "learning_rate": 2.1326433337872806e-05, "loss": 4.1206, "step": 21555 }, { "epoch": 0.5793362891307269, "grad_norm": 2.703639268875122, "learning_rate": 2.131962413182623e-05, "loss": 4.1207, "step": 21560 }, { "epoch": 0.5794706435577053, "grad_norm": 2.456883430480957, "learning_rate": 2.1312814925779658e-05, "loss": 4.0974, "step": 21565 }, { "epoch": 0.5796049979846836, "grad_norm": 2.650303840637207, "learning_rate": 2.130600571973308e-05, "loss": 4.167, "step": 21570 }, { "epoch": 0.579739352411662, "grad_norm": 2.395326614379883, "learning_rate": 2.1299196513686505e-05, "loss": 4.2058, "step": 21575 }, { "epoch": 0.5798737068386404, "grad_norm": 2.5663039684295654, "learning_rate": 2.129238730763993e-05, "loss": 4.0195, "step": 21580 }, { "epoch": 0.5800080612656187, "grad_norm": 2.563399076461792, "learning_rate": 2.1285578101593357e-05, "loss": 4.2077, "step": 21585 }, { "epoch": 0.5801424156925971, "grad_norm": 2.460015058517456, "learning_rate": 2.127876889554678e-05, "loss": 4.1429, "step": 21590 }, { "epoch": 0.5802767701195755, "grad_norm": 2.565241813659668, "learning_rate": 2.1271959689500208e-05, "loss": 4.2122, "step": 21595 }, { "epoch": 0.5804111245465539, "grad_norm": 2.3245444297790527, "learning_rate": 2.126515048345363e-05, "loss": 4.1782, "step": 21600 }, { "epoch": 0.5805454789735321, "grad_norm": 2.3632869720458984, "learning_rate": 2.1258341277407056e-05, "loss": 4.2287, "step": 21605 }, { "epoch": 0.5806798334005105, "grad_norm": 2.4487085342407227, "learning_rate": 2.125153207136048e-05, "loss": 4.0185, "step": 21610 }, { "epoch": 0.580814187827489, "grad_norm": 2.6842966079711914, "learning_rate": 2.1244722865313904e-05, "loss": 4.0466, "step": 21615 }, { "epoch": 0.5809485422544672, "grad_norm": 2.2872655391693115, "learning_rate": 2.123791365926733e-05, "loss": 4.1465, "step": 21620 }, { "epoch": 0.5810828966814456, "grad_norm": 2.5575578212738037, "learning_rate": 2.1231104453220755e-05, "loss": 4.0063, "step": 21625 }, { "epoch": 0.581217251108424, "grad_norm": 2.6015141010284424, "learning_rate": 2.122429524717418e-05, "loss": 4.0867, "step": 21630 }, { "epoch": 0.5813516055354024, "grad_norm": 2.2065632343292236, "learning_rate": 2.1217486041127603e-05, "loss": 4.0896, "step": 21635 }, { "epoch": 0.5814859599623807, "grad_norm": 2.5754806995391846, "learning_rate": 2.1210676835081032e-05, "loss": 4.2525, "step": 21640 }, { "epoch": 0.5816203143893591, "grad_norm": 2.376110792160034, "learning_rate": 2.1203867629034455e-05, "loss": 4.0846, "step": 21645 }, { "epoch": 0.5817546688163375, "grad_norm": 2.5564475059509277, "learning_rate": 2.119705842298788e-05, "loss": 4.1456, "step": 21650 }, { "epoch": 0.5818890232433158, "grad_norm": 2.4386701583862305, "learning_rate": 2.1190249216941306e-05, "loss": 4.0591, "step": 21655 }, { "epoch": 0.5820233776702942, "grad_norm": 2.3748061656951904, "learning_rate": 2.118344001089473e-05, "loss": 4.0772, "step": 21660 }, { "epoch": 0.5821577320972726, "grad_norm": 2.557924747467041, "learning_rate": 2.1176630804848154e-05, "loss": 4.0266, "step": 21665 }, { "epoch": 0.582292086524251, "grad_norm": 2.337669849395752, "learning_rate": 2.1169821598801583e-05, "loss": 4.1596, "step": 21670 }, { "epoch": 0.5824264409512293, "grad_norm": 2.524343729019165, "learning_rate": 2.1163012392755005e-05, "loss": 4.2227, "step": 21675 }, { "epoch": 0.5825607953782077, "grad_norm": 2.3988471031188965, "learning_rate": 2.115620318670843e-05, "loss": 4.0089, "step": 21680 }, { "epoch": 0.5826951498051861, "grad_norm": 2.673433780670166, "learning_rate": 2.1149393980661856e-05, "loss": 4.21, "step": 21685 }, { "epoch": 0.5828295042321644, "grad_norm": 2.5309226512908936, "learning_rate": 2.1142584774615282e-05, "loss": 4.2518, "step": 21690 }, { "epoch": 0.5829638586591428, "grad_norm": 2.3771448135375977, "learning_rate": 2.1135775568568704e-05, "loss": 4.2681, "step": 21695 }, { "epoch": 0.5830982130861212, "grad_norm": 2.400330066680908, "learning_rate": 2.1128966362522133e-05, "loss": 4.0531, "step": 21700 }, { "epoch": 0.5832325675130996, "grad_norm": 2.4196760654449463, "learning_rate": 2.1122157156475556e-05, "loss": 4.1098, "step": 21705 }, { "epoch": 0.5833669219400779, "grad_norm": 2.4828715324401855, "learning_rate": 2.111534795042898e-05, "loss": 4.1702, "step": 21710 }, { "epoch": 0.5835012763670563, "grad_norm": 2.8380351066589355, "learning_rate": 2.1108538744382407e-05, "loss": 4.1871, "step": 21715 }, { "epoch": 0.5836356307940347, "grad_norm": 2.6905953884124756, "learning_rate": 2.1101729538335833e-05, "loss": 4.1665, "step": 21720 }, { "epoch": 0.583769985221013, "grad_norm": 2.534278392791748, "learning_rate": 2.1094920332289255e-05, "loss": 4.1575, "step": 21725 }, { "epoch": 0.5839043396479914, "grad_norm": 2.3490195274353027, "learning_rate": 2.1088111126242684e-05, "loss": 4.0772, "step": 21730 }, { "epoch": 0.5840386940749698, "grad_norm": 2.5073323249816895, "learning_rate": 2.1081301920196106e-05, "loss": 4.078, "step": 21735 }, { "epoch": 0.5841730485019482, "grad_norm": 2.3051095008850098, "learning_rate": 2.1074492714149532e-05, "loss": 4.2001, "step": 21740 }, { "epoch": 0.5843074029289265, "grad_norm": 2.3367397785186768, "learning_rate": 2.1067683508102957e-05, "loss": 4.1001, "step": 21745 }, { "epoch": 0.5844417573559049, "grad_norm": 2.6593167781829834, "learning_rate": 2.1060874302056383e-05, "loss": 4.2346, "step": 21750 }, { "epoch": 0.5845761117828833, "grad_norm": 2.541367530822754, "learning_rate": 2.1054065096009805e-05, "loss": 4.2642, "step": 21755 }, { "epoch": 0.5847104662098617, "grad_norm": 2.3647971153259277, "learning_rate": 2.104725588996323e-05, "loss": 4.1797, "step": 21760 }, { "epoch": 0.58484482063684, "grad_norm": 2.5706794261932373, "learning_rate": 2.1040446683916657e-05, "loss": 4.1423, "step": 21765 }, { "epoch": 0.5849791750638184, "grad_norm": 2.45180082321167, "learning_rate": 2.103363747787008e-05, "loss": 4.1894, "step": 21770 }, { "epoch": 0.5851135294907968, "grad_norm": 2.325651168823242, "learning_rate": 2.1026828271823508e-05, "loss": 4.1712, "step": 21775 }, { "epoch": 0.585247883917775, "grad_norm": 2.274667501449585, "learning_rate": 2.102001906577693e-05, "loss": 4.1197, "step": 21780 }, { "epoch": 0.5853822383447534, "grad_norm": 2.317607879638672, "learning_rate": 2.1013209859730356e-05, "loss": 4.0482, "step": 21785 }, { "epoch": 0.5855165927717318, "grad_norm": 2.290598154067993, "learning_rate": 2.100640065368378e-05, "loss": 4.1337, "step": 21790 }, { "epoch": 0.5856509471987102, "grad_norm": 2.2927801609039307, "learning_rate": 2.0999591447637207e-05, "loss": 4.1949, "step": 21795 }, { "epoch": 0.5857853016256885, "grad_norm": 2.4697580337524414, "learning_rate": 2.099278224159063e-05, "loss": 4.2268, "step": 21800 }, { "epoch": 0.5859196560526669, "grad_norm": 2.3395369052886963, "learning_rate": 2.098597303554406e-05, "loss": 4.131, "step": 21805 }, { "epoch": 0.5860540104796453, "grad_norm": 2.3884780406951904, "learning_rate": 2.097916382949748e-05, "loss": 4.1601, "step": 21810 }, { "epoch": 0.5861883649066236, "grad_norm": 2.6849544048309326, "learning_rate": 2.0972354623450907e-05, "loss": 4.2637, "step": 21815 }, { "epoch": 0.586322719333602, "grad_norm": 2.737492322921753, "learning_rate": 2.0965545417404332e-05, "loss": 4.1959, "step": 21820 }, { "epoch": 0.5864570737605804, "grad_norm": 2.4521985054016113, "learning_rate": 2.0958736211357758e-05, "loss": 4.1551, "step": 21825 }, { "epoch": 0.5865914281875588, "grad_norm": 2.733161211013794, "learning_rate": 2.095192700531118e-05, "loss": 4.194, "step": 21830 }, { "epoch": 0.5867257826145371, "grad_norm": 2.5179874897003174, "learning_rate": 2.094511779926461e-05, "loss": 4.1532, "step": 21835 }, { "epoch": 0.5868601370415155, "grad_norm": 2.318056344985962, "learning_rate": 2.093830859321803e-05, "loss": 4.2084, "step": 21840 }, { "epoch": 0.5869944914684939, "grad_norm": 2.405965566635132, "learning_rate": 2.0931499387171457e-05, "loss": 4.1152, "step": 21845 }, { "epoch": 0.5871288458954722, "grad_norm": 2.3581771850585938, "learning_rate": 2.0924690181124883e-05, "loss": 4.0698, "step": 21850 }, { "epoch": 0.5872632003224506, "grad_norm": 2.7119951248168945, "learning_rate": 2.091788097507831e-05, "loss": 4.2815, "step": 21855 }, { "epoch": 0.587397554749429, "grad_norm": 2.4672129154205322, "learning_rate": 2.091107176903173e-05, "loss": 4.0579, "step": 21860 }, { "epoch": 0.5875319091764074, "grad_norm": 2.7595431804656982, "learning_rate": 2.0904262562985156e-05, "loss": 4.1299, "step": 21865 }, { "epoch": 0.5876662636033857, "grad_norm": 2.457031488418579, "learning_rate": 2.0897453356938582e-05, "loss": 4.0784, "step": 21870 }, { "epoch": 0.5878006180303641, "grad_norm": 2.6421079635620117, "learning_rate": 2.0890644150892008e-05, "loss": 4.15, "step": 21875 }, { "epoch": 0.5879349724573425, "grad_norm": 2.50795578956604, "learning_rate": 2.0883834944845433e-05, "loss": 4.1966, "step": 21880 }, { "epoch": 0.5880693268843208, "grad_norm": 2.4230599403381348, "learning_rate": 2.087702573879886e-05, "loss": 4.0272, "step": 21885 }, { "epoch": 0.5882036813112992, "grad_norm": 2.385113000869751, "learning_rate": 2.087021653275228e-05, "loss": 3.9862, "step": 21890 }, { "epoch": 0.5883380357382776, "grad_norm": 2.7116150856018066, "learning_rate": 2.0863407326705707e-05, "loss": 4.1324, "step": 21895 }, { "epoch": 0.588472390165256, "grad_norm": 2.4621129035949707, "learning_rate": 2.0856598120659133e-05, "loss": 4.2575, "step": 21900 }, { "epoch": 0.5886067445922343, "grad_norm": 2.600630521774292, "learning_rate": 2.0849788914612555e-05, "loss": 4.1075, "step": 21905 }, { "epoch": 0.5887410990192127, "grad_norm": 2.488813638687134, "learning_rate": 2.0842979708565984e-05, "loss": 4.1968, "step": 21910 }, { "epoch": 0.5888754534461911, "grad_norm": 2.484891653060913, "learning_rate": 2.0836170502519406e-05, "loss": 4.1315, "step": 21915 }, { "epoch": 0.5890098078731694, "grad_norm": 2.4340641498565674, "learning_rate": 2.0829361296472832e-05, "loss": 4.2214, "step": 21920 }, { "epoch": 0.5891441623001478, "grad_norm": 2.7433059215545654, "learning_rate": 2.0822552090426257e-05, "loss": 4.1408, "step": 21925 }, { "epoch": 0.5892785167271262, "grad_norm": 2.3965468406677246, "learning_rate": 2.0815742884379683e-05, "loss": 4.2074, "step": 21930 }, { "epoch": 0.5894128711541046, "grad_norm": 2.3174972534179688, "learning_rate": 2.0808933678333105e-05, "loss": 4.2261, "step": 21935 }, { "epoch": 0.5895472255810829, "grad_norm": 2.507331371307373, "learning_rate": 2.080212447228653e-05, "loss": 4.1473, "step": 21940 }, { "epoch": 0.5896815800080613, "grad_norm": 2.5130231380462646, "learning_rate": 2.0795315266239957e-05, "loss": 4.1083, "step": 21945 }, { "epoch": 0.5898159344350397, "grad_norm": 2.4337618350982666, "learning_rate": 2.0788506060193382e-05, "loss": 4.1723, "step": 21950 }, { "epoch": 0.5899502888620181, "grad_norm": 2.610342502593994, "learning_rate": 2.0781696854146808e-05, "loss": 4.1378, "step": 21955 }, { "epoch": 0.5900846432889963, "grad_norm": 2.3479819297790527, "learning_rate": 2.0774887648100234e-05, "loss": 4.1549, "step": 21960 }, { "epoch": 0.5902189977159747, "grad_norm": 2.491716146469116, "learning_rate": 2.0768078442053656e-05, "loss": 3.9447, "step": 21965 }, { "epoch": 0.5903533521429531, "grad_norm": 2.4012537002563477, "learning_rate": 2.076126923600708e-05, "loss": 4.1278, "step": 21970 }, { "epoch": 0.5904877065699314, "grad_norm": 2.4862544536590576, "learning_rate": 2.0754460029960507e-05, "loss": 4.2167, "step": 21975 }, { "epoch": 0.5906220609969098, "grad_norm": 2.5078492164611816, "learning_rate": 2.0747650823913933e-05, "loss": 4.2002, "step": 21980 }, { "epoch": 0.5907564154238882, "grad_norm": 2.36895489692688, "learning_rate": 2.074084161786736e-05, "loss": 4.2097, "step": 21985 }, { "epoch": 0.5908907698508666, "grad_norm": 2.631854295730591, "learning_rate": 2.0734032411820784e-05, "loss": 4.1482, "step": 21990 }, { "epoch": 0.5910251242778449, "grad_norm": 2.496811628341675, "learning_rate": 2.0727223205774206e-05, "loss": 4.2098, "step": 21995 }, { "epoch": 0.5911594787048233, "grad_norm": 2.411099910736084, "learning_rate": 2.0720413999727632e-05, "loss": 4.2255, "step": 22000 }, { "epoch": 0.5912938331318017, "grad_norm": 2.4848039150238037, "learning_rate": 2.0713604793681058e-05, "loss": 3.9915, "step": 22005 }, { "epoch": 0.59142818755878, "grad_norm": 2.4488344192504883, "learning_rate": 2.0706795587634483e-05, "loss": 4.0866, "step": 22010 }, { "epoch": 0.5915625419857584, "grad_norm": 2.357998847961426, "learning_rate": 2.069998638158791e-05, "loss": 4.129, "step": 22015 }, { "epoch": 0.5916968964127368, "grad_norm": 2.5680506229400635, "learning_rate": 2.0693177175541335e-05, "loss": 4.0841, "step": 22020 }, { "epoch": 0.5918312508397152, "grad_norm": 2.5710983276367188, "learning_rate": 2.0686367969494757e-05, "loss": 4.2075, "step": 22025 }, { "epoch": 0.5919656052666935, "grad_norm": 2.276674270629883, "learning_rate": 2.0679558763448183e-05, "loss": 4.1095, "step": 22030 }, { "epoch": 0.5920999596936719, "grad_norm": 2.4630093574523926, "learning_rate": 2.067274955740161e-05, "loss": 4.1682, "step": 22035 }, { "epoch": 0.5922343141206503, "grad_norm": 2.5769236087799072, "learning_rate": 2.0665940351355034e-05, "loss": 4.1505, "step": 22040 }, { "epoch": 0.5923686685476286, "grad_norm": 2.5042190551757812, "learning_rate": 2.0659131145308456e-05, "loss": 4.0999, "step": 22045 }, { "epoch": 0.592503022974607, "grad_norm": 2.5071041584014893, "learning_rate": 2.0652321939261885e-05, "loss": 4.1222, "step": 22050 }, { "epoch": 0.5926373774015854, "grad_norm": 2.7088139057159424, "learning_rate": 2.0645512733215308e-05, "loss": 4.2416, "step": 22055 }, { "epoch": 0.5927717318285638, "grad_norm": 2.429441213607788, "learning_rate": 2.0638703527168733e-05, "loss": 4.2183, "step": 22060 }, { "epoch": 0.5929060862555421, "grad_norm": 2.394956588745117, "learning_rate": 2.063189432112216e-05, "loss": 4.0863, "step": 22065 }, { "epoch": 0.5930404406825205, "grad_norm": 2.723422050476074, "learning_rate": 2.062508511507558e-05, "loss": 4.11, "step": 22070 }, { "epoch": 0.5931747951094989, "grad_norm": 2.4403798580169678, "learning_rate": 2.0618275909029007e-05, "loss": 4.1297, "step": 22075 }, { "epoch": 0.5933091495364772, "grad_norm": 2.635958433151245, "learning_rate": 2.0611466702982432e-05, "loss": 4.0512, "step": 22080 }, { "epoch": 0.5934435039634556, "grad_norm": 2.4640920162200928, "learning_rate": 2.0604657496935858e-05, "loss": 4.16, "step": 22085 }, { "epoch": 0.593577858390434, "grad_norm": 2.5697410106658936, "learning_rate": 2.0597848290889284e-05, "loss": 4.1867, "step": 22090 }, { "epoch": 0.5937122128174124, "grad_norm": 2.4925036430358887, "learning_rate": 2.059103908484271e-05, "loss": 4.1497, "step": 22095 }, { "epoch": 0.5938465672443907, "grad_norm": 2.5219483375549316, "learning_rate": 2.0584229878796132e-05, "loss": 4.1575, "step": 22100 }, { "epoch": 0.5939809216713691, "grad_norm": 2.61572003364563, "learning_rate": 2.0577420672749557e-05, "loss": 4.0744, "step": 22105 }, { "epoch": 0.5941152760983475, "grad_norm": 2.399329900741577, "learning_rate": 2.0570611466702983e-05, "loss": 4.1379, "step": 22110 }, { "epoch": 0.5942496305253258, "grad_norm": 2.555159091949463, "learning_rate": 2.056380226065641e-05, "loss": 4.2174, "step": 22115 }, { "epoch": 0.5943839849523042, "grad_norm": 2.8729875087738037, "learning_rate": 2.0556993054609834e-05, "loss": 4.2415, "step": 22120 }, { "epoch": 0.5945183393792826, "grad_norm": 2.3400285243988037, "learning_rate": 2.055018384856326e-05, "loss": 4.1075, "step": 22125 }, { "epoch": 0.594652693806261, "grad_norm": 2.435699939727783, "learning_rate": 2.0543374642516682e-05, "loss": 4.024, "step": 22130 }, { "epoch": 0.5947870482332392, "grad_norm": 2.5164711475372314, "learning_rate": 2.0536565436470108e-05, "loss": 4.0939, "step": 22135 }, { "epoch": 0.5949214026602176, "grad_norm": 2.3962244987487793, "learning_rate": 2.0529756230423534e-05, "loss": 4.1665, "step": 22140 }, { "epoch": 0.595055757087196, "grad_norm": 2.6777961254119873, "learning_rate": 2.052294702437696e-05, "loss": 4.2075, "step": 22145 }, { "epoch": 0.5951901115141744, "grad_norm": 2.7548539638519287, "learning_rate": 2.051613781833038e-05, "loss": 4.1348, "step": 22150 }, { "epoch": 0.5953244659411527, "grad_norm": 2.589216470718384, "learning_rate": 2.050932861228381e-05, "loss": 4.1641, "step": 22155 }, { "epoch": 0.5954588203681311, "grad_norm": 2.634667158126831, "learning_rate": 2.0502519406237233e-05, "loss": 4.1732, "step": 22160 }, { "epoch": 0.5955931747951095, "grad_norm": 2.416656494140625, "learning_rate": 2.049571020019066e-05, "loss": 4.0596, "step": 22165 }, { "epoch": 0.5957275292220878, "grad_norm": 2.495785713195801, "learning_rate": 2.0488900994144084e-05, "loss": 4.1884, "step": 22170 }, { "epoch": 0.5958618836490662, "grad_norm": 2.389918088912964, "learning_rate": 2.048209178809751e-05, "loss": 4.1949, "step": 22175 }, { "epoch": 0.5959962380760446, "grad_norm": 2.7620787620544434, "learning_rate": 2.0475282582050932e-05, "loss": 4.1606, "step": 22180 }, { "epoch": 0.596130592503023, "grad_norm": 2.427457809448242, "learning_rate": 2.046847337600436e-05, "loss": 4.1459, "step": 22185 }, { "epoch": 0.5962649469300013, "grad_norm": 2.3366200923919678, "learning_rate": 2.0461664169957783e-05, "loss": 4.1681, "step": 22190 }, { "epoch": 0.5963993013569797, "grad_norm": 2.52113938331604, "learning_rate": 2.045485496391121e-05, "loss": 4.0902, "step": 22195 }, { "epoch": 0.5965336557839581, "grad_norm": 2.553727149963379, "learning_rate": 2.0448045757864635e-05, "loss": 3.9956, "step": 22200 }, { "epoch": 0.5966680102109364, "grad_norm": 2.5613043308258057, "learning_rate": 2.0441236551818057e-05, "loss": 3.9883, "step": 22205 }, { "epoch": 0.5968023646379148, "grad_norm": 2.2177212238311768, "learning_rate": 2.0434427345771483e-05, "loss": 4.1396, "step": 22210 }, { "epoch": 0.5969367190648932, "grad_norm": 2.3658573627471924, "learning_rate": 2.0427618139724908e-05, "loss": 4.0434, "step": 22215 }, { "epoch": 0.5970710734918716, "grad_norm": 2.5011370182037354, "learning_rate": 2.0420808933678334e-05, "loss": 4.0113, "step": 22220 }, { "epoch": 0.5972054279188499, "grad_norm": 2.560046672821045, "learning_rate": 2.041399972763176e-05, "loss": 4.1362, "step": 22225 }, { "epoch": 0.5973397823458283, "grad_norm": 2.45143723487854, "learning_rate": 2.0407190521585185e-05, "loss": 4.1644, "step": 22230 }, { "epoch": 0.5974741367728067, "grad_norm": 2.7357852458953857, "learning_rate": 2.0400381315538608e-05, "loss": 4.145, "step": 22235 }, { "epoch": 0.597608491199785, "grad_norm": 2.4763541221618652, "learning_rate": 2.0393572109492033e-05, "loss": 4.1228, "step": 22240 }, { "epoch": 0.5977428456267634, "grad_norm": 2.6149709224700928, "learning_rate": 2.038676290344546e-05, "loss": 4.0492, "step": 22245 }, { "epoch": 0.5978772000537418, "grad_norm": 2.40150785446167, "learning_rate": 2.0379953697398885e-05, "loss": 4.0577, "step": 22250 }, { "epoch": 0.5980115544807202, "grad_norm": 2.3508381843566895, "learning_rate": 2.0373144491352307e-05, "loss": 4.1088, "step": 22255 }, { "epoch": 0.5981459089076985, "grad_norm": 2.3051350116729736, "learning_rate": 2.0366335285305736e-05, "loss": 4.1648, "step": 22260 }, { "epoch": 0.5982802633346769, "grad_norm": 2.347996711730957, "learning_rate": 2.0359526079259158e-05, "loss": 4.2044, "step": 22265 }, { "epoch": 0.5984146177616553, "grad_norm": 2.5053303241729736, "learning_rate": 2.0352716873212584e-05, "loss": 4.1205, "step": 22270 }, { "epoch": 0.5985489721886336, "grad_norm": 2.537123918533325, "learning_rate": 2.034590766716601e-05, "loss": 4.1183, "step": 22275 }, { "epoch": 0.598683326615612, "grad_norm": 2.2641921043395996, "learning_rate": 2.0339098461119435e-05, "loss": 4.2135, "step": 22280 }, { "epoch": 0.5988176810425904, "grad_norm": 2.5343079566955566, "learning_rate": 2.0332289255072857e-05, "loss": 4.0324, "step": 22285 }, { "epoch": 0.5989520354695688, "grad_norm": 2.4034557342529297, "learning_rate": 2.0325480049026286e-05, "loss": 4.032, "step": 22290 }, { "epoch": 0.5990863898965471, "grad_norm": 2.7022643089294434, "learning_rate": 2.031867084297971e-05, "loss": 4.0532, "step": 22295 }, { "epoch": 0.5992207443235255, "grad_norm": 2.4151153564453125, "learning_rate": 2.0311861636933134e-05, "loss": 4.2941, "step": 22300 }, { "epoch": 0.5993550987505039, "grad_norm": 2.5753273963928223, "learning_rate": 2.030505243088656e-05, "loss": 4.0646, "step": 22305 }, { "epoch": 0.5994894531774821, "grad_norm": 2.689833164215088, "learning_rate": 2.0298243224839986e-05, "loss": 4.0747, "step": 22310 }, { "epoch": 0.5996238076044605, "grad_norm": 2.3037264347076416, "learning_rate": 2.0291434018793408e-05, "loss": 4.1806, "step": 22315 }, { "epoch": 0.599758162031439, "grad_norm": 2.5877652168273926, "learning_rate": 2.0284624812746837e-05, "loss": 4.146, "step": 22320 }, { "epoch": 0.5998925164584173, "grad_norm": 2.3690359592437744, "learning_rate": 2.027781560670026e-05, "loss": 4.2882, "step": 22325 }, { "epoch": 0.6000268708853956, "grad_norm": 2.466937303543091, "learning_rate": 2.0271006400653685e-05, "loss": 4.1442, "step": 22330 }, { "epoch": 0.600161225312374, "grad_norm": 2.5335566997528076, "learning_rate": 2.026419719460711e-05, "loss": 4.215, "step": 22335 }, { "epoch": 0.6002955797393524, "grad_norm": 2.398975372314453, "learning_rate": 2.0257387988560536e-05, "loss": 4.0675, "step": 22340 }, { "epoch": 0.6004299341663307, "grad_norm": 2.4398951530456543, "learning_rate": 2.025057878251396e-05, "loss": 4.1741, "step": 22345 }, { "epoch": 0.6005642885933091, "grad_norm": 2.5973434448242188, "learning_rate": 2.0243769576467387e-05, "loss": 4.0094, "step": 22350 }, { "epoch": 0.6006986430202875, "grad_norm": 2.7983126640319824, "learning_rate": 2.023696037042081e-05, "loss": 4.1926, "step": 22355 }, { "epoch": 0.6008329974472659, "grad_norm": 2.5550599098205566, "learning_rate": 2.0230151164374232e-05, "loss": 4.2696, "step": 22360 }, { "epoch": 0.6009673518742442, "grad_norm": 2.4594802856445312, "learning_rate": 2.022334195832766e-05, "loss": 4.1812, "step": 22365 }, { "epoch": 0.6011017063012226, "grad_norm": 2.3103930950164795, "learning_rate": 2.0216532752281083e-05, "loss": 4.1217, "step": 22370 }, { "epoch": 0.601236060728201, "grad_norm": 2.6952009201049805, "learning_rate": 2.020972354623451e-05, "loss": 4.2248, "step": 22375 }, { "epoch": 0.6013704151551794, "grad_norm": 2.544992446899414, "learning_rate": 2.0202914340187935e-05, "loss": 4.1474, "step": 22380 }, { "epoch": 0.6015047695821577, "grad_norm": 2.479602575302124, "learning_rate": 2.019610513414136e-05, "loss": 4.1407, "step": 22385 }, { "epoch": 0.6016391240091361, "grad_norm": 2.51560378074646, "learning_rate": 2.0189295928094783e-05, "loss": 4.0434, "step": 22390 }, { "epoch": 0.6017734784361145, "grad_norm": 2.2815964221954346, "learning_rate": 2.018248672204821e-05, "loss": 4.0485, "step": 22395 }, { "epoch": 0.6019078328630928, "grad_norm": 2.4722535610198975, "learning_rate": 2.0175677516001634e-05, "loss": 4.1385, "step": 22400 }, { "epoch": 0.6020421872900712, "grad_norm": 2.462843418121338, "learning_rate": 2.016886830995506e-05, "loss": 4.1007, "step": 22405 }, { "epoch": 0.6021765417170496, "grad_norm": 2.6341021060943604, "learning_rate": 2.0162059103908485e-05, "loss": 4.1208, "step": 22410 }, { "epoch": 0.602310896144028, "grad_norm": 2.4814608097076416, "learning_rate": 2.015524989786191e-05, "loss": 4.2284, "step": 22415 }, { "epoch": 0.6024452505710063, "grad_norm": 2.462836742401123, "learning_rate": 2.0148440691815333e-05, "loss": 4.1433, "step": 22420 }, { "epoch": 0.6025796049979847, "grad_norm": 2.3886430263519287, "learning_rate": 2.0141631485768762e-05, "loss": 4.1581, "step": 22425 }, { "epoch": 0.6027139594249631, "grad_norm": 2.4767515659332275, "learning_rate": 2.0134822279722184e-05, "loss": 4.145, "step": 22430 }, { "epoch": 0.6028483138519414, "grad_norm": 2.5555667877197266, "learning_rate": 2.012801307367561e-05, "loss": 4.0701, "step": 22435 }, { "epoch": 0.6029826682789198, "grad_norm": 2.3435330390930176, "learning_rate": 2.0121203867629036e-05, "loss": 4.2202, "step": 22440 }, { "epoch": 0.6031170227058982, "grad_norm": 2.296830415725708, "learning_rate": 2.011439466158246e-05, "loss": 4.1289, "step": 22445 }, { "epoch": 0.6032513771328766, "grad_norm": 2.3952317237854004, "learning_rate": 2.0107585455535884e-05, "loss": 4.0736, "step": 22450 }, { "epoch": 0.6033857315598549, "grad_norm": 2.4349145889282227, "learning_rate": 2.0100776249489313e-05, "loss": 4.2261, "step": 22455 }, { "epoch": 0.6035200859868333, "grad_norm": 2.370131492614746, "learning_rate": 2.0093967043442735e-05, "loss": 4.1734, "step": 22460 }, { "epoch": 0.6036544404138117, "grad_norm": 2.5898501873016357, "learning_rate": 2.008715783739616e-05, "loss": 4.1112, "step": 22465 }, { "epoch": 0.60378879484079, "grad_norm": 2.5350003242492676, "learning_rate": 2.0080348631349586e-05, "loss": 4.2075, "step": 22470 }, { "epoch": 0.6039231492677684, "grad_norm": 2.880460500717163, "learning_rate": 2.0073539425303012e-05, "loss": 4.1533, "step": 22475 }, { "epoch": 0.6040575036947468, "grad_norm": 2.5978922843933105, "learning_rate": 2.0066730219256434e-05, "loss": 4.0797, "step": 22480 }, { "epoch": 0.6041918581217252, "grad_norm": 2.5833215713500977, "learning_rate": 2.0059921013209863e-05, "loss": 4.1007, "step": 22485 }, { "epoch": 0.6043262125487034, "grad_norm": 2.2015109062194824, "learning_rate": 2.0053111807163286e-05, "loss": 4.0737, "step": 22490 }, { "epoch": 0.6044605669756818, "grad_norm": 2.531384229660034, "learning_rate": 2.004630260111671e-05, "loss": 4.2376, "step": 22495 }, { "epoch": 0.6045949214026602, "grad_norm": 2.487185478210449, "learning_rate": 2.0039493395070137e-05, "loss": 4.1552, "step": 22500 }, { "epoch": 0.6047292758296385, "grad_norm": 2.480384111404419, "learning_rate": 2.003268418902356e-05, "loss": 4.065, "step": 22505 }, { "epoch": 0.6048636302566169, "grad_norm": 2.320406675338745, "learning_rate": 2.0025874982976985e-05, "loss": 3.9812, "step": 22510 }, { "epoch": 0.6049979846835953, "grad_norm": 2.50247859954834, "learning_rate": 2.001906577693041e-05, "loss": 4.1094, "step": 22515 }, { "epoch": 0.6051323391105737, "grad_norm": 2.3134708404541016, "learning_rate": 2.0012256570883836e-05, "loss": 4.103, "step": 22520 }, { "epoch": 0.605266693537552, "grad_norm": 2.3216352462768555, "learning_rate": 2.000544736483726e-05, "loss": 4.0664, "step": 22525 }, { "epoch": 0.6054010479645304, "grad_norm": 2.5877232551574707, "learning_rate": 1.9998638158790687e-05, "loss": 4.0288, "step": 22530 }, { "epoch": 0.6055354023915088, "grad_norm": 2.344456672668457, "learning_rate": 1.999182895274411e-05, "loss": 4.1939, "step": 22535 }, { "epoch": 0.6056697568184871, "grad_norm": 2.8099780082702637, "learning_rate": 1.9985019746697535e-05, "loss": 4.2484, "step": 22540 }, { "epoch": 0.6058041112454655, "grad_norm": 2.29130220413208, "learning_rate": 1.997821054065096e-05, "loss": 4.121, "step": 22545 }, { "epoch": 0.6059384656724439, "grad_norm": 2.745087146759033, "learning_rate": 1.9971401334604387e-05, "loss": 4.045, "step": 22550 }, { "epoch": 0.6060728200994223, "grad_norm": 2.53539776802063, "learning_rate": 1.996459212855781e-05, "loss": 4.1005, "step": 22555 }, { "epoch": 0.6062071745264006, "grad_norm": 2.525324821472168, "learning_rate": 1.9957782922511238e-05, "loss": 4.1507, "step": 22560 }, { "epoch": 0.606341528953379, "grad_norm": 2.255467176437378, "learning_rate": 1.995097371646466e-05, "loss": 4.1514, "step": 22565 }, { "epoch": 0.6064758833803574, "grad_norm": 2.606435775756836, "learning_rate": 1.9944164510418086e-05, "loss": 4.074, "step": 22570 }, { "epoch": 0.6066102378073358, "grad_norm": 2.4113659858703613, "learning_rate": 1.993735530437151e-05, "loss": 4.1237, "step": 22575 }, { "epoch": 0.6067445922343141, "grad_norm": 2.4721245765686035, "learning_rate": 1.9930546098324937e-05, "loss": 4.2263, "step": 22580 }, { "epoch": 0.6068789466612925, "grad_norm": 2.4250636100769043, "learning_rate": 1.992373689227836e-05, "loss": 3.9658, "step": 22585 }, { "epoch": 0.6070133010882709, "grad_norm": 2.2815535068511963, "learning_rate": 1.991692768623179e-05, "loss": 4.087, "step": 22590 }, { "epoch": 0.6071476555152492, "grad_norm": 2.380229949951172, "learning_rate": 1.991011848018521e-05, "loss": 4.0158, "step": 22595 }, { "epoch": 0.6072820099422276, "grad_norm": 2.5534026622772217, "learning_rate": 1.9903309274138636e-05, "loss": 4.1271, "step": 22600 }, { "epoch": 0.607416364369206, "grad_norm": 2.5666069984436035, "learning_rate": 1.9896500068092062e-05, "loss": 4.161, "step": 22605 }, { "epoch": 0.6075507187961844, "grad_norm": 2.3857409954071045, "learning_rate": 1.9889690862045488e-05, "loss": 4.0488, "step": 22610 }, { "epoch": 0.6076850732231627, "grad_norm": 2.5152769088745117, "learning_rate": 1.988288165599891e-05, "loss": 4.1582, "step": 22615 }, { "epoch": 0.6078194276501411, "grad_norm": 2.4552152156829834, "learning_rate": 1.987607244995234e-05, "loss": 4.1694, "step": 22620 }, { "epoch": 0.6079537820771195, "grad_norm": 2.242696523666382, "learning_rate": 1.986926324390576e-05, "loss": 4.104, "step": 22625 }, { "epoch": 0.6080881365040978, "grad_norm": 2.6513919830322266, "learning_rate": 1.9862454037859187e-05, "loss": 4.016, "step": 22630 }, { "epoch": 0.6082224909310762, "grad_norm": 2.298769235610962, "learning_rate": 1.9855644831812613e-05, "loss": 4.0393, "step": 22635 }, { "epoch": 0.6083568453580546, "grad_norm": 2.584549903869629, "learning_rate": 1.984883562576604e-05, "loss": 4.1942, "step": 22640 }, { "epoch": 0.608491199785033, "grad_norm": 2.8168532848358154, "learning_rate": 1.984202641971946e-05, "loss": 4.2382, "step": 22645 }, { "epoch": 0.6086255542120113, "grad_norm": 2.5264101028442383, "learning_rate": 1.9835217213672886e-05, "loss": 4.1248, "step": 22650 }, { "epoch": 0.6087599086389897, "grad_norm": 2.3816030025482178, "learning_rate": 1.9828408007626312e-05, "loss": 4.0828, "step": 22655 }, { "epoch": 0.608894263065968, "grad_norm": 2.3510773181915283, "learning_rate": 1.9821598801579734e-05, "loss": 4.2254, "step": 22660 }, { "epoch": 0.6090286174929463, "grad_norm": 2.527829885482788, "learning_rate": 1.9814789595533163e-05, "loss": 4.1724, "step": 22665 }, { "epoch": 0.6091629719199247, "grad_norm": 2.591336250305176, "learning_rate": 1.9807980389486586e-05, "loss": 4.1205, "step": 22670 }, { "epoch": 0.6092973263469031, "grad_norm": 2.543318271636963, "learning_rate": 1.980117118344001e-05, "loss": 4.1714, "step": 22675 }, { "epoch": 0.6094316807738815, "grad_norm": 2.7578721046447754, "learning_rate": 1.9794361977393437e-05, "loss": 4.1432, "step": 22680 }, { "epoch": 0.6095660352008598, "grad_norm": 2.45051908493042, "learning_rate": 1.9787552771346862e-05, "loss": 4.1863, "step": 22685 }, { "epoch": 0.6097003896278382, "grad_norm": 2.3178536891937256, "learning_rate": 1.9780743565300285e-05, "loss": 4.1344, "step": 22690 }, { "epoch": 0.6098347440548166, "grad_norm": 2.548139810562134, "learning_rate": 1.9773934359253714e-05, "loss": 4.154, "step": 22695 }, { "epoch": 0.6099690984817949, "grad_norm": 2.522615909576416, "learning_rate": 1.9767125153207136e-05, "loss": 4.1093, "step": 22700 }, { "epoch": 0.6101034529087733, "grad_norm": 2.471193552017212, "learning_rate": 1.9760315947160562e-05, "loss": 4.0663, "step": 22705 }, { "epoch": 0.6102378073357517, "grad_norm": 2.573371171951294, "learning_rate": 1.9753506741113987e-05, "loss": 4.2157, "step": 22710 }, { "epoch": 0.6103721617627301, "grad_norm": 2.499436378479004, "learning_rate": 1.9746697535067413e-05, "loss": 4.0427, "step": 22715 }, { "epoch": 0.6105065161897084, "grad_norm": 2.2254090309143066, "learning_rate": 1.9739888329020835e-05, "loss": 4.1383, "step": 22720 }, { "epoch": 0.6106408706166868, "grad_norm": 2.4136171340942383, "learning_rate": 1.9733079122974264e-05, "loss": 4.1263, "step": 22725 }, { "epoch": 0.6107752250436652, "grad_norm": 2.8414485454559326, "learning_rate": 1.9726269916927687e-05, "loss": 4.2372, "step": 22730 }, { "epoch": 0.6109095794706435, "grad_norm": 2.499788761138916, "learning_rate": 1.9719460710881112e-05, "loss": 4.2404, "step": 22735 }, { "epoch": 0.6110439338976219, "grad_norm": 2.7592554092407227, "learning_rate": 1.9712651504834538e-05, "loss": 4.0928, "step": 22740 }, { "epoch": 0.6111782883246003, "grad_norm": 2.6579251289367676, "learning_rate": 1.9705842298787964e-05, "loss": 4.08, "step": 22745 }, { "epoch": 0.6113126427515787, "grad_norm": 2.562711477279663, "learning_rate": 1.9699033092741386e-05, "loss": 4.1041, "step": 22750 }, { "epoch": 0.611446997178557, "grad_norm": 2.5202126502990723, "learning_rate": 1.9692223886694815e-05, "loss": 4.1395, "step": 22755 }, { "epoch": 0.6115813516055354, "grad_norm": 2.36799693107605, "learning_rate": 1.9685414680648237e-05, "loss": 4.1119, "step": 22760 }, { "epoch": 0.6117157060325138, "grad_norm": 2.37776780128479, "learning_rate": 1.9678605474601663e-05, "loss": 4.0161, "step": 22765 }, { "epoch": 0.6118500604594922, "grad_norm": 2.215059280395508, "learning_rate": 1.967179626855509e-05, "loss": 4.2113, "step": 22770 }, { "epoch": 0.6119844148864705, "grad_norm": 2.1743268966674805, "learning_rate": 1.9664987062508514e-05, "loss": 4.0858, "step": 22775 }, { "epoch": 0.6121187693134489, "grad_norm": 2.383988380432129, "learning_rate": 1.9658177856461936e-05, "loss": 4.1286, "step": 22780 }, { "epoch": 0.6122531237404273, "grad_norm": 2.6955838203430176, "learning_rate": 1.9651368650415365e-05, "loss": 3.9857, "step": 22785 }, { "epoch": 0.6123874781674056, "grad_norm": 2.483969211578369, "learning_rate": 1.9644559444368788e-05, "loss": 4.0749, "step": 22790 }, { "epoch": 0.612521832594384, "grad_norm": 2.5466063022613525, "learning_rate": 1.9637750238322213e-05, "loss": 4.1663, "step": 22795 }, { "epoch": 0.6126561870213624, "grad_norm": 2.290076494216919, "learning_rate": 1.963094103227564e-05, "loss": 4.0823, "step": 22800 }, { "epoch": 0.6127905414483408, "grad_norm": 2.249882459640503, "learning_rate": 1.962413182622906e-05, "loss": 3.9947, "step": 22805 }, { "epoch": 0.6129248958753191, "grad_norm": 2.435056686401367, "learning_rate": 1.9617322620182487e-05, "loss": 3.9806, "step": 22810 }, { "epoch": 0.6130592503022975, "grad_norm": 2.6419472694396973, "learning_rate": 1.9610513414135913e-05, "loss": 4.1311, "step": 22815 }, { "epoch": 0.6131936047292759, "grad_norm": 2.5757863521575928, "learning_rate": 1.9603704208089338e-05, "loss": 4.1397, "step": 22820 }, { "epoch": 0.6133279591562542, "grad_norm": 2.287513017654419, "learning_rate": 1.959689500204276e-05, "loss": 4.1116, "step": 22825 }, { "epoch": 0.6134623135832326, "grad_norm": 2.4869492053985596, "learning_rate": 1.959008579599619e-05, "loss": 4.1547, "step": 22830 }, { "epoch": 0.613596668010211, "grad_norm": 2.454310894012451, "learning_rate": 1.9583276589949612e-05, "loss": 4.0231, "step": 22835 }, { "epoch": 0.6137310224371894, "grad_norm": 2.587329864501953, "learning_rate": 1.9576467383903038e-05, "loss": 4.0321, "step": 22840 }, { "epoch": 0.6138653768641676, "grad_norm": 2.681021213531494, "learning_rate": 1.9569658177856463e-05, "loss": 4.0545, "step": 22845 }, { "epoch": 0.613999731291146, "grad_norm": 2.6363871097564697, "learning_rate": 1.956284897180989e-05, "loss": 4.0448, "step": 22850 }, { "epoch": 0.6141340857181244, "grad_norm": 2.472973585128784, "learning_rate": 1.955603976576331e-05, "loss": 4.1081, "step": 22855 }, { "epoch": 0.6142684401451027, "grad_norm": 2.3673269748687744, "learning_rate": 1.954923055971674e-05, "loss": 4.1646, "step": 22860 }, { "epoch": 0.6144027945720811, "grad_norm": 2.4687883853912354, "learning_rate": 1.9542421353670162e-05, "loss": 4.2417, "step": 22865 }, { "epoch": 0.6145371489990595, "grad_norm": 2.32926869392395, "learning_rate": 1.9535612147623588e-05, "loss": 4.1214, "step": 22870 }, { "epoch": 0.6146715034260379, "grad_norm": 2.5463759899139404, "learning_rate": 1.9528802941577014e-05, "loss": 4.2027, "step": 22875 }, { "epoch": 0.6148058578530162, "grad_norm": 2.2446656227111816, "learning_rate": 1.952199373553044e-05, "loss": 4.0666, "step": 22880 }, { "epoch": 0.6149402122799946, "grad_norm": 2.4559826850891113, "learning_rate": 1.951518452948386e-05, "loss": 4.1547, "step": 22885 }, { "epoch": 0.615074566706973, "grad_norm": 2.57631516456604, "learning_rate": 1.9508375323437287e-05, "loss": 4.0874, "step": 22890 }, { "epoch": 0.6152089211339513, "grad_norm": 2.479353427886963, "learning_rate": 1.9501566117390713e-05, "loss": 4.1256, "step": 22895 }, { "epoch": 0.6153432755609297, "grad_norm": 2.6702775955200195, "learning_rate": 1.949475691134414e-05, "loss": 4.1814, "step": 22900 }, { "epoch": 0.6154776299879081, "grad_norm": 2.3950088024139404, "learning_rate": 1.9487947705297564e-05, "loss": 3.8723, "step": 22905 }, { "epoch": 0.6156119844148865, "grad_norm": 2.299225091934204, "learning_rate": 1.948113849925099e-05, "loss": 3.9452, "step": 22910 }, { "epoch": 0.6157463388418648, "grad_norm": 2.4507553577423096, "learning_rate": 1.9474329293204412e-05, "loss": 4.1586, "step": 22915 }, { "epoch": 0.6158806932688432, "grad_norm": 3.0516319274902344, "learning_rate": 1.9467520087157838e-05, "loss": 4.2188, "step": 22920 }, { "epoch": 0.6160150476958216, "grad_norm": 2.723109722137451, "learning_rate": 1.9460710881111264e-05, "loss": 4.08, "step": 22925 }, { "epoch": 0.6161494021227999, "grad_norm": 2.381190538406372, "learning_rate": 1.945390167506469e-05, "loss": 4.1565, "step": 22930 }, { "epoch": 0.6162837565497783, "grad_norm": 2.8123371601104736, "learning_rate": 1.9447092469018115e-05, "loss": 4.2339, "step": 22935 }, { "epoch": 0.6164181109767567, "grad_norm": 2.401191473007202, "learning_rate": 1.944028326297154e-05, "loss": 4.1952, "step": 22940 }, { "epoch": 0.6165524654037351, "grad_norm": 2.8417563438415527, "learning_rate": 1.9433474056924963e-05, "loss": 4.1601, "step": 22945 }, { "epoch": 0.6166868198307134, "grad_norm": 2.0689034461975098, "learning_rate": 1.942666485087839e-05, "loss": 4.1328, "step": 22950 }, { "epoch": 0.6168211742576918, "grad_norm": 2.408452272415161, "learning_rate": 1.9419855644831814e-05, "loss": 4.0412, "step": 22955 }, { "epoch": 0.6169555286846702, "grad_norm": 2.252880334854126, "learning_rate": 1.9413046438785236e-05, "loss": 4.0882, "step": 22960 }, { "epoch": 0.6170898831116485, "grad_norm": 2.4818756580352783, "learning_rate": 1.9406237232738665e-05, "loss": 4.2262, "step": 22965 }, { "epoch": 0.6172242375386269, "grad_norm": 2.6965458393096924, "learning_rate": 1.9399428026692088e-05, "loss": 4.2184, "step": 22970 }, { "epoch": 0.6173585919656053, "grad_norm": 2.53266978263855, "learning_rate": 1.9392618820645513e-05, "loss": 4.1018, "step": 22975 }, { "epoch": 0.6174929463925837, "grad_norm": 2.3349545001983643, "learning_rate": 1.938580961459894e-05, "loss": 4.1257, "step": 22980 }, { "epoch": 0.617627300819562, "grad_norm": 2.656087875366211, "learning_rate": 1.9379000408552365e-05, "loss": 4.2083, "step": 22985 }, { "epoch": 0.6177616552465404, "grad_norm": 2.5874805450439453, "learning_rate": 1.9372191202505787e-05, "loss": 3.9964, "step": 22990 }, { "epoch": 0.6178960096735188, "grad_norm": 2.5172836780548096, "learning_rate": 1.9365381996459213e-05, "loss": 4.1564, "step": 22995 }, { "epoch": 0.6180303641004972, "grad_norm": 2.54607892036438, "learning_rate": 1.9358572790412638e-05, "loss": 4.031, "step": 23000 }, { "epoch": 0.6181647185274755, "grad_norm": 2.553339719772339, "learning_rate": 1.9351763584366064e-05, "loss": 4.1521, "step": 23005 }, { "epoch": 0.6182990729544539, "grad_norm": 2.3109114170074463, "learning_rate": 1.934495437831949e-05, "loss": 3.9983, "step": 23010 }, { "epoch": 0.6184334273814323, "grad_norm": 2.8013088703155518, "learning_rate": 1.9338145172272915e-05, "loss": 4.1422, "step": 23015 }, { "epoch": 0.6185677818084105, "grad_norm": 2.4424445629119873, "learning_rate": 1.9331335966226337e-05, "loss": 4.088, "step": 23020 }, { "epoch": 0.6187021362353889, "grad_norm": 2.584385633468628, "learning_rate": 1.9324526760179763e-05, "loss": 4.0885, "step": 23025 }, { "epoch": 0.6188364906623673, "grad_norm": 2.710015296936035, "learning_rate": 1.931771755413319e-05, "loss": 4.0941, "step": 23030 }, { "epoch": 0.6189708450893457, "grad_norm": 2.3924832344055176, "learning_rate": 1.9310908348086614e-05, "loss": 4.0324, "step": 23035 }, { "epoch": 0.619105199516324, "grad_norm": 2.335460662841797, "learning_rate": 1.930409914204004e-05, "loss": 4.0464, "step": 23040 }, { "epoch": 0.6192395539433024, "grad_norm": 2.639539957046509, "learning_rate": 1.9297289935993466e-05, "loss": 4.1846, "step": 23045 }, { "epoch": 0.6193739083702808, "grad_norm": 2.5236294269561768, "learning_rate": 1.9290480729946888e-05, "loss": 4.054, "step": 23050 }, { "epoch": 0.6195082627972591, "grad_norm": 2.551213264465332, "learning_rate": 1.9283671523900314e-05, "loss": 4.0919, "step": 23055 }, { "epoch": 0.6196426172242375, "grad_norm": 2.537510395050049, "learning_rate": 1.927686231785374e-05, "loss": 4.1182, "step": 23060 }, { "epoch": 0.6197769716512159, "grad_norm": 2.229283332824707, "learning_rate": 1.9270053111807165e-05, "loss": 4.0587, "step": 23065 }, { "epoch": 0.6199113260781943, "grad_norm": 2.4474892616271973, "learning_rate": 1.926324390576059e-05, "loss": 4.158, "step": 23070 }, { "epoch": 0.6200456805051726, "grad_norm": 2.518063545227051, "learning_rate": 1.9256434699714016e-05, "loss": 4.0565, "step": 23075 }, { "epoch": 0.620180034932151, "grad_norm": 2.9664876461029053, "learning_rate": 1.924962549366744e-05, "loss": 4.037, "step": 23080 }, { "epoch": 0.6203143893591294, "grad_norm": 2.599026679992676, "learning_rate": 1.9242816287620864e-05, "loss": 4.1896, "step": 23085 }, { "epoch": 0.6204487437861077, "grad_norm": 2.598365068435669, "learning_rate": 1.923600708157429e-05, "loss": 4.1031, "step": 23090 }, { "epoch": 0.6205830982130861, "grad_norm": 2.4832563400268555, "learning_rate": 1.9229197875527712e-05, "loss": 4.132, "step": 23095 }, { "epoch": 0.6207174526400645, "grad_norm": 2.8324997425079346, "learning_rate": 1.9222388669481138e-05, "loss": 4.1435, "step": 23100 }, { "epoch": 0.6208518070670429, "grad_norm": 2.4392940998077393, "learning_rate": 1.9215579463434564e-05, "loss": 4.0553, "step": 23105 }, { "epoch": 0.6209861614940212, "grad_norm": 2.378605365753174, "learning_rate": 1.920877025738799e-05, "loss": 4.1543, "step": 23110 }, { "epoch": 0.6211205159209996, "grad_norm": 2.645852565765381, "learning_rate": 1.9201961051341415e-05, "loss": 4.1299, "step": 23115 }, { "epoch": 0.621254870347978, "grad_norm": 2.9687037467956543, "learning_rate": 1.919515184529484e-05, "loss": 4.0111, "step": 23120 }, { "epoch": 0.6213892247749563, "grad_norm": 2.52667498588562, "learning_rate": 1.9188342639248263e-05, "loss": 4.2092, "step": 23125 }, { "epoch": 0.6215235792019347, "grad_norm": 2.6552600860595703, "learning_rate": 1.918153343320169e-05, "loss": 4.1042, "step": 23130 }, { "epoch": 0.6216579336289131, "grad_norm": 2.560513734817505, "learning_rate": 1.9174724227155114e-05, "loss": 4.1721, "step": 23135 }, { "epoch": 0.6217922880558915, "grad_norm": 2.587587594985962, "learning_rate": 1.916791502110854e-05, "loss": 4.0285, "step": 23140 }, { "epoch": 0.6219266424828698, "grad_norm": 2.562006711959839, "learning_rate": 1.9161105815061965e-05, "loss": 4.1151, "step": 23145 }, { "epoch": 0.6220609969098482, "grad_norm": 2.562177896499634, "learning_rate": 1.915429660901539e-05, "loss": 4.1395, "step": 23150 }, { "epoch": 0.6221953513368266, "grad_norm": 2.449181318283081, "learning_rate": 1.9147487402968813e-05, "loss": 4.1576, "step": 23155 }, { "epoch": 0.6223297057638049, "grad_norm": 2.398756265640259, "learning_rate": 1.914067819692224e-05, "loss": 4.1432, "step": 23160 }, { "epoch": 0.6224640601907833, "grad_norm": 2.3193199634552, "learning_rate": 1.9133868990875665e-05, "loss": 4.2457, "step": 23165 }, { "epoch": 0.6225984146177617, "grad_norm": 2.4474058151245117, "learning_rate": 1.912705978482909e-05, "loss": 4.1154, "step": 23170 }, { "epoch": 0.6227327690447401, "grad_norm": 2.473879814147949, "learning_rate": 1.9120250578782516e-05, "loss": 4.2144, "step": 23175 }, { "epoch": 0.6228671234717184, "grad_norm": 2.364337205886841, "learning_rate": 1.911344137273594e-05, "loss": 4.1412, "step": 23180 }, { "epoch": 0.6230014778986968, "grad_norm": 2.6174914836883545, "learning_rate": 1.9106632166689364e-05, "loss": 4.2325, "step": 23185 }, { "epoch": 0.6231358323256752, "grad_norm": 2.6878976821899414, "learning_rate": 1.909982296064279e-05, "loss": 4.1644, "step": 23190 }, { "epoch": 0.6232701867526536, "grad_norm": 2.5180115699768066, "learning_rate": 1.9093013754596215e-05, "loss": 4.0946, "step": 23195 }, { "epoch": 0.6234045411796318, "grad_norm": 2.4423635005950928, "learning_rate": 1.908620454854964e-05, "loss": 4.083, "step": 23200 }, { "epoch": 0.6235388956066102, "grad_norm": 2.2712295055389404, "learning_rate": 1.9079395342503063e-05, "loss": 4.1627, "step": 23205 }, { "epoch": 0.6236732500335886, "grad_norm": 2.4675514698028564, "learning_rate": 1.9072586136456492e-05, "loss": 4.0117, "step": 23210 }, { "epoch": 0.6238076044605669, "grad_norm": 2.3903191089630127, "learning_rate": 1.9065776930409914e-05, "loss": 4.0991, "step": 23215 }, { "epoch": 0.6239419588875453, "grad_norm": 2.2487778663635254, "learning_rate": 1.905896772436334e-05, "loss": 4.049, "step": 23220 }, { "epoch": 0.6240763133145237, "grad_norm": 2.2209014892578125, "learning_rate": 1.9052158518316766e-05, "loss": 4.1354, "step": 23225 }, { "epoch": 0.6242106677415021, "grad_norm": 2.3601720333099365, "learning_rate": 1.904534931227019e-05, "loss": 4.1254, "step": 23230 }, { "epoch": 0.6243450221684804, "grad_norm": 2.379819393157959, "learning_rate": 1.9038540106223614e-05, "loss": 4.0795, "step": 23235 }, { "epoch": 0.6244793765954588, "grad_norm": 2.4841928482055664, "learning_rate": 1.9031730900177043e-05, "loss": 3.9729, "step": 23240 }, { "epoch": 0.6246137310224372, "grad_norm": 2.404937982559204, "learning_rate": 1.9024921694130465e-05, "loss": 4.0806, "step": 23245 }, { "epoch": 0.6247480854494155, "grad_norm": 2.404106378555298, "learning_rate": 1.901811248808389e-05, "loss": 4.0698, "step": 23250 }, { "epoch": 0.6248824398763939, "grad_norm": 2.6380505561828613, "learning_rate": 1.9011303282037316e-05, "loss": 4.1091, "step": 23255 }, { "epoch": 0.6250167943033723, "grad_norm": 2.383516788482666, "learning_rate": 1.900449407599074e-05, "loss": 4.1119, "step": 23260 }, { "epoch": 0.6251511487303507, "grad_norm": 2.487488269805908, "learning_rate": 1.8997684869944164e-05, "loss": 4.0706, "step": 23265 }, { "epoch": 0.625285503157329, "grad_norm": 2.4726924896240234, "learning_rate": 1.899087566389759e-05, "loss": 4.1028, "step": 23270 }, { "epoch": 0.6254198575843074, "grad_norm": 2.603287696838379, "learning_rate": 1.8984066457851016e-05, "loss": 4.1014, "step": 23275 }, { "epoch": 0.6255542120112858, "grad_norm": 2.420926332473755, "learning_rate": 1.8977257251804438e-05, "loss": 4.0684, "step": 23280 }, { "epoch": 0.6256885664382641, "grad_norm": 2.2720630168914795, "learning_rate": 1.8970448045757867e-05, "loss": 4.1713, "step": 23285 }, { "epoch": 0.6258229208652425, "grad_norm": 2.4648218154907227, "learning_rate": 1.896363883971129e-05, "loss": 4.2537, "step": 23290 }, { "epoch": 0.6259572752922209, "grad_norm": 2.4751076698303223, "learning_rate": 1.8956829633664715e-05, "loss": 3.9498, "step": 23295 }, { "epoch": 0.6260916297191993, "grad_norm": 2.5886898040771484, "learning_rate": 1.895002042761814e-05, "loss": 4.1581, "step": 23300 }, { "epoch": 0.6262259841461776, "grad_norm": 2.632793664932251, "learning_rate": 1.8943211221571566e-05, "loss": 4.1309, "step": 23305 }, { "epoch": 0.626360338573156, "grad_norm": 2.313241481781006, "learning_rate": 1.893640201552499e-05, "loss": 4.0782, "step": 23310 }, { "epoch": 0.6264946930001344, "grad_norm": 2.5592658519744873, "learning_rate": 1.8929592809478417e-05, "loss": 4.1223, "step": 23315 }, { "epoch": 0.6266290474271127, "grad_norm": 2.6594362258911133, "learning_rate": 1.892278360343184e-05, "loss": 4.0878, "step": 23320 }, { "epoch": 0.6267634018540911, "grad_norm": 2.3843441009521484, "learning_rate": 1.8915974397385265e-05, "loss": 4.1715, "step": 23325 }, { "epoch": 0.6268977562810695, "grad_norm": 2.774778127670288, "learning_rate": 1.890916519133869e-05, "loss": 4.1485, "step": 23330 }, { "epoch": 0.6270321107080479, "grad_norm": 2.4670071601867676, "learning_rate": 1.8902355985292117e-05, "loss": 4.033, "step": 23335 }, { "epoch": 0.6271664651350262, "grad_norm": 2.425337076187134, "learning_rate": 1.889554677924554e-05, "loss": 4.1102, "step": 23340 }, { "epoch": 0.6273008195620046, "grad_norm": 2.4518814086914062, "learning_rate": 1.8888737573198968e-05, "loss": 4.0707, "step": 23345 }, { "epoch": 0.627435173988983, "grad_norm": 2.4463744163513184, "learning_rate": 1.888192836715239e-05, "loss": 4.0472, "step": 23350 }, { "epoch": 0.6275695284159613, "grad_norm": 2.4812541007995605, "learning_rate": 1.8875119161105816e-05, "loss": 4.1445, "step": 23355 }, { "epoch": 0.6277038828429397, "grad_norm": 2.60377836227417, "learning_rate": 1.886830995505924e-05, "loss": 4.1688, "step": 23360 }, { "epoch": 0.627838237269918, "grad_norm": 2.58404278755188, "learning_rate": 1.8861500749012667e-05, "loss": 4.0139, "step": 23365 }, { "epoch": 0.6279725916968965, "grad_norm": 2.515747547149658, "learning_rate": 1.885469154296609e-05, "loss": 4.2004, "step": 23370 }, { "epoch": 0.6281069461238747, "grad_norm": 2.4702308177948, "learning_rate": 1.884788233691952e-05, "loss": 4.0859, "step": 23375 }, { "epoch": 0.6282413005508531, "grad_norm": 2.334240674972534, "learning_rate": 1.884107313087294e-05, "loss": 4.1229, "step": 23380 }, { "epoch": 0.6283756549778315, "grad_norm": 2.655916929244995, "learning_rate": 1.8834263924826366e-05, "loss": 4.1102, "step": 23385 }, { "epoch": 0.6285100094048099, "grad_norm": 2.7336225509643555, "learning_rate": 1.8827454718779792e-05, "loss": 4.2423, "step": 23390 }, { "epoch": 0.6286443638317882, "grad_norm": 2.4753386974334717, "learning_rate": 1.8820645512733214e-05, "loss": 4.1041, "step": 23395 }, { "epoch": 0.6287787182587666, "grad_norm": 2.513751745223999, "learning_rate": 1.881383630668664e-05, "loss": 4.1182, "step": 23400 }, { "epoch": 0.628913072685745, "grad_norm": 2.4351415634155273, "learning_rate": 1.8807027100640066e-05, "loss": 4.1418, "step": 23405 }, { "epoch": 0.6290474271127233, "grad_norm": 2.57922101020813, "learning_rate": 1.880021789459349e-05, "loss": 4.0837, "step": 23410 }, { "epoch": 0.6291817815397017, "grad_norm": 2.4887595176696777, "learning_rate": 1.8793408688546914e-05, "loss": 3.9817, "step": 23415 }, { "epoch": 0.6293161359666801, "grad_norm": 2.7448618412017822, "learning_rate": 1.8786599482500343e-05, "loss": 4.2374, "step": 23420 }, { "epoch": 0.6294504903936585, "grad_norm": 2.3277223110198975, "learning_rate": 1.8779790276453765e-05, "loss": 4.2015, "step": 23425 }, { "epoch": 0.6295848448206368, "grad_norm": 2.533170700073242, "learning_rate": 1.877298107040719e-05, "loss": 4.0211, "step": 23430 }, { "epoch": 0.6297191992476152, "grad_norm": 2.5229806900024414, "learning_rate": 1.8766171864360616e-05, "loss": 4.2786, "step": 23435 }, { "epoch": 0.6298535536745936, "grad_norm": 2.602210760116577, "learning_rate": 1.8759362658314042e-05, "loss": 4.1016, "step": 23440 }, { "epoch": 0.6299879081015719, "grad_norm": 2.4182891845703125, "learning_rate": 1.8752553452267464e-05, "loss": 4.1262, "step": 23445 }, { "epoch": 0.6301222625285503, "grad_norm": 2.6210291385650635, "learning_rate": 1.8745744246220893e-05, "loss": 4.2641, "step": 23450 }, { "epoch": 0.6302566169555287, "grad_norm": 2.6051249504089355, "learning_rate": 1.8738935040174315e-05, "loss": 4.0891, "step": 23455 }, { "epoch": 0.6303909713825071, "grad_norm": 2.6324195861816406, "learning_rate": 1.873212583412774e-05, "loss": 3.988, "step": 23460 }, { "epoch": 0.6305253258094854, "grad_norm": 2.5638813972473145, "learning_rate": 1.8725316628081167e-05, "loss": 4.1243, "step": 23465 }, { "epoch": 0.6306596802364638, "grad_norm": 2.108503580093384, "learning_rate": 1.8718507422034592e-05, "loss": 3.9905, "step": 23470 }, { "epoch": 0.6307940346634422, "grad_norm": 2.7509284019470215, "learning_rate": 1.8711698215988015e-05, "loss": 4.1112, "step": 23475 }, { "epoch": 0.6309283890904205, "grad_norm": 2.510148048400879, "learning_rate": 1.8704889009941444e-05, "loss": 4.2239, "step": 23480 }, { "epoch": 0.6310627435173989, "grad_norm": 2.4859464168548584, "learning_rate": 1.8698079803894866e-05, "loss": 4.0365, "step": 23485 }, { "epoch": 0.6311970979443773, "grad_norm": 2.697118043899536, "learning_rate": 1.869127059784829e-05, "loss": 4.1497, "step": 23490 }, { "epoch": 0.6313314523713557, "grad_norm": 2.3508384227752686, "learning_rate": 1.8684461391801717e-05, "loss": 4.0723, "step": 23495 }, { "epoch": 0.631465806798334, "grad_norm": 2.4708471298217773, "learning_rate": 1.8677652185755143e-05, "loss": 4.1264, "step": 23500 }, { "epoch": 0.6316001612253124, "grad_norm": 2.430537462234497, "learning_rate": 1.8670842979708565e-05, "loss": 4.0167, "step": 23505 }, { "epoch": 0.6317345156522908, "grad_norm": 2.691636562347412, "learning_rate": 1.8664033773661994e-05, "loss": 4.0456, "step": 23510 }, { "epoch": 0.6318688700792691, "grad_norm": 2.747800350189209, "learning_rate": 1.8657224567615417e-05, "loss": 4.0016, "step": 23515 }, { "epoch": 0.6320032245062475, "grad_norm": 2.418670415878296, "learning_rate": 1.8650415361568842e-05, "loss": 4.1811, "step": 23520 }, { "epoch": 0.6321375789332259, "grad_norm": 2.52363920211792, "learning_rate": 1.8643606155522268e-05, "loss": 4.124, "step": 23525 }, { "epoch": 0.6322719333602043, "grad_norm": 2.4816198348999023, "learning_rate": 1.8636796949475694e-05, "loss": 4.16, "step": 23530 }, { "epoch": 0.6324062877871826, "grad_norm": 2.465169668197632, "learning_rate": 1.8629987743429116e-05, "loss": 4.0824, "step": 23535 }, { "epoch": 0.632540642214161, "grad_norm": 2.6134915351867676, "learning_rate": 1.862317853738254e-05, "loss": 4.1515, "step": 23540 }, { "epoch": 0.6326749966411394, "grad_norm": 2.3546528816223145, "learning_rate": 1.8616369331335967e-05, "loss": 4.1687, "step": 23545 }, { "epoch": 0.6328093510681176, "grad_norm": 2.637240409851074, "learning_rate": 1.860956012528939e-05, "loss": 4.1086, "step": 23550 }, { "epoch": 0.632943705495096, "grad_norm": 2.5345969200134277, "learning_rate": 1.860275091924282e-05, "loss": 4.2036, "step": 23555 }, { "epoch": 0.6330780599220744, "grad_norm": 2.5617806911468506, "learning_rate": 1.859594171319624e-05, "loss": 4.1482, "step": 23560 }, { "epoch": 0.6332124143490528, "grad_norm": 2.3352832794189453, "learning_rate": 1.8589132507149666e-05, "loss": 4.0621, "step": 23565 }, { "epoch": 0.6333467687760311, "grad_norm": 2.714040517807007, "learning_rate": 1.8582323301103092e-05, "loss": 4.0689, "step": 23570 }, { "epoch": 0.6334811232030095, "grad_norm": 2.555560350418091, "learning_rate": 1.8575514095056518e-05, "loss": 4.2055, "step": 23575 }, { "epoch": 0.6336154776299879, "grad_norm": 2.444643020629883, "learning_rate": 1.856870488900994e-05, "loss": 4.1872, "step": 23580 }, { "epoch": 0.6337498320569662, "grad_norm": 2.4564497470855713, "learning_rate": 1.856189568296337e-05, "loss": 4.0709, "step": 23585 }, { "epoch": 0.6338841864839446, "grad_norm": 2.537414073944092, "learning_rate": 1.855508647691679e-05, "loss": 4.1854, "step": 23590 }, { "epoch": 0.634018540910923, "grad_norm": 2.719449520111084, "learning_rate": 1.8548277270870217e-05, "loss": 4.0443, "step": 23595 }, { "epoch": 0.6341528953379014, "grad_norm": 2.7071638107299805, "learning_rate": 1.8541468064823643e-05, "loss": 4.2322, "step": 23600 }, { "epoch": 0.6342872497648797, "grad_norm": 2.389280080795288, "learning_rate": 1.8534658858777068e-05, "loss": 4.1885, "step": 23605 }, { "epoch": 0.6344216041918581, "grad_norm": 2.6367533206939697, "learning_rate": 1.852784965273049e-05, "loss": 4.145, "step": 23610 }, { "epoch": 0.6345559586188365, "grad_norm": 2.486896276473999, "learning_rate": 1.852104044668392e-05, "loss": 4.0314, "step": 23615 }, { "epoch": 0.6346903130458149, "grad_norm": 2.4305179119110107, "learning_rate": 1.8514231240637342e-05, "loss": 4.1263, "step": 23620 }, { "epoch": 0.6348246674727932, "grad_norm": 2.4880106449127197, "learning_rate": 1.8507422034590767e-05, "loss": 4.0739, "step": 23625 }, { "epoch": 0.6349590218997716, "grad_norm": 2.849853038787842, "learning_rate": 1.8500612828544193e-05, "loss": 4.2104, "step": 23630 }, { "epoch": 0.63509337632675, "grad_norm": 2.3692846298217773, "learning_rate": 1.849380362249762e-05, "loss": 4.1339, "step": 23635 }, { "epoch": 0.6352277307537283, "grad_norm": 2.431380271911621, "learning_rate": 1.848699441645104e-05, "loss": 4.2618, "step": 23640 }, { "epoch": 0.6353620851807067, "grad_norm": 2.4687514305114746, "learning_rate": 1.848018521040447e-05, "loss": 4.0515, "step": 23645 }, { "epoch": 0.6354964396076851, "grad_norm": 2.6786999702453613, "learning_rate": 1.8473376004357892e-05, "loss": 4.0787, "step": 23650 }, { "epoch": 0.6356307940346635, "grad_norm": 2.6440019607543945, "learning_rate": 1.8466566798311318e-05, "loss": 4.0959, "step": 23655 }, { "epoch": 0.6357651484616418, "grad_norm": 2.59122371673584, "learning_rate": 1.8459757592264744e-05, "loss": 4.1463, "step": 23660 }, { "epoch": 0.6358995028886202, "grad_norm": 2.390857696533203, "learning_rate": 1.845294838621817e-05, "loss": 4.1967, "step": 23665 }, { "epoch": 0.6360338573155986, "grad_norm": 2.3467447757720947, "learning_rate": 1.844613918017159e-05, "loss": 4.1183, "step": 23670 }, { "epoch": 0.6361682117425769, "grad_norm": 2.643886089324951, "learning_rate": 1.843932997412502e-05, "loss": 4.1481, "step": 23675 }, { "epoch": 0.6363025661695553, "grad_norm": 2.538806438446045, "learning_rate": 1.8432520768078443e-05, "loss": 4.0686, "step": 23680 }, { "epoch": 0.6364369205965337, "grad_norm": 2.4303650856018066, "learning_rate": 1.842571156203187e-05, "loss": 4.154, "step": 23685 }, { "epoch": 0.6365712750235121, "grad_norm": 2.5481116771698, "learning_rate": 1.8418902355985294e-05, "loss": 4.0916, "step": 23690 }, { "epoch": 0.6367056294504904, "grad_norm": 2.4088287353515625, "learning_rate": 1.8412093149938717e-05, "loss": 4.1122, "step": 23695 }, { "epoch": 0.6368399838774688, "grad_norm": 2.5548882484436035, "learning_rate": 1.8405283943892142e-05, "loss": 4.2029, "step": 23700 }, { "epoch": 0.6369743383044472, "grad_norm": 2.6370370388031006, "learning_rate": 1.8398474737845568e-05, "loss": 4.1001, "step": 23705 }, { "epoch": 0.6371086927314255, "grad_norm": 2.501089096069336, "learning_rate": 1.8391665531798994e-05, "loss": 4.0841, "step": 23710 }, { "epoch": 0.6372430471584039, "grad_norm": 2.562368869781494, "learning_rate": 1.8384856325752416e-05, "loss": 4.1725, "step": 23715 }, { "epoch": 0.6373774015853823, "grad_norm": 2.37835955619812, "learning_rate": 1.8378047119705845e-05, "loss": 4.1658, "step": 23720 }, { "epoch": 0.6375117560123607, "grad_norm": 2.4549942016601562, "learning_rate": 1.8371237913659267e-05, "loss": 4.2019, "step": 23725 }, { "epoch": 0.6376461104393389, "grad_norm": 2.302461624145508, "learning_rate": 1.8364428707612693e-05, "loss": 4.178, "step": 23730 }, { "epoch": 0.6377804648663173, "grad_norm": 2.283285140991211, "learning_rate": 1.835761950156612e-05, "loss": 4.1144, "step": 23735 }, { "epoch": 0.6379148192932957, "grad_norm": 2.2567577362060547, "learning_rate": 1.8350810295519544e-05, "loss": 4.1405, "step": 23740 }, { "epoch": 0.638049173720274, "grad_norm": 2.4493722915649414, "learning_rate": 1.8344001089472966e-05, "loss": 4.1162, "step": 23745 }, { "epoch": 0.6381835281472524, "grad_norm": 2.5306286811828613, "learning_rate": 1.8337191883426395e-05, "loss": 4.1075, "step": 23750 }, { "epoch": 0.6383178825742308, "grad_norm": 2.7744383811950684, "learning_rate": 1.8330382677379818e-05, "loss": 4.1201, "step": 23755 }, { "epoch": 0.6384522370012092, "grad_norm": 2.6288442611694336, "learning_rate": 1.8323573471333243e-05, "loss": 4.0181, "step": 23760 }, { "epoch": 0.6385865914281875, "grad_norm": 2.571013927459717, "learning_rate": 1.831676426528667e-05, "loss": 4.0778, "step": 23765 }, { "epoch": 0.6387209458551659, "grad_norm": 2.5074269771575928, "learning_rate": 1.8309955059240095e-05, "loss": 3.9797, "step": 23770 }, { "epoch": 0.6388553002821443, "grad_norm": 2.270559310913086, "learning_rate": 1.8303145853193517e-05, "loss": 4.1654, "step": 23775 }, { "epoch": 0.6389896547091226, "grad_norm": 2.520716428756714, "learning_rate": 1.8296336647146946e-05, "loss": 4.1029, "step": 23780 }, { "epoch": 0.639124009136101, "grad_norm": 2.38871431350708, "learning_rate": 1.8289527441100368e-05, "loss": 4.1179, "step": 23785 }, { "epoch": 0.6392583635630794, "grad_norm": 2.7593979835510254, "learning_rate": 1.8282718235053794e-05, "loss": 4.1032, "step": 23790 }, { "epoch": 0.6393927179900578, "grad_norm": 2.3903448581695557, "learning_rate": 1.827590902900722e-05, "loss": 4.0687, "step": 23795 }, { "epoch": 0.6395270724170361, "grad_norm": 2.509956121444702, "learning_rate": 1.8269099822960645e-05, "loss": 4.0281, "step": 23800 }, { "epoch": 0.6396614268440145, "grad_norm": 2.368028402328491, "learning_rate": 1.8262290616914067e-05, "loss": 4.1365, "step": 23805 }, { "epoch": 0.6397957812709929, "grad_norm": 2.5107011795043945, "learning_rate": 1.8255481410867496e-05, "loss": 4.1545, "step": 23810 }, { "epoch": 0.6399301356979713, "grad_norm": 2.436769962310791, "learning_rate": 1.824867220482092e-05, "loss": 4.1481, "step": 23815 }, { "epoch": 0.6400644901249496, "grad_norm": 2.3996737003326416, "learning_rate": 1.8241862998774344e-05, "loss": 4.1041, "step": 23820 }, { "epoch": 0.640198844551928, "grad_norm": 2.498392343521118, "learning_rate": 1.823505379272777e-05, "loss": 4.0279, "step": 23825 }, { "epoch": 0.6403331989789064, "grad_norm": 2.4884490966796875, "learning_rate": 1.8228244586681196e-05, "loss": 4.0626, "step": 23830 }, { "epoch": 0.6404675534058847, "grad_norm": 2.7227866649627686, "learning_rate": 1.8221435380634618e-05, "loss": 4.1442, "step": 23835 }, { "epoch": 0.6406019078328631, "grad_norm": 2.567849636077881, "learning_rate": 1.8214626174588044e-05, "loss": 4.1008, "step": 23840 }, { "epoch": 0.6407362622598415, "grad_norm": 2.673973798751831, "learning_rate": 1.820781696854147e-05, "loss": 4.0957, "step": 23845 }, { "epoch": 0.6408706166868199, "grad_norm": 2.6582841873168945, "learning_rate": 1.820100776249489e-05, "loss": 4.1615, "step": 23850 }, { "epoch": 0.6410049711137982, "grad_norm": 2.7365658283233643, "learning_rate": 1.819419855644832e-05, "loss": 4.0928, "step": 23855 }, { "epoch": 0.6411393255407766, "grad_norm": 2.4883408546447754, "learning_rate": 1.8187389350401743e-05, "loss": 4.0893, "step": 23860 }, { "epoch": 0.641273679967755, "grad_norm": 2.628448247909546, "learning_rate": 1.818058014435517e-05, "loss": 4.137, "step": 23865 }, { "epoch": 0.6414080343947333, "grad_norm": 2.6400766372680664, "learning_rate": 1.8173770938308594e-05, "loss": 4.0436, "step": 23870 }, { "epoch": 0.6415423888217117, "grad_norm": 2.8825888633728027, "learning_rate": 1.816696173226202e-05, "loss": 4.0475, "step": 23875 }, { "epoch": 0.6416767432486901, "grad_norm": 2.4864284992218018, "learning_rate": 1.8160152526215442e-05, "loss": 3.9497, "step": 23880 }, { "epoch": 0.6418110976756685, "grad_norm": 2.3812966346740723, "learning_rate": 1.815334332016887e-05, "loss": 4.1262, "step": 23885 }, { "epoch": 0.6419454521026468, "grad_norm": 2.404181957244873, "learning_rate": 1.8146534114122293e-05, "loss": 4.1794, "step": 23890 }, { "epoch": 0.6420798065296252, "grad_norm": 2.670949935913086, "learning_rate": 1.813972490807572e-05, "loss": 4.1559, "step": 23895 }, { "epoch": 0.6422141609566036, "grad_norm": 2.530696392059326, "learning_rate": 1.8132915702029145e-05, "loss": 3.9382, "step": 23900 }, { "epoch": 0.6423485153835818, "grad_norm": 2.38751220703125, "learning_rate": 1.812610649598257e-05, "loss": 4.1264, "step": 23905 }, { "epoch": 0.6424828698105602, "grad_norm": 2.4727799892425537, "learning_rate": 1.8119297289935993e-05, "loss": 4.0848, "step": 23910 }, { "epoch": 0.6426172242375386, "grad_norm": 2.458280324935913, "learning_rate": 1.8112488083889422e-05, "loss": 4.106, "step": 23915 }, { "epoch": 0.642751578664517, "grad_norm": 2.6680119037628174, "learning_rate": 1.8105678877842844e-05, "loss": 4.1118, "step": 23920 }, { "epoch": 0.6428859330914953, "grad_norm": 2.6694414615631104, "learning_rate": 1.809886967179627e-05, "loss": 4.0348, "step": 23925 }, { "epoch": 0.6430202875184737, "grad_norm": 2.6987314224243164, "learning_rate": 1.8092060465749695e-05, "loss": 4.056, "step": 23930 }, { "epoch": 0.6431546419454521, "grad_norm": 2.3725807666778564, "learning_rate": 1.808525125970312e-05, "loss": 4.1045, "step": 23935 }, { "epoch": 0.6432889963724304, "grad_norm": 2.5497794151306152, "learning_rate": 1.8078442053656543e-05, "loss": 4.1706, "step": 23940 }, { "epoch": 0.6434233507994088, "grad_norm": 2.352515459060669, "learning_rate": 1.807163284760997e-05, "loss": 3.9439, "step": 23945 }, { "epoch": 0.6435577052263872, "grad_norm": 2.4654340744018555, "learning_rate": 1.8064823641563395e-05, "loss": 4.1169, "step": 23950 }, { "epoch": 0.6436920596533656, "grad_norm": 2.6187195777893066, "learning_rate": 1.805801443551682e-05, "loss": 4.087, "step": 23955 }, { "epoch": 0.6438264140803439, "grad_norm": 2.564617872238159, "learning_rate": 1.8051205229470246e-05, "loss": 4.0549, "step": 23960 }, { "epoch": 0.6439607685073223, "grad_norm": 2.4929139614105225, "learning_rate": 1.804439602342367e-05, "loss": 4.1109, "step": 23965 }, { "epoch": 0.6440951229343007, "grad_norm": 2.246645927429199, "learning_rate": 1.8037586817377094e-05, "loss": 4.2088, "step": 23970 }, { "epoch": 0.644229477361279, "grad_norm": 2.221834421157837, "learning_rate": 1.803077761133052e-05, "loss": 4.1836, "step": 23975 }, { "epoch": 0.6443638317882574, "grad_norm": 2.3892807960510254, "learning_rate": 1.8023968405283945e-05, "loss": 4.144, "step": 23980 }, { "epoch": 0.6444981862152358, "grad_norm": 2.687943696975708, "learning_rate": 1.801715919923737e-05, "loss": 4.1796, "step": 23985 }, { "epoch": 0.6446325406422142, "grad_norm": 2.4180691242218018, "learning_rate": 1.8010349993190796e-05, "loss": 4.1278, "step": 23990 }, { "epoch": 0.6447668950691925, "grad_norm": 2.6881208419799805, "learning_rate": 1.800354078714422e-05, "loss": 4.1401, "step": 23995 }, { "epoch": 0.6449012494961709, "grad_norm": 2.2429795265197754, "learning_rate": 1.7996731581097644e-05, "loss": 4.075, "step": 24000 }, { "epoch": 0.6450356039231493, "grad_norm": 2.478213310241699, "learning_rate": 1.798992237505107e-05, "loss": 4.1086, "step": 24005 }, { "epoch": 0.6451699583501276, "grad_norm": 2.5723352432250977, "learning_rate": 1.7983113169004496e-05, "loss": 4.17, "step": 24010 }, { "epoch": 0.645304312777106, "grad_norm": 2.44465970993042, "learning_rate": 1.7976303962957918e-05, "loss": 4.2546, "step": 24015 }, { "epoch": 0.6454386672040844, "grad_norm": 2.5174989700317383, "learning_rate": 1.7969494756911347e-05, "loss": 4.2404, "step": 24020 }, { "epoch": 0.6455730216310628, "grad_norm": 2.5027527809143066, "learning_rate": 1.796268555086477e-05, "loss": 4.0736, "step": 24025 }, { "epoch": 0.6457073760580411, "grad_norm": 2.6018006801605225, "learning_rate": 1.7955876344818195e-05, "loss": 4.0513, "step": 24030 }, { "epoch": 0.6458417304850195, "grad_norm": 2.2839057445526123, "learning_rate": 1.794906713877162e-05, "loss": 4.0872, "step": 24035 }, { "epoch": 0.6459760849119979, "grad_norm": 2.470343589782715, "learning_rate": 1.7942257932725046e-05, "loss": 4.1103, "step": 24040 }, { "epoch": 0.6461104393389763, "grad_norm": 2.6035096645355225, "learning_rate": 1.793544872667847e-05, "loss": 3.9309, "step": 24045 }, { "epoch": 0.6462447937659546, "grad_norm": 2.5959882736206055, "learning_rate": 1.7928639520631894e-05, "loss": 4.2064, "step": 24050 }, { "epoch": 0.646379148192933, "grad_norm": 2.7281436920166016, "learning_rate": 1.792183031458532e-05, "loss": 4.1966, "step": 24055 }, { "epoch": 0.6465135026199114, "grad_norm": 2.656104326248169, "learning_rate": 1.7915021108538745e-05, "loss": 4.1146, "step": 24060 }, { "epoch": 0.6466478570468897, "grad_norm": 2.3168928623199463, "learning_rate": 1.790821190249217e-05, "loss": 3.9576, "step": 24065 }, { "epoch": 0.646782211473868, "grad_norm": 2.439548969268799, "learning_rate": 1.7901402696445597e-05, "loss": 4.0711, "step": 24070 }, { "epoch": 0.6469165659008465, "grad_norm": 2.649630546569824, "learning_rate": 1.789459349039902e-05, "loss": 4.0327, "step": 24075 }, { "epoch": 0.6470509203278249, "grad_norm": 2.47031569480896, "learning_rate": 1.7887784284352445e-05, "loss": 3.9958, "step": 24080 }, { "epoch": 0.6471852747548031, "grad_norm": 2.575955867767334, "learning_rate": 1.788097507830587e-05, "loss": 4.027, "step": 24085 }, { "epoch": 0.6473196291817815, "grad_norm": 2.2539994716644287, "learning_rate": 1.7874165872259296e-05, "loss": 3.9701, "step": 24090 }, { "epoch": 0.6474539836087599, "grad_norm": 2.60310697555542, "learning_rate": 1.786735666621272e-05, "loss": 4.09, "step": 24095 }, { "epoch": 0.6475883380357382, "grad_norm": 2.4930222034454346, "learning_rate": 1.7860547460166147e-05, "loss": 4.1123, "step": 24100 }, { "epoch": 0.6477226924627166, "grad_norm": 2.4262917041778564, "learning_rate": 1.785373825411957e-05, "loss": 4.2577, "step": 24105 }, { "epoch": 0.647857046889695, "grad_norm": 2.3126251697540283, "learning_rate": 1.7846929048072995e-05, "loss": 4.1058, "step": 24110 }, { "epoch": 0.6479914013166734, "grad_norm": 2.5512583255767822, "learning_rate": 1.784011984202642e-05, "loss": 4.1864, "step": 24115 }, { "epoch": 0.6481257557436517, "grad_norm": 2.454341173171997, "learning_rate": 1.7833310635979847e-05, "loss": 4.2259, "step": 24120 }, { "epoch": 0.6482601101706301, "grad_norm": 2.76254940032959, "learning_rate": 1.782650142993327e-05, "loss": 4.0664, "step": 24125 }, { "epoch": 0.6483944645976085, "grad_norm": 2.512204885482788, "learning_rate": 1.7819692223886698e-05, "loss": 4.1579, "step": 24130 }, { "epoch": 0.6485288190245868, "grad_norm": 2.597622871398926, "learning_rate": 1.781288301784012e-05, "loss": 4.1518, "step": 24135 }, { "epoch": 0.6486631734515652, "grad_norm": 2.5101253986358643, "learning_rate": 1.7806073811793546e-05, "loss": 3.9893, "step": 24140 }, { "epoch": 0.6487975278785436, "grad_norm": 2.403233051300049, "learning_rate": 1.779926460574697e-05, "loss": 4.1149, "step": 24145 }, { "epoch": 0.648931882305522, "grad_norm": 2.6212189197540283, "learning_rate": 1.7792455399700394e-05, "loss": 4.1019, "step": 24150 }, { "epoch": 0.6490662367325003, "grad_norm": 2.514524459838867, "learning_rate": 1.778564619365382e-05, "loss": 4.1005, "step": 24155 }, { "epoch": 0.6492005911594787, "grad_norm": 2.342414379119873, "learning_rate": 1.7778836987607245e-05, "loss": 4.1548, "step": 24160 }, { "epoch": 0.6493349455864571, "grad_norm": 2.471890687942505, "learning_rate": 1.777202778156067e-05, "loss": 4.1681, "step": 24165 }, { "epoch": 0.6494693000134354, "grad_norm": 2.527994394302368, "learning_rate": 1.7765218575514096e-05, "loss": 3.9966, "step": 24170 }, { "epoch": 0.6496036544404138, "grad_norm": 2.55450439453125, "learning_rate": 1.7758409369467522e-05, "loss": 4.1512, "step": 24175 }, { "epoch": 0.6497380088673922, "grad_norm": 2.499756097793579, "learning_rate": 1.7751600163420944e-05, "loss": 4.08, "step": 24180 }, { "epoch": 0.6498723632943706, "grad_norm": 2.152082920074463, "learning_rate": 1.774479095737437e-05, "loss": 4.0514, "step": 24185 }, { "epoch": 0.6500067177213489, "grad_norm": 2.4695308208465576, "learning_rate": 1.7737981751327796e-05, "loss": 4.1256, "step": 24190 }, { "epoch": 0.6501410721483273, "grad_norm": 2.271550178527832, "learning_rate": 1.773117254528122e-05, "loss": 4.1455, "step": 24195 }, { "epoch": 0.6502754265753057, "grad_norm": 2.519408702850342, "learning_rate": 1.7724363339234647e-05, "loss": 4.0881, "step": 24200 }, { "epoch": 0.650409781002284, "grad_norm": 2.741361379623413, "learning_rate": 1.7717554133188073e-05, "loss": 4.0979, "step": 24205 }, { "epoch": 0.6505441354292624, "grad_norm": 2.3821797370910645, "learning_rate": 1.7710744927141495e-05, "loss": 4.0628, "step": 24210 }, { "epoch": 0.6506784898562408, "grad_norm": 2.4963490962982178, "learning_rate": 1.770393572109492e-05, "loss": 4.093, "step": 24215 }, { "epoch": 0.6508128442832192, "grad_norm": 2.154620885848999, "learning_rate": 1.7697126515048346e-05, "loss": 4.0586, "step": 24220 }, { "epoch": 0.6509471987101975, "grad_norm": 2.65537166595459, "learning_rate": 1.7690317309001772e-05, "loss": 4.2049, "step": 24225 }, { "epoch": 0.6510815531371759, "grad_norm": 2.3128857612609863, "learning_rate": 1.7683508102955194e-05, "loss": 4.0714, "step": 24230 }, { "epoch": 0.6512159075641543, "grad_norm": 2.6045753955841064, "learning_rate": 1.7676698896908623e-05, "loss": 4.1728, "step": 24235 }, { "epoch": 0.6513502619911327, "grad_norm": 2.408907413482666, "learning_rate": 1.7669889690862045e-05, "loss": 4.1417, "step": 24240 }, { "epoch": 0.651484616418111, "grad_norm": 2.5807552337646484, "learning_rate": 1.766308048481547e-05, "loss": 4.1074, "step": 24245 }, { "epoch": 0.6516189708450894, "grad_norm": 2.642622470855713, "learning_rate": 1.7656271278768897e-05, "loss": 4.0736, "step": 24250 }, { "epoch": 0.6517533252720678, "grad_norm": 2.3323090076446533, "learning_rate": 1.7649462072722322e-05, "loss": 4.1872, "step": 24255 }, { "epoch": 0.651887679699046, "grad_norm": 2.597944974899292, "learning_rate": 1.7642652866675745e-05, "loss": 4.1838, "step": 24260 }, { "epoch": 0.6520220341260244, "grad_norm": 2.5142457485198975, "learning_rate": 1.7635843660629174e-05, "loss": 4.0876, "step": 24265 }, { "epoch": 0.6521563885530028, "grad_norm": 2.6136395931243896, "learning_rate": 1.7629034454582596e-05, "loss": 4.1168, "step": 24270 }, { "epoch": 0.6522907429799812, "grad_norm": 2.5781824588775635, "learning_rate": 1.762222524853602e-05, "loss": 3.9381, "step": 24275 }, { "epoch": 0.6524250974069595, "grad_norm": 2.4265387058258057, "learning_rate": 1.7615416042489447e-05, "loss": 4.1294, "step": 24280 }, { "epoch": 0.6525594518339379, "grad_norm": 2.331796407699585, "learning_rate": 1.760860683644287e-05, "loss": 3.9434, "step": 24285 }, { "epoch": 0.6526938062609163, "grad_norm": 2.469675064086914, "learning_rate": 1.7601797630396295e-05, "loss": 4.099, "step": 24290 }, { "epoch": 0.6528281606878946, "grad_norm": 2.712191343307495, "learning_rate": 1.759498842434972e-05, "loss": 4.0513, "step": 24295 }, { "epoch": 0.652962515114873, "grad_norm": 2.5762579441070557, "learning_rate": 1.7588179218303147e-05, "loss": 4.0775, "step": 24300 }, { "epoch": 0.6530968695418514, "grad_norm": 2.7294833660125732, "learning_rate": 1.7581370012256572e-05, "loss": 4.0909, "step": 24305 }, { "epoch": 0.6532312239688298, "grad_norm": 2.6736409664154053, "learning_rate": 1.7574560806209998e-05, "loss": 4.0962, "step": 24310 }, { "epoch": 0.6533655783958081, "grad_norm": 2.439054250717163, "learning_rate": 1.756775160016342e-05, "loss": 4.2383, "step": 24315 }, { "epoch": 0.6534999328227865, "grad_norm": 2.476149559020996, "learning_rate": 1.7560942394116846e-05, "loss": 4.0377, "step": 24320 }, { "epoch": 0.6536342872497649, "grad_norm": 2.71405291557312, "learning_rate": 1.755413318807027e-05, "loss": 4.3044, "step": 24325 }, { "epoch": 0.6537686416767432, "grad_norm": 2.6060214042663574, "learning_rate": 1.7547323982023697e-05, "loss": 4.0513, "step": 24330 }, { "epoch": 0.6539029961037216, "grad_norm": 2.4815680980682373, "learning_rate": 1.754051477597712e-05, "loss": 4.1232, "step": 24335 }, { "epoch": 0.6540373505307, "grad_norm": 2.401942014694214, "learning_rate": 1.753370556993055e-05, "loss": 4.0883, "step": 24340 }, { "epoch": 0.6541717049576784, "grad_norm": 2.4696788787841797, "learning_rate": 1.752689636388397e-05, "loss": 4.0169, "step": 24345 }, { "epoch": 0.6543060593846567, "grad_norm": 2.653489112854004, "learning_rate": 1.7520087157837396e-05, "loss": 4.0838, "step": 24350 }, { "epoch": 0.6544404138116351, "grad_norm": 2.515920877456665, "learning_rate": 1.7513277951790822e-05, "loss": 4.0425, "step": 24355 }, { "epoch": 0.6545747682386135, "grad_norm": 2.6152942180633545, "learning_rate": 1.7506468745744248e-05, "loss": 4.0437, "step": 24360 }, { "epoch": 0.6547091226655918, "grad_norm": 2.244910955429077, "learning_rate": 1.749965953969767e-05, "loss": 4.0964, "step": 24365 }, { "epoch": 0.6548434770925702, "grad_norm": 2.3278908729553223, "learning_rate": 1.74928503336511e-05, "loss": 4.1439, "step": 24370 }, { "epoch": 0.6549778315195486, "grad_norm": 2.4289348125457764, "learning_rate": 1.748604112760452e-05, "loss": 4.1834, "step": 24375 }, { "epoch": 0.655112185946527, "grad_norm": 2.6016147136688232, "learning_rate": 1.7479231921557947e-05, "loss": 3.963, "step": 24380 }, { "epoch": 0.6552465403735053, "grad_norm": 2.5902950763702393, "learning_rate": 1.7472422715511373e-05, "loss": 4.1394, "step": 24385 }, { "epoch": 0.6553808948004837, "grad_norm": 2.422241449356079, "learning_rate": 1.7465613509464798e-05, "loss": 4.1666, "step": 24390 }, { "epoch": 0.6555152492274621, "grad_norm": 2.633828639984131, "learning_rate": 1.745880430341822e-05, "loss": 4.1107, "step": 24395 }, { "epoch": 0.6556496036544404, "grad_norm": 2.316654682159424, "learning_rate": 1.745199509737165e-05, "loss": 4.0794, "step": 24400 }, { "epoch": 0.6557839580814188, "grad_norm": 2.8310577869415283, "learning_rate": 1.7445185891325072e-05, "loss": 4.0834, "step": 24405 }, { "epoch": 0.6559183125083972, "grad_norm": 2.440497636795044, "learning_rate": 1.7438376685278497e-05, "loss": 4.1115, "step": 24410 }, { "epoch": 0.6560526669353756, "grad_norm": 2.45755672454834, "learning_rate": 1.7431567479231923e-05, "loss": 4.1449, "step": 24415 }, { "epoch": 0.6561870213623539, "grad_norm": 2.4841885566711426, "learning_rate": 1.742475827318535e-05, "loss": 4.057, "step": 24420 }, { "epoch": 0.6563213757893323, "grad_norm": 2.41619873046875, "learning_rate": 1.741794906713877e-05, "loss": 4.0798, "step": 24425 }, { "epoch": 0.6564557302163107, "grad_norm": 2.747192621231079, "learning_rate": 1.74111398610922e-05, "loss": 4.0493, "step": 24430 }, { "epoch": 0.656590084643289, "grad_norm": 2.42177677154541, "learning_rate": 1.7404330655045622e-05, "loss": 4.1373, "step": 24435 }, { "epoch": 0.6567244390702673, "grad_norm": 2.3373184204101562, "learning_rate": 1.7397521448999045e-05, "loss": 4.0239, "step": 24440 }, { "epoch": 0.6568587934972457, "grad_norm": 2.5453245639801025, "learning_rate": 1.7390712242952474e-05, "loss": 4.0732, "step": 24445 }, { "epoch": 0.6569931479242241, "grad_norm": 2.41300106048584, "learning_rate": 1.7383903036905896e-05, "loss": 4.0503, "step": 24450 }, { "epoch": 0.6571275023512024, "grad_norm": 2.3283960819244385, "learning_rate": 1.737709383085932e-05, "loss": 4.1827, "step": 24455 }, { "epoch": 0.6572618567781808, "grad_norm": 2.498337984085083, "learning_rate": 1.7370284624812747e-05, "loss": 4.1566, "step": 24460 }, { "epoch": 0.6573962112051592, "grad_norm": 2.5517868995666504, "learning_rate": 1.7363475418766173e-05, "loss": 4.0128, "step": 24465 }, { "epoch": 0.6575305656321376, "grad_norm": 2.4267733097076416, "learning_rate": 1.7356666212719595e-05, "loss": 4.1411, "step": 24470 }, { "epoch": 0.6576649200591159, "grad_norm": 2.5806798934936523, "learning_rate": 1.7349857006673024e-05, "loss": 4.1616, "step": 24475 }, { "epoch": 0.6577992744860943, "grad_norm": 2.52950382232666, "learning_rate": 1.7343047800626446e-05, "loss": 4.1669, "step": 24480 }, { "epoch": 0.6579336289130727, "grad_norm": 2.48915696144104, "learning_rate": 1.7336238594579872e-05, "loss": 4.0702, "step": 24485 }, { "epoch": 0.658067983340051, "grad_norm": 2.544912576675415, "learning_rate": 1.7329429388533298e-05, "loss": 4.0026, "step": 24490 }, { "epoch": 0.6582023377670294, "grad_norm": 2.432206630706787, "learning_rate": 1.7322620182486723e-05, "loss": 4.1387, "step": 24495 }, { "epoch": 0.6583366921940078, "grad_norm": 2.521678924560547, "learning_rate": 1.7315810976440146e-05, "loss": 4.1354, "step": 24500 }, { "epoch": 0.6584710466209862, "grad_norm": 2.418189525604248, "learning_rate": 1.7309001770393575e-05, "loss": 4.1564, "step": 24505 }, { "epoch": 0.6586054010479645, "grad_norm": 2.4838197231292725, "learning_rate": 1.7302192564346997e-05, "loss": 4.0486, "step": 24510 }, { "epoch": 0.6587397554749429, "grad_norm": 2.4238879680633545, "learning_rate": 1.7295383358300423e-05, "loss": 4.1166, "step": 24515 }, { "epoch": 0.6588741099019213, "grad_norm": 2.8184475898742676, "learning_rate": 1.728857415225385e-05, "loss": 4.047, "step": 24520 }, { "epoch": 0.6590084643288996, "grad_norm": 2.291536331176758, "learning_rate": 1.7281764946207274e-05, "loss": 3.9852, "step": 24525 }, { "epoch": 0.659142818755878, "grad_norm": 2.384695529937744, "learning_rate": 1.7274955740160696e-05, "loss": 4.0376, "step": 24530 }, { "epoch": 0.6592771731828564, "grad_norm": 2.104480743408203, "learning_rate": 1.7268146534114125e-05, "loss": 4.2217, "step": 24535 }, { "epoch": 0.6594115276098348, "grad_norm": 2.3060505390167236, "learning_rate": 1.7261337328067548e-05, "loss": 4.1561, "step": 24540 }, { "epoch": 0.6595458820368131, "grad_norm": 2.5552971363067627, "learning_rate": 1.7254528122020973e-05, "loss": 4.0796, "step": 24545 }, { "epoch": 0.6596802364637915, "grad_norm": 2.4885447025299072, "learning_rate": 1.72477189159744e-05, "loss": 4.1043, "step": 24550 }, { "epoch": 0.6598145908907699, "grad_norm": 2.44622802734375, "learning_rate": 1.7240909709927825e-05, "loss": 4.1431, "step": 24555 }, { "epoch": 0.6599489453177482, "grad_norm": 2.3995563983917236, "learning_rate": 1.7234100503881247e-05, "loss": 4.1439, "step": 24560 }, { "epoch": 0.6600832997447266, "grad_norm": 2.566452741622925, "learning_rate": 1.7227291297834676e-05, "loss": 4.1962, "step": 24565 }, { "epoch": 0.660217654171705, "grad_norm": 2.6441328525543213, "learning_rate": 1.7220482091788098e-05, "loss": 4.0496, "step": 24570 }, { "epoch": 0.6603520085986834, "grad_norm": 2.536961078643799, "learning_rate": 1.7213672885741524e-05, "loss": 4.135, "step": 24575 }, { "epoch": 0.6604863630256617, "grad_norm": 2.541011333465576, "learning_rate": 1.720686367969495e-05, "loss": 4.1777, "step": 24580 }, { "epoch": 0.6606207174526401, "grad_norm": 2.3747928142547607, "learning_rate": 1.7200054473648372e-05, "loss": 4.0871, "step": 24585 }, { "epoch": 0.6607550718796185, "grad_norm": 3.007676362991333, "learning_rate": 1.7193245267601797e-05, "loss": 4.0789, "step": 24590 }, { "epoch": 0.6608894263065968, "grad_norm": 2.5727133750915527, "learning_rate": 1.7186436061555223e-05, "loss": 4.1358, "step": 24595 }, { "epoch": 0.6610237807335752, "grad_norm": 2.504718542098999, "learning_rate": 1.717962685550865e-05, "loss": 4.0425, "step": 24600 }, { "epoch": 0.6611581351605536, "grad_norm": 2.3066539764404297, "learning_rate": 1.717281764946207e-05, "loss": 4.1306, "step": 24605 }, { "epoch": 0.661292489587532, "grad_norm": 2.6594419479370117, "learning_rate": 1.71660084434155e-05, "loss": 4.1269, "step": 24610 }, { "epoch": 0.6614268440145102, "grad_norm": 2.69704008102417, "learning_rate": 1.7159199237368922e-05, "loss": 4.1212, "step": 24615 }, { "epoch": 0.6615611984414886, "grad_norm": 2.783600330352783, "learning_rate": 1.7152390031322348e-05, "loss": 4.1565, "step": 24620 }, { "epoch": 0.661695552868467, "grad_norm": 2.550464630126953, "learning_rate": 1.7145580825275774e-05, "loss": 4.1148, "step": 24625 }, { "epoch": 0.6618299072954453, "grad_norm": 2.5871169567108154, "learning_rate": 1.71387716192292e-05, "loss": 4.1259, "step": 24630 }, { "epoch": 0.6619642617224237, "grad_norm": 2.4237937927246094, "learning_rate": 1.713196241318262e-05, "loss": 4.0795, "step": 24635 }, { "epoch": 0.6620986161494021, "grad_norm": 2.2721176147460938, "learning_rate": 1.712515320713605e-05, "loss": 4.0973, "step": 24640 }, { "epoch": 0.6622329705763805, "grad_norm": 2.4347729682922363, "learning_rate": 1.7118344001089473e-05, "loss": 4.1864, "step": 24645 }, { "epoch": 0.6623673250033588, "grad_norm": 2.442473888397217, "learning_rate": 1.71115347950429e-05, "loss": 3.996, "step": 24650 }, { "epoch": 0.6625016794303372, "grad_norm": 2.4915847778320312, "learning_rate": 1.7104725588996324e-05, "loss": 4.1246, "step": 24655 }, { "epoch": 0.6626360338573156, "grad_norm": 2.4784507751464844, "learning_rate": 1.709791638294975e-05, "loss": 4.1265, "step": 24660 }, { "epoch": 0.662770388284294, "grad_norm": 2.3777551651000977, "learning_rate": 1.7091107176903172e-05, "loss": 4.0704, "step": 24665 }, { "epoch": 0.6629047427112723, "grad_norm": 2.485563278198242, "learning_rate": 1.70842979708566e-05, "loss": 4.1407, "step": 24670 }, { "epoch": 0.6630390971382507, "grad_norm": 2.5457584857940674, "learning_rate": 1.7077488764810023e-05, "loss": 4.099, "step": 24675 }, { "epoch": 0.6631734515652291, "grad_norm": 2.586071729660034, "learning_rate": 1.707067955876345e-05, "loss": 4.0926, "step": 24680 }, { "epoch": 0.6633078059922074, "grad_norm": 2.3716979026794434, "learning_rate": 1.7063870352716875e-05, "loss": 4.0058, "step": 24685 }, { "epoch": 0.6634421604191858, "grad_norm": 2.5290942192077637, "learning_rate": 1.70570611466703e-05, "loss": 4.0418, "step": 24690 }, { "epoch": 0.6635765148461642, "grad_norm": 2.341618537902832, "learning_rate": 1.7050251940623723e-05, "loss": 4.1547, "step": 24695 }, { "epoch": 0.6637108692731426, "grad_norm": 2.4289629459381104, "learning_rate": 1.704344273457715e-05, "loss": 4.0125, "step": 24700 }, { "epoch": 0.6638452237001209, "grad_norm": 2.4045491218566895, "learning_rate": 1.7036633528530574e-05, "loss": 4.1559, "step": 24705 }, { "epoch": 0.6639795781270993, "grad_norm": 2.6459834575653076, "learning_rate": 1.7029824322484e-05, "loss": 3.957, "step": 24710 }, { "epoch": 0.6641139325540777, "grad_norm": 2.489624500274658, "learning_rate": 1.7023015116437425e-05, "loss": 4.0732, "step": 24715 }, { "epoch": 0.664248286981056, "grad_norm": 2.6033644676208496, "learning_rate": 1.701620591039085e-05, "loss": 3.9813, "step": 24720 }, { "epoch": 0.6643826414080344, "grad_norm": 2.848665237426758, "learning_rate": 1.7009396704344273e-05, "loss": 4.1094, "step": 24725 }, { "epoch": 0.6645169958350128, "grad_norm": 2.356055974960327, "learning_rate": 1.70025874982977e-05, "loss": 4.1987, "step": 24730 }, { "epoch": 0.6646513502619912, "grad_norm": 2.2857542037963867, "learning_rate": 1.6995778292251125e-05, "loss": 3.851, "step": 24735 }, { "epoch": 0.6647857046889695, "grad_norm": 2.4776763916015625, "learning_rate": 1.6988969086204547e-05, "loss": 4.1136, "step": 24740 }, { "epoch": 0.6649200591159479, "grad_norm": 2.8978004455566406, "learning_rate": 1.6982159880157976e-05, "loss": 4.0751, "step": 24745 }, { "epoch": 0.6650544135429263, "grad_norm": 2.4352550506591797, "learning_rate": 1.6975350674111398e-05, "loss": 4.1022, "step": 24750 }, { "epoch": 0.6651887679699046, "grad_norm": 2.5188076496124268, "learning_rate": 1.6968541468064824e-05, "loss": 4.0651, "step": 24755 }, { "epoch": 0.665323122396883, "grad_norm": 2.5174953937530518, "learning_rate": 1.696173226201825e-05, "loss": 3.9803, "step": 24760 }, { "epoch": 0.6654574768238614, "grad_norm": 2.65630841255188, "learning_rate": 1.6954923055971675e-05, "loss": 4.1051, "step": 24765 }, { "epoch": 0.6655918312508398, "grad_norm": 2.598051071166992, "learning_rate": 1.6948113849925097e-05, "loss": 4.1021, "step": 24770 }, { "epoch": 0.665726185677818, "grad_norm": 2.4839766025543213, "learning_rate": 1.6941304643878526e-05, "loss": 3.9821, "step": 24775 }, { "epoch": 0.6658605401047965, "grad_norm": 2.4451494216918945, "learning_rate": 1.693449543783195e-05, "loss": 3.9787, "step": 24780 }, { "epoch": 0.6659948945317749, "grad_norm": 2.370553731918335, "learning_rate": 1.6927686231785374e-05, "loss": 4.0361, "step": 24785 }, { "epoch": 0.6661292489587531, "grad_norm": 2.4774012565612793, "learning_rate": 1.69208770257388e-05, "loss": 4.1408, "step": 24790 }, { "epoch": 0.6662636033857315, "grad_norm": 2.44213604927063, "learning_rate": 1.6914067819692226e-05, "loss": 4.0494, "step": 24795 }, { "epoch": 0.6663979578127099, "grad_norm": 2.6451785564422607, "learning_rate": 1.6907258613645648e-05, "loss": 4.2173, "step": 24800 }, { "epoch": 0.6665323122396883, "grad_norm": 2.5545859336853027, "learning_rate": 1.6900449407599077e-05, "loss": 4.0913, "step": 24805 }, { "epoch": 0.6666666666666666, "grad_norm": 2.8899617195129395, "learning_rate": 1.68936402015525e-05, "loss": 3.9716, "step": 24810 }, { "epoch": 0.666801021093645, "grad_norm": 2.3707151412963867, "learning_rate": 1.6886830995505925e-05, "loss": 4.0142, "step": 24815 }, { "epoch": 0.6669353755206234, "grad_norm": 2.4498772621154785, "learning_rate": 1.688002178945935e-05, "loss": 4.1129, "step": 24820 }, { "epoch": 0.6670697299476017, "grad_norm": 2.6980397701263428, "learning_rate": 1.6873212583412776e-05, "loss": 4.1241, "step": 24825 }, { "epoch": 0.6672040843745801, "grad_norm": 2.606616735458374, "learning_rate": 1.68664033773662e-05, "loss": 4.0066, "step": 24830 }, { "epoch": 0.6673384388015585, "grad_norm": 2.4780101776123047, "learning_rate": 1.6859594171319628e-05, "loss": 4.146, "step": 24835 }, { "epoch": 0.6674727932285369, "grad_norm": 2.627702236175537, "learning_rate": 1.685278496527305e-05, "loss": 4.0169, "step": 24840 }, { "epoch": 0.6676071476555152, "grad_norm": 2.54738712310791, "learning_rate": 1.6845975759226475e-05, "loss": 4.1185, "step": 24845 }, { "epoch": 0.6677415020824936, "grad_norm": 2.6914215087890625, "learning_rate": 1.68391665531799e-05, "loss": 4.1587, "step": 24850 }, { "epoch": 0.667875856509472, "grad_norm": 2.2947144508361816, "learning_rate": 1.6832357347133327e-05, "loss": 4.1164, "step": 24855 }, { "epoch": 0.6680102109364504, "grad_norm": 2.473254680633545, "learning_rate": 1.682554814108675e-05, "loss": 3.9639, "step": 24860 }, { "epoch": 0.6681445653634287, "grad_norm": 2.7933645248413086, "learning_rate": 1.6818738935040178e-05, "loss": 4.0223, "step": 24865 }, { "epoch": 0.6682789197904071, "grad_norm": 2.551804542541504, "learning_rate": 1.68119297289936e-05, "loss": 4.1079, "step": 24870 }, { "epoch": 0.6684132742173855, "grad_norm": 2.2804107666015625, "learning_rate": 1.6805120522947026e-05, "loss": 4.1846, "step": 24875 }, { "epoch": 0.6685476286443638, "grad_norm": 2.984001636505127, "learning_rate": 1.679831131690045e-05, "loss": 4.0941, "step": 24880 }, { "epoch": 0.6686819830713422, "grad_norm": 2.3935153484344482, "learning_rate": 1.6791502110853874e-05, "loss": 4.09, "step": 24885 }, { "epoch": 0.6688163374983206, "grad_norm": 2.5095157623291016, "learning_rate": 1.67846929048073e-05, "loss": 4.1348, "step": 24890 }, { "epoch": 0.668950691925299, "grad_norm": 2.4136135578155518, "learning_rate": 1.6777883698760725e-05, "loss": 4.1284, "step": 24895 }, { "epoch": 0.6690850463522773, "grad_norm": 2.368994951248169, "learning_rate": 1.677107449271415e-05, "loss": 4.1264, "step": 24900 }, { "epoch": 0.6692194007792557, "grad_norm": 2.667616367340088, "learning_rate": 1.6764265286667573e-05, "loss": 4.0379, "step": 24905 }, { "epoch": 0.6693537552062341, "grad_norm": 2.409773349761963, "learning_rate": 1.6757456080621002e-05, "loss": 3.9984, "step": 24910 }, { "epoch": 0.6694881096332124, "grad_norm": 2.5774850845336914, "learning_rate": 1.6750646874574424e-05, "loss": 4.0731, "step": 24915 }, { "epoch": 0.6696224640601908, "grad_norm": 2.4436726570129395, "learning_rate": 1.674383766852785e-05, "loss": 4.1422, "step": 24920 }, { "epoch": 0.6697568184871692, "grad_norm": 2.680602788925171, "learning_rate": 1.6737028462481276e-05, "loss": 4.2136, "step": 24925 }, { "epoch": 0.6698911729141476, "grad_norm": 2.725328207015991, "learning_rate": 1.67302192564347e-05, "loss": 4.1725, "step": 24930 }, { "epoch": 0.6700255273411259, "grad_norm": 2.410062789916992, "learning_rate": 1.6723410050388124e-05, "loss": 4.0956, "step": 24935 }, { "epoch": 0.6701598817681043, "grad_norm": 3.037128210067749, "learning_rate": 1.6716600844341553e-05, "loss": 4.1038, "step": 24940 }, { "epoch": 0.6702942361950827, "grad_norm": 2.517120361328125, "learning_rate": 1.6709791638294975e-05, "loss": 4.1687, "step": 24945 }, { "epoch": 0.670428590622061, "grad_norm": 2.439589262008667, "learning_rate": 1.67029824322484e-05, "loss": 4.1117, "step": 24950 }, { "epoch": 0.6705629450490393, "grad_norm": 2.5008647441864014, "learning_rate": 1.6696173226201826e-05, "loss": 4.1593, "step": 24955 }, { "epoch": 0.6706972994760177, "grad_norm": 2.4022274017333984, "learning_rate": 1.6689364020155252e-05, "loss": 4.1285, "step": 24960 }, { "epoch": 0.6708316539029962, "grad_norm": 2.344142436981201, "learning_rate": 1.6682554814108674e-05, "loss": 4.0911, "step": 24965 }, { "epoch": 0.6709660083299744, "grad_norm": 2.4908666610717773, "learning_rate": 1.6675745608062103e-05, "loss": 4.1673, "step": 24970 }, { "epoch": 0.6711003627569528, "grad_norm": 2.468057632446289, "learning_rate": 1.6668936402015526e-05, "loss": 4.1656, "step": 24975 }, { "epoch": 0.6712347171839312, "grad_norm": 2.608591318130493, "learning_rate": 1.666212719596895e-05, "loss": 4.1317, "step": 24980 }, { "epoch": 0.6713690716109095, "grad_norm": 2.7042131423950195, "learning_rate": 1.6655317989922377e-05, "loss": 4.0841, "step": 24985 }, { "epoch": 0.6715034260378879, "grad_norm": 2.3576300144195557, "learning_rate": 1.6648508783875803e-05, "loss": 3.9972, "step": 24990 }, { "epoch": 0.6716377804648663, "grad_norm": 2.3807365894317627, "learning_rate": 1.6641699577829225e-05, "loss": 4.0903, "step": 24995 }, { "epoch": 0.6717721348918447, "grad_norm": 2.4945669174194336, "learning_rate": 1.663489037178265e-05, "loss": 4.1182, "step": 25000 }, { "epoch": 0.671906489318823, "grad_norm": 2.506798028945923, "learning_rate": 1.6628081165736076e-05, "loss": 4.0514, "step": 25005 }, { "epoch": 0.6720408437458014, "grad_norm": 2.511164903640747, "learning_rate": 1.6621271959689502e-05, "loss": 4.1195, "step": 25010 }, { "epoch": 0.6721751981727798, "grad_norm": 2.6371006965637207, "learning_rate": 1.6614462753642927e-05, "loss": 4.1205, "step": 25015 }, { "epoch": 0.6723095525997581, "grad_norm": 2.484762191772461, "learning_rate": 1.6607653547596353e-05, "loss": 4.0628, "step": 25020 }, { "epoch": 0.6724439070267365, "grad_norm": 2.40940260887146, "learning_rate": 1.6600844341549775e-05, "loss": 4.053, "step": 25025 }, { "epoch": 0.6725782614537149, "grad_norm": 2.408735752105713, "learning_rate": 1.65940351355032e-05, "loss": 3.9809, "step": 25030 }, { "epoch": 0.6727126158806933, "grad_norm": 2.540616989135742, "learning_rate": 1.6587225929456627e-05, "loss": 4.1174, "step": 25035 }, { "epoch": 0.6728469703076716, "grad_norm": 2.550203800201416, "learning_rate": 1.658041672341005e-05, "loss": 4.0909, "step": 25040 }, { "epoch": 0.67298132473465, "grad_norm": 2.496736764907837, "learning_rate": 1.6573607517363478e-05, "loss": 4.1767, "step": 25045 }, { "epoch": 0.6731156791616284, "grad_norm": 2.5579285621643066, "learning_rate": 1.65667983113169e-05, "loss": 4.1306, "step": 25050 }, { "epoch": 0.6732500335886068, "grad_norm": 2.306535482406616, "learning_rate": 1.6559989105270326e-05, "loss": 4.0516, "step": 25055 }, { "epoch": 0.6733843880155851, "grad_norm": 2.364377498626709, "learning_rate": 1.655317989922375e-05, "loss": 4.1486, "step": 25060 }, { "epoch": 0.6735187424425635, "grad_norm": 2.5267958641052246, "learning_rate": 1.6546370693177177e-05, "loss": 4.0085, "step": 25065 }, { "epoch": 0.6736530968695419, "grad_norm": 2.5147600173950195, "learning_rate": 1.65395614871306e-05, "loss": 4.028, "step": 25070 }, { "epoch": 0.6737874512965202, "grad_norm": 2.49813175201416, "learning_rate": 1.6532752281084025e-05, "loss": 4.1508, "step": 25075 }, { "epoch": 0.6739218057234986, "grad_norm": 2.7407331466674805, "learning_rate": 1.652594307503745e-05, "loss": 4.0682, "step": 25080 }, { "epoch": 0.674056160150477, "grad_norm": 2.3560874462127686, "learning_rate": 1.6519133868990876e-05, "loss": 4.1007, "step": 25085 }, { "epoch": 0.6741905145774554, "grad_norm": 2.745978593826294, "learning_rate": 1.6512324662944302e-05, "loss": 4.1509, "step": 25090 }, { "epoch": 0.6743248690044337, "grad_norm": 2.433622360229492, "learning_rate": 1.6505515456897728e-05, "loss": 4.0509, "step": 25095 }, { "epoch": 0.6744592234314121, "grad_norm": 2.4271531105041504, "learning_rate": 1.649870625085115e-05, "loss": 4.1035, "step": 25100 }, { "epoch": 0.6745935778583905, "grad_norm": 2.503331422805786, "learning_rate": 1.6491897044804576e-05, "loss": 4.083, "step": 25105 }, { "epoch": 0.6747279322853688, "grad_norm": 2.6872217655181885, "learning_rate": 1.6485087838758e-05, "loss": 4.1697, "step": 25110 }, { "epoch": 0.6748622867123472, "grad_norm": 2.4616341590881348, "learning_rate": 1.6478278632711427e-05, "loss": 3.9935, "step": 25115 }, { "epoch": 0.6749966411393256, "grad_norm": 2.5138587951660156, "learning_rate": 1.6471469426664853e-05, "loss": 4.0369, "step": 25120 }, { "epoch": 0.675130995566304, "grad_norm": 2.6759262084960938, "learning_rate": 1.646466022061828e-05, "loss": 4.195, "step": 25125 }, { "epoch": 0.6752653499932822, "grad_norm": 2.4312796592712402, "learning_rate": 1.64578510145717e-05, "loss": 4.0867, "step": 25130 }, { "epoch": 0.6753997044202606, "grad_norm": 2.4523637294769287, "learning_rate": 1.6451041808525126e-05, "loss": 4.0974, "step": 25135 }, { "epoch": 0.675534058847239, "grad_norm": 2.490473508834839, "learning_rate": 1.6444232602478552e-05, "loss": 4.0327, "step": 25140 }, { "epoch": 0.6756684132742173, "grad_norm": 2.505502462387085, "learning_rate": 1.6437423396431978e-05, "loss": 4.1572, "step": 25145 }, { "epoch": 0.6758027677011957, "grad_norm": 2.736435651779175, "learning_rate": 1.6430614190385403e-05, "loss": 4.1907, "step": 25150 }, { "epoch": 0.6759371221281741, "grad_norm": 2.2636497020721436, "learning_rate": 1.642380498433883e-05, "loss": 4.0376, "step": 25155 }, { "epoch": 0.6760714765551525, "grad_norm": 2.6706228256225586, "learning_rate": 1.641699577829225e-05, "loss": 4.0184, "step": 25160 }, { "epoch": 0.6762058309821308, "grad_norm": 2.5945546627044678, "learning_rate": 1.6410186572245677e-05, "loss": 4.0317, "step": 25165 }, { "epoch": 0.6763401854091092, "grad_norm": 2.7675836086273193, "learning_rate": 1.6403377366199103e-05, "loss": 4.1769, "step": 25170 }, { "epoch": 0.6764745398360876, "grad_norm": 2.761752128601074, "learning_rate": 1.6396568160152525e-05, "loss": 4.1013, "step": 25175 }, { "epoch": 0.6766088942630659, "grad_norm": 2.3509011268615723, "learning_rate": 1.638975895410595e-05, "loss": 4.0968, "step": 25180 }, { "epoch": 0.6767432486900443, "grad_norm": 2.4771106243133545, "learning_rate": 1.6382949748059376e-05, "loss": 3.9593, "step": 25185 }, { "epoch": 0.6768776031170227, "grad_norm": 2.4847488403320312, "learning_rate": 1.6376140542012802e-05, "loss": 4.1489, "step": 25190 }, { "epoch": 0.6770119575440011, "grad_norm": 2.6947457790374756, "learning_rate": 1.6369331335966227e-05, "loss": 4.0419, "step": 25195 }, { "epoch": 0.6771463119709794, "grad_norm": 2.3962581157684326, "learning_rate": 1.6362522129919653e-05, "loss": 4.1903, "step": 25200 }, { "epoch": 0.6772806663979578, "grad_norm": 2.4352054595947266, "learning_rate": 1.6355712923873075e-05, "loss": 4.0537, "step": 25205 }, { "epoch": 0.6774150208249362, "grad_norm": 2.3407158851623535, "learning_rate": 1.63489037178265e-05, "loss": 4.081, "step": 25210 }, { "epoch": 0.6775493752519145, "grad_norm": 2.4292871952056885, "learning_rate": 1.6342094511779927e-05, "loss": 3.9978, "step": 25215 }, { "epoch": 0.6776837296788929, "grad_norm": 2.278900384902954, "learning_rate": 1.6335285305733352e-05, "loss": 4.1258, "step": 25220 }, { "epoch": 0.6778180841058713, "grad_norm": 2.290712833404541, "learning_rate": 1.6328476099686778e-05, "loss": 4.0273, "step": 25225 }, { "epoch": 0.6779524385328497, "grad_norm": 2.54319167137146, "learning_rate": 1.6321666893640204e-05, "loss": 3.9982, "step": 25230 }, { "epoch": 0.678086792959828, "grad_norm": 2.1533541679382324, "learning_rate": 1.6314857687593626e-05, "loss": 4.071, "step": 25235 }, { "epoch": 0.6782211473868064, "grad_norm": 2.38978910446167, "learning_rate": 1.630804848154705e-05, "loss": 3.9851, "step": 25240 }, { "epoch": 0.6783555018137848, "grad_norm": 2.516136884689331, "learning_rate": 1.6301239275500477e-05, "loss": 3.9925, "step": 25245 }, { "epoch": 0.6784898562407631, "grad_norm": 2.4939894676208496, "learning_rate": 1.6294430069453903e-05, "loss": 4.0702, "step": 25250 }, { "epoch": 0.6786242106677415, "grad_norm": 2.3189172744750977, "learning_rate": 1.628762086340733e-05, "loss": 3.9893, "step": 25255 }, { "epoch": 0.6787585650947199, "grad_norm": 2.460808753967285, "learning_rate": 1.6280811657360754e-05, "loss": 4.168, "step": 25260 }, { "epoch": 0.6788929195216983, "grad_norm": 2.590711832046509, "learning_rate": 1.6274002451314176e-05, "loss": 4.0801, "step": 25265 }, { "epoch": 0.6790272739486766, "grad_norm": 2.290926694869995, "learning_rate": 1.6267193245267602e-05, "loss": 4.1355, "step": 25270 }, { "epoch": 0.679161628375655, "grad_norm": 2.607848882675171, "learning_rate": 1.6260384039221028e-05, "loss": 4.1542, "step": 25275 }, { "epoch": 0.6792959828026334, "grad_norm": 2.6196579933166504, "learning_rate": 1.6253574833174453e-05, "loss": 4.2005, "step": 25280 }, { "epoch": 0.6794303372296118, "grad_norm": 2.4785892963409424, "learning_rate": 1.6246765627127876e-05, "loss": 4.1412, "step": 25285 }, { "epoch": 0.6795646916565901, "grad_norm": 2.637890338897705, "learning_rate": 1.6239956421081305e-05, "loss": 3.8874, "step": 25290 }, { "epoch": 0.6796990460835685, "grad_norm": 2.7890937328338623, "learning_rate": 1.6233147215034727e-05, "loss": 4.1866, "step": 25295 }, { "epoch": 0.6798334005105469, "grad_norm": 2.3304672241210938, "learning_rate": 1.6226338008988153e-05, "loss": 4.0755, "step": 25300 }, { "epoch": 0.6799677549375251, "grad_norm": 2.596991777420044, "learning_rate": 1.621952880294158e-05, "loss": 4.1018, "step": 25305 }, { "epoch": 0.6801021093645035, "grad_norm": 2.4190316200256348, "learning_rate": 1.6212719596895004e-05, "loss": 4.1101, "step": 25310 }, { "epoch": 0.680236463791482, "grad_norm": 2.4319753646850586, "learning_rate": 1.6205910390848426e-05, "loss": 4.1525, "step": 25315 }, { "epoch": 0.6803708182184603, "grad_norm": 2.641751766204834, "learning_rate": 1.6199101184801855e-05, "loss": 4.0055, "step": 25320 }, { "epoch": 0.6805051726454386, "grad_norm": 2.360935926437378, "learning_rate": 1.6192291978755278e-05, "loss": 4.2018, "step": 25325 }, { "epoch": 0.680639527072417, "grad_norm": 2.505279302597046, "learning_rate": 1.6185482772708703e-05, "loss": 4.0116, "step": 25330 }, { "epoch": 0.6807738814993954, "grad_norm": 2.4192252159118652, "learning_rate": 1.617867356666213e-05, "loss": 4.092, "step": 25335 }, { "epoch": 0.6809082359263737, "grad_norm": 2.542574167251587, "learning_rate": 1.617186436061555e-05, "loss": 4.0643, "step": 25340 }, { "epoch": 0.6810425903533521, "grad_norm": 2.599747896194458, "learning_rate": 1.6165055154568977e-05, "loss": 4.0905, "step": 25345 }, { "epoch": 0.6811769447803305, "grad_norm": 2.850055694580078, "learning_rate": 1.6158245948522402e-05, "loss": 4.0939, "step": 25350 }, { "epoch": 0.6813112992073089, "grad_norm": 2.4007153511047363, "learning_rate": 1.6151436742475828e-05, "loss": 4.1392, "step": 25355 }, { "epoch": 0.6814456536342872, "grad_norm": 2.3454623222351074, "learning_rate": 1.6144627536429254e-05, "loss": 4.0838, "step": 25360 }, { "epoch": 0.6815800080612656, "grad_norm": 2.3933870792388916, "learning_rate": 1.613781833038268e-05, "loss": 4.0893, "step": 25365 }, { "epoch": 0.681714362488244, "grad_norm": 2.5503878593444824, "learning_rate": 1.61310091243361e-05, "loss": 4.0006, "step": 25370 }, { "epoch": 0.6818487169152223, "grad_norm": 2.646012783050537, "learning_rate": 1.6124199918289527e-05, "loss": 4.0072, "step": 25375 }, { "epoch": 0.6819830713422007, "grad_norm": 2.5767366886138916, "learning_rate": 1.6117390712242953e-05, "loss": 4.085, "step": 25380 }, { "epoch": 0.6821174257691791, "grad_norm": 2.274829864501953, "learning_rate": 1.611058150619638e-05, "loss": 4.0885, "step": 25385 }, { "epoch": 0.6822517801961575, "grad_norm": 2.2980470657348633, "learning_rate": 1.61037723001498e-05, "loss": 3.99, "step": 25390 }, { "epoch": 0.6823861346231358, "grad_norm": 2.6445915699005127, "learning_rate": 1.609696309410323e-05, "loss": 4.1387, "step": 25395 }, { "epoch": 0.6825204890501142, "grad_norm": 2.5391974449157715, "learning_rate": 1.6090153888056652e-05, "loss": 4.084, "step": 25400 }, { "epoch": 0.6826548434770926, "grad_norm": 2.553640604019165, "learning_rate": 1.6083344682010078e-05, "loss": 4.1359, "step": 25405 }, { "epoch": 0.6827891979040709, "grad_norm": 2.5445332527160645, "learning_rate": 1.6076535475963504e-05, "loss": 4.1411, "step": 25410 }, { "epoch": 0.6829235523310493, "grad_norm": 4.114529609680176, "learning_rate": 1.606972626991693e-05, "loss": 4.0377, "step": 25415 }, { "epoch": 0.6830579067580277, "grad_norm": 2.8669817447662354, "learning_rate": 1.606291706387035e-05, "loss": 4.1125, "step": 25420 }, { "epoch": 0.6831922611850061, "grad_norm": 2.3865182399749756, "learning_rate": 1.605610785782378e-05, "loss": 4.095, "step": 25425 }, { "epoch": 0.6833266156119844, "grad_norm": 2.2028820514678955, "learning_rate": 1.6049298651777203e-05, "loss": 4.0908, "step": 25430 }, { "epoch": 0.6834609700389628, "grad_norm": 2.378664016723633, "learning_rate": 1.604248944573063e-05, "loss": 4.0043, "step": 25435 }, { "epoch": 0.6835953244659412, "grad_norm": 2.6211299896240234, "learning_rate": 1.6035680239684054e-05, "loss": 4.0018, "step": 25440 }, { "epoch": 0.6837296788929195, "grad_norm": 2.7178854942321777, "learning_rate": 1.602887103363748e-05, "loss": 4.21, "step": 25445 }, { "epoch": 0.6838640333198979, "grad_norm": 2.5928947925567627, "learning_rate": 1.6022061827590902e-05, "loss": 4.0779, "step": 25450 }, { "epoch": 0.6839983877468763, "grad_norm": 2.3193347454071045, "learning_rate": 1.601525262154433e-05, "loss": 4.0455, "step": 25455 }, { "epoch": 0.6841327421738547, "grad_norm": 2.448166847229004, "learning_rate": 1.6008443415497753e-05, "loss": 4.1464, "step": 25460 }, { "epoch": 0.684267096600833, "grad_norm": 2.6618316173553467, "learning_rate": 1.600163420945118e-05, "loss": 4.1221, "step": 25465 }, { "epoch": 0.6844014510278114, "grad_norm": 2.28771710395813, "learning_rate": 1.5994825003404605e-05, "loss": 3.9775, "step": 25470 }, { "epoch": 0.6845358054547898, "grad_norm": 2.210848093032837, "learning_rate": 1.5988015797358027e-05, "loss": 4.0862, "step": 25475 }, { "epoch": 0.6846701598817682, "grad_norm": 2.332528591156006, "learning_rate": 1.5981206591311453e-05, "loss": 4.241, "step": 25480 }, { "epoch": 0.6848045143087464, "grad_norm": 2.4872846603393555, "learning_rate": 1.5974397385264878e-05, "loss": 4.059, "step": 25485 }, { "epoch": 0.6849388687357248, "grad_norm": 2.25761079788208, "learning_rate": 1.5967588179218304e-05, "loss": 3.96, "step": 25490 }, { "epoch": 0.6850732231627032, "grad_norm": 2.8823843002319336, "learning_rate": 1.5960778973171726e-05, "loss": 4.1613, "step": 25495 }, { "epoch": 0.6852075775896815, "grad_norm": 2.3922770023345947, "learning_rate": 1.5953969767125155e-05, "loss": 4.1417, "step": 25500 }, { "epoch": 0.6853419320166599, "grad_norm": 2.8931331634521484, "learning_rate": 1.5947160561078577e-05, "loss": 4.0753, "step": 25505 }, { "epoch": 0.6854762864436383, "grad_norm": 2.5333609580993652, "learning_rate": 1.5940351355032003e-05, "loss": 4.2131, "step": 25510 }, { "epoch": 0.6856106408706167, "grad_norm": 2.664149045944214, "learning_rate": 1.593354214898543e-05, "loss": 4.0082, "step": 25515 }, { "epoch": 0.685744995297595, "grad_norm": 2.6572744846343994, "learning_rate": 1.5926732942938854e-05, "loss": 4.1089, "step": 25520 }, { "epoch": 0.6858793497245734, "grad_norm": 2.384441614151001, "learning_rate": 1.5919923736892277e-05, "loss": 3.9885, "step": 25525 }, { "epoch": 0.6860137041515518, "grad_norm": 2.392374038696289, "learning_rate": 1.5913114530845706e-05, "loss": 4.0874, "step": 25530 }, { "epoch": 0.6861480585785301, "grad_norm": 2.5323967933654785, "learning_rate": 1.5906305324799128e-05, "loss": 4.1106, "step": 25535 }, { "epoch": 0.6862824130055085, "grad_norm": 2.487945318222046, "learning_rate": 1.5899496118752554e-05, "loss": 4.0933, "step": 25540 }, { "epoch": 0.6864167674324869, "grad_norm": 2.9004316329956055, "learning_rate": 1.589268691270598e-05, "loss": 4.1185, "step": 25545 }, { "epoch": 0.6865511218594653, "grad_norm": 2.3116226196289062, "learning_rate": 1.5885877706659405e-05, "loss": 4.1599, "step": 25550 }, { "epoch": 0.6866854762864436, "grad_norm": 2.4261958599090576, "learning_rate": 1.5879068500612827e-05, "loss": 3.9264, "step": 25555 }, { "epoch": 0.686819830713422, "grad_norm": 2.582418918609619, "learning_rate": 1.5872259294566256e-05, "loss": 4.08, "step": 25560 }, { "epoch": 0.6869541851404004, "grad_norm": 2.6855051517486572, "learning_rate": 1.586545008851968e-05, "loss": 4.0962, "step": 25565 }, { "epoch": 0.6870885395673787, "grad_norm": 2.6909902095794678, "learning_rate": 1.5858640882473104e-05, "loss": 4.1101, "step": 25570 }, { "epoch": 0.6872228939943571, "grad_norm": 2.3775010108947754, "learning_rate": 1.585183167642653e-05, "loss": 4.1017, "step": 25575 }, { "epoch": 0.6873572484213355, "grad_norm": 2.6080777645111084, "learning_rate": 1.5845022470379956e-05, "loss": 4.2074, "step": 25580 }, { "epoch": 0.6874916028483139, "grad_norm": 2.846959114074707, "learning_rate": 1.5838213264333378e-05, "loss": 4.0835, "step": 25585 }, { "epoch": 0.6876259572752922, "grad_norm": 2.5025112628936768, "learning_rate": 1.5831404058286807e-05, "loss": 4.1205, "step": 25590 }, { "epoch": 0.6877603117022706, "grad_norm": 2.6334714889526367, "learning_rate": 1.582459485224023e-05, "loss": 4.2026, "step": 25595 }, { "epoch": 0.687894666129249, "grad_norm": 2.3208086490631104, "learning_rate": 1.5817785646193655e-05, "loss": 4.1398, "step": 25600 }, { "epoch": 0.6880290205562273, "grad_norm": 2.4433276653289795, "learning_rate": 1.581097644014708e-05, "loss": 4.0472, "step": 25605 }, { "epoch": 0.6881633749832057, "grad_norm": 2.306164503097534, "learning_rate": 1.5804167234100506e-05, "loss": 4.065, "step": 25610 }, { "epoch": 0.6882977294101841, "grad_norm": 2.7130324840545654, "learning_rate": 1.579735802805393e-05, "loss": 4.2022, "step": 25615 }, { "epoch": 0.6884320838371625, "grad_norm": 2.398503303527832, "learning_rate": 1.5790548822007357e-05, "loss": 4.102, "step": 25620 }, { "epoch": 0.6885664382641408, "grad_norm": 2.4207873344421387, "learning_rate": 1.578373961596078e-05, "loss": 4.0968, "step": 25625 }, { "epoch": 0.6887007926911192, "grad_norm": 2.4732120037078857, "learning_rate": 1.5776930409914202e-05, "loss": 4.1201, "step": 25630 }, { "epoch": 0.6888351471180976, "grad_norm": 2.6079118251800537, "learning_rate": 1.577012120386763e-05, "loss": 4.1456, "step": 25635 }, { "epoch": 0.6889695015450759, "grad_norm": 2.6444857120513916, "learning_rate": 1.5763311997821053e-05, "loss": 4.1871, "step": 25640 }, { "epoch": 0.6891038559720543, "grad_norm": 2.657609224319458, "learning_rate": 1.575650279177448e-05, "loss": 3.9731, "step": 25645 }, { "epoch": 0.6892382103990327, "grad_norm": 2.6098034381866455, "learning_rate": 1.5749693585727905e-05, "loss": 4.099, "step": 25650 }, { "epoch": 0.6893725648260111, "grad_norm": 2.384411096572876, "learning_rate": 1.574288437968133e-05, "loss": 4.0822, "step": 25655 }, { "epoch": 0.6895069192529893, "grad_norm": 2.4045567512512207, "learning_rate": 1.5736075173634753e-05, "loss": 3.9606, "step": 25660 }, { "epoch": 0.6896412736799677, "grad_norm": 2.6607918739318848, "learning_rate": 1.572926596758818e-05, "loss": 4.061, "step": 25665 }, { "epoch": 0.6897756281069461, "grad_norm": 2.9390389919281006, "learning_rate": 1.5722456761541604e-05, "loss": 4.0317, "step": 25670 }, { "epoch": 0.6899099825339244, "grad_norm": 2.4175853729248047, "learning_rate": 1.571564755549503e-05, "loss": 3.949, "step": 25675 }, { "epoch": 0.6900443369609028, "grad_norm": 2.511821746826172, "learning_rate": 1.5708838349448455e-05, "loss": 4.0159, "step": 25680 }, { "epoch": 0.6901786913878812, "grad_norm": 2.6313982009887695, "learning_rate": 1.570202914340188e-05, "loss": 3.9743, "step": 25685 }, { "epoch": 0.6903130458148596, "grad_norm": 2.465038537979126, "learning_rate": 1.5695219937355303e-05, "loss": 4.0921, "step": 25690 }, { "epoch": 0.6904474002418379, "grad_norm": 2.6531152725219727, "learning_rate": 1.5688410731308732e-05, "loss": 3.9834, "step": 25695 }, { "epoch": 0.6905817546688163, "grad_norm": 2.4204304218292236, "learning_rate": 1.5681601525262154e-05, "loss": 4.1214, "step": 25700 }, { "epoch": 0.6907161090957947, "grad_norm": 2.6551647186279297, "learning_rate": 1.567479231921558e-05, "loss": 4.1632, "step": 25705 }, { "epoch": 0.6908504635227731, "grad_norm": 2.617694139480591, "learning_rate": 1.5667983113169006e-05, "loss": 4.0901, "step": 25710 }, { "epoch": 0.6909848179497514, "grad_norm": 2.4258627891540527, "learning_rate": 1.566117390712243e-05, "loss": 4.1443, "step": 25715 }, { "epoch": 0.6911191723767298, "grad_norm": 2.5481674671173096, "learning_rate": 1.5654364701075854e-05, "loss": 4.0165, "step": 25720 }, { "epoch": 0.6912535268037082, "grad_norm": 2.423997640609741, "learning_rate": 1.5647555495029283e-05, "loss": 3.9961, "step": 25725 }, { "epoch": 0.6913878812306865, "grad_norm": 2.5215060710906982, "learning_rate": 1.5640746288982705e-05, "loss": 4.18, "step": 25730 }, { "epoch": 0.6915222356576649, "grad_norm": 2.3623666763305664, "learning_rate": 1.563393708293613e-05, "loss": 3.9764, "step": 25735 }, { "epoch": 0.6916565900846433, "grad_norm": 2.2654683589935303, "learning_rate": 1.5627127876889556e-05, "loss": 4.1225, "step": 25740 }, { "epoch": 0.6917909445116217, "grad_norm": 2.313547372817993, "learning_rate": 1.5620318670842982e-05, "loss": 4.0553, "step": 25745 }, { "epoch": 0.6919252989386, "grad_norm": 2.5809359550476074, "learning_rate": 1.5613509464796404e-05, "loss": 4.156, "step": 25750 }, { "epoch": 0.6920596533655784, "grad_norm": 2.3951072692871094, "learning_rate": 1.5606700258749833e-05, "loss": 4.035, "step": 25755 }, { "epoch": 0.6921940077925568, "grad_norm": 2.5846664905548096, "learning_rate": 1.5599891052703256e-05, "loss": 4.1759, "step": 25760 }, { "epoch": 0.6923283622195351, "grad_norm": 2.52017879486084, "learning_rate": 1.559308184665668e-05, "loss": 4.0831, "step": 25765 }, { "epoch": 0.6924627166465135, "grad_norm": 2.46498441696167, "learning_rate": 1.5586272640610107e-05, "loss": 4.1885, "step": 25770 }, { "epoch": 0.6925970710734919, "grad_norm": 2.6984992027282715, "learning_rate": 1.557946343456353e-05, "loss": 4.0509, "step": 25775 }, { "epoch": 0.6927314255004703, "grad_norm": 2.6313719749450684, "learning_rate": 1.5572654228516955e-05, "loss": 3.9799, "step": 25780 }, { "epoch": 0.6928657799274486, "grad_norm": 2.61991024017334, "learning_rate": 1.556584502247038e-05, "loss": 4.0061, "step": 25785 }, { "epoch": 0.693000134354427, "grad_norm": 2.458453893661499, "learning_rate": 1.5559035816423806e-05, "loss": 4.064, "step": 25790 }, { "epoch": 0.6931344887814054, "grad_norm": 2.4678404331207275, "learning_rate": 1.555222661037723e-05, "loss": 4.1361, "step": 25795 }, { "epoch": 0.6932688432083837, "grad_norm": 2.52002215385437, "learning_rate": 1.5545417404330657e-05, "loss": 4.1595, "step": 25800 }, { "epoch": 0.6934031976353621, "grad_norm": 2.4061975479125977, "learning_rate": 1.553860819828408e-05, "loss": 4.1731, "step": 25805 }, { "epoch": 0.6935375520623405, "grad_norm": 2.614473342895508, "learning_rate": 1.5531798992237505e-05, "loss": 4.0842, "step": 25810 }, { "epoch": 0.6936719064893189, "grad_norm": 2.4207589626312256, "learning_rate": 1.552498978619093e-05, "loss": 4.028, "step": 25815 }, { "epoch": 0.6938062609162972, "grad_norm": 2.4167206287384033, "learning_rate": 1.5518180580144357e-05, "loss": 4.0356, "step": 25820 }, { "epoch": 0.6939406153432756, "grad_norm": 2.510004997253418, "learning_rate": 1.551137137409778e-05, "loss": 4.0186, "step": 25825 }, { "epoch": 0.694074969770254, "grad_norm": 2.481091022491455, "learning_rate": 1.5504562168051208e-05, "loss": 4.1985, "step": 25830 }, { "epoch": 0.6942093241972322, "grad_norm": 2.5186636447906494, "learning_rate": 1.549775296200463e-05, "loss": 4.0809, "step": 25835 }, { "epoch": 0.6943436786242106, "grad_norm": 2.8444268703460693, "learning_rate": 1.5490943755958056e-05, "loss": 4.0639, "step": 25840 }, { "epoch": 0.694478033051189, "grad_norm": 2.498209238052368, "learning_rate": 1.548413454991148e-05, "loss": 4.1191, "step": 25845 }, { "epoch": 0.6946123874781674, "grad_norm": 2.4159672260284424, "learning_rate": 1.5477325343864907e-05, "loss": 4.1708, "step": 25850 }, { "epoch": 0.6947467419051457, "grad_norm": 2.6812493801116943, "learning_rate": 1.547051613781833e-05, "loss": 3.9502, "step": 25855 }, { "epoch": 0.6948810963321241, "grad_norm": 2.5849053859710693, "learning_rate": 1.546370693177176e-05, "loss": 4.1407, "step": 25860 }, { "epoch": 0.6950154507591025, "grad_norm": 2.4570093154907227, "learning_rate": 1.545689772572518e-05, "loss": 4.2139, "step": 25865 }, { "epoch": 0.6951498051860808, "grad_norm": 2.679844856262207, "learning_rate": 1.5450088519678606e-05, "loss": 4.1888, "step": 25870 }, { "epoch": 0.6952841596130592, "grad_norm": 2.3229005336761475, "learning_rate": 1.5443279313632032e-05, "loss": 4.0711, "step": 25875 }, { "epoch": 0.6954185140400376, "grad_norm": 2.542799949645996, "learning_rate": 1.5436470107585458e-05, "loss": 4.0377, "step": 25880 }, { "epoch": 0.695552868467016, "grad_norm": 2.6928157806396484, "learning_rate": 1.542966090153888e-05, "loss": 3.9875, "step": 25885 }, { "epoch": 0.6956872228939943, "grad_norm": 2.333193063735962, "learning_rate": 1.542285169549231e-05, "loss": 4.0253, "step": 25890 }, { "epoch": 0.6958215773209727, "grad_norm": 2.515695571899414, "learning_rate": 1.541604248944573e-05, "loss": 3.9663, "step": 25895 }, { "epoch": 0.6959559317479511, "grad_norm": 2.560884952545166, "learning_rate": 1.5409233283399157e-05, "loss": 4.1259, "step": 25900 }, { "epoch": 0.6960902861749295, "grad_norm": 2.3469066619873047, "learning_rate": 1.5402424077352583e-05, "loss": 4.0477, "step": 25905 }, { "epoch": 0.6962246406019078, "grad_norm": 2.4805779457092285, "learning_rate": 1.539561487130601e-05, "loss": 4.1603, "step": 25910 }, { "epoch": 0.6963589950288862, "grad_norm": 2.5253777503967285, "learning_rate": 1.538880566525943e-05, "loss": 4.1476, "step": 25915 }, { "epoch": 0.6964933494558646, "grad_norm": 2.321458101272583, "learning_rate": 1.5381996459212856e-05, "loss": 3.9253, "step": 25920 }, { "epoch": 0.6966277038828429, "grad_norm": 2.587799549102783, "learning_rate": 1.5375187253166282e-05, "loss": 4.2668, "step": 25925 }, { "epoch": 0.6967620583098213, "grad_norm": 2.582622528076172, "learning_rate": 1.5368378047119704e-05, "loss": 3.9654, "step": 25930 }, { "epoch": 0.6968964127367997, "grad_norm": 2.7423887252807617, "learning_rate": 1.5361568841073133e-05, "loss": 4.1667, "step": 25935 }, { "epoch": 0.6970307671637781, "grad_norm": 2.495927572250366, "learning_rate": 1.5354759635026555e-05, "loss": 4.0584, "step": 25940 }, { "epoch": 0.6971651215907564, "grad_norm": 2.783794403076172, "learning_rate": 1.534795042897998e-05, "loss": 3.925, "step": 25945 }, { "epoch": 0.6972994760177348, "grad_norm": 2.5604984760284424, "learning_rate": 1.5341141222933407e-05, "loss": 4.0635, "step": 25950 }, { "epoch": 0.6974338304447132, "grad_norm": 2.4534244537353516, "learning_rate": 1.5334332016886832e-05, "loss": 4.1049, "step": 25955 }, { "epoch": 0.6975681848716915, "grad_norm": 2.4351933002471924, "learning_rate": 1.5327522810840255e-05, "loss": 4.1289, "step": 25960 }, { "epoch": 0.6977025392986699, "grad_norm": 2.7799644470214844, "learning_rate": 1.5320713604793684e-05, "loss": 4.0219, "step": 25965 }, { "epoch": 0.6978368937256483, "grad_norm": 2.5682930946350098, "learning_rate": 1.5313904398747106e-05, "loss": 4.0897, "step": 25970 }, { "epoch": 0.6979712481526267, "grad_norm": 2.560134172439575, "learning_rate": 1.530709519270053e-05, "loss": 4.0724, "step": 25975 }, { "epoch": 0.698105602579605, "grad_norm": 2.387601613998413, "learning_rate": 1.5300285986653957e-05, "loss": 4.0859, "step": 25980 }, { "epoch": 0.6982399570065834, "grad_norm": 2.8333263397216797, "learning_rate": 1.5293476780607383e-05, "loss": 4.1522, "step": 25985 }, { "epoch": 0.6983743114335618, "grad_norm": 2.4672279357910156, "learning_rate": 1.5286667574560805e-05, "loss": 4.0748, "step": 25990 }, { "epoch": 0.6985086658605401, "grad_norm": 2.6123502254486084, "learning_rate": 1.5279858368514234e-05, "loss": 3.9964, "step": 25995 }, { "epoch": 0.6986430202875185, "grad_norm": 2.3748061656951904, "learning_rate": 1.5273049162467657e-05, "loss": 4.2095, "step": 26000 }, { "epoch": 0.6987773747144969, "grad_norm": 2.618396759033203, "learning_rate": 1.5266239956421082e-05, "loss": 3.9188, "step": 26005 }, { "epoch": 0.6989117291414753, "grad_norm": 2.712648868560791, "learning_rate": 1.5259430750374508e-05, "loss": 4.0466, "step": 26010 }, { "epoch": 0.6990460835684535, "grad_norm": 2.7643544673919678, "learning_rate": 1.5252621544327934e-05, "loss": 4.2019, "step": 26015 }, { "epoch": 0.699180437995432, "grad_norm": 2.5711889266967773, "learning_rate": 1.5245812338281358e-05, "loss": 4.0279, "step": 26020 }, { "epoch": 0.6993147924224103, "grad_norm": 2.510981321334839, "learning_rate": 1.5239003132234783e-05, "loss": 4.1401, "step": 26025 }, { "epoch": 0.6994491468493886, "grad_norm": 2.73475980758667, "learning_rate": 1.5232193926188207e-05, "loss": 4.0333, "step": 26030 }, { "epoch": 0.699583501276367, "grad_norm": 2.507845878601074, "learning_rate": 1.5225384720141633e-05, "loss": 4.0343, "step": 26035 }, { "epoch": 0.6997178557033454, "grad_norm": 2.4098801612854004, "learning_rate": 1.5218575514095057e-05, "loss": 4.0579, "step": 26040 }, { "epoch": 0.6998522101303238, "grad_norm": 2.5739855766296387, "learning_rate": 1.5211766308048484e-05, "loss": 4.0744, "step": 26045 }, { "epoch": 0.6999865645573021, "grad_norm": 2.4235591888427734, "learning_rate": 1.5204957102001906e-05, "loss": 4.2313, "step": 26050 }, { "epoch": 0.7001209189842805, "grad_norm": 2.592965602874756, "learning_rate": 1.5198147895955334e-05, "loss": 4.0123, "step": 26055 }, { "epoch": 0.7002552734112589, "grad_norm": 2.484647274017334, "learning_rate": 1.5191338689908758e-05, "loss": 4.1326, "step": 26060 }, { "epoch": 0.7003896278382372, "grad_norm": 2.39859676361084, "learning_rate": 1.5184529483862183e-05, "loss": 4.0571, "step": 26065 }, { "epoch": 0.7005239822652156, "grad_norm": 2.7295615673065186, "learning_rate": 1.5177720277815607e-05, "loss": 4.0313, "step": 26070 }, { "epoch": 0.700658336692194, "grad_norm": 2.836071252822876, "learning_rate": 1.5170911071769031e-05, "loss": 4.1449, "step": 26075 }, { "epoch": 0.7007926911191724, "grad_norm": 2.424800395965576, "learning_rate": 1.5164101865722457e-05, "loss": 4.1189, "step": 26080 }, { "epoch": 0.7009270455461507, "grad_norm": 2.645490884780884, "learning_rate": 1.5157292659675881e-05, "loss": 4.1088, "step": 26085 }, { "epoch": 0.7010613999731291, "grad_norm": 2.3608758449554443, "learning_rate": 1.5150483453629308e-05, "loss": 4.1022, "step": 26090 }, { "epoch": 0.7011957544001075, "grad_norm": 2.830937623977661, "learning_rate": 1.5143674247582732e-05, "loss": 4.1119, "step": 26095 }, { "epoch": 0.7013301088270859, "grad_norm": 2.8303327560424805, "learning_rate": 1.5136865041536158e-05, "loss": 4.1447, "step": 26100 }, { "epoch": 0.7014644632540642, "grad_norm": 2.8125314712524414, "learning_rate": 1.5130055835489582e-05, "loss": 4.0966, "step": 26105 }, { "epoch": 0.7015988176810426, "grad_norm": 2.4406838417053223, "learning_rate": 1.5123246629443008e-05, "loss": 3.9993, "step": 26110 }, { "epoch": 0.701733172108021, "grad_norm": 2.3273072242736816, "learning_rate": 1.5116437423396431e-05, "loss": 4.1481, "step": 26115 }, { "epoch": 0.7018675265349993, "grad_norm": 2.6003124713897705, "learning_rate": 1.5109628217349859e-05, "loss": 3.9864, "step": 26120 }, { "epoch": 0.7020018809619777, "grad_norm": 2.5560860633850098, "learning_rate": 1.5102819011303281e-05, "loss": 4.0363, "step": 26125 }, { "epoch": 0.7021362353889561, "grad_norm": 2.6664600372314453, "learning_rate": 1.5096009805256708e-05, "loss": 3.9703, "step": 26130 }, { "epoch": 0.7022705898159345, "grad_norm": 2.293596029281616, "learning_rate": 1.5089200599210132e-05, "loss": 4.125, "step": 26135 }, { "epoch": 0.7024049442429128, "grad_norm": 2.6780011653900146, "learning_rate": 1.5082391393163558e-05, "loss": 4.0173, "step": 26140 }, { "epoch": 0.7025392986698912, "grad_norm": 2.6006250381469727, "learning_rate": 1.5075582187116982e-05, "loss": 4.102, "step": 26145 }, { "epoch": 0.7026736530968696, "grad_norm": 2.3827452659606934, "learning_rate": 1.506877298107041e-05, "loss": 4.016, "step": 26150 }, { "epoch": 0.7028080075238479, "grad_norm": 2.819798469543457, "learning_rate": 1.5061963775023832e-05, "loss": 4.0897, "step": 26155 }, { "epoch": 0.7029423619508263, "grad_norm": 2.3865647315979004, "learning_rate": 1.5055154568977259e-05, "loss": 4.2287, "step": 26160 }, { "epoch": 0.7030767163778047, "grad_norm": 2.5091264247894287, "learning_rate": 1.5048345362930683e-05, "loss": 4.0695, "step": 26165 }, { "epoch": 0.7032110708047831, "grad_norm": 2.2062034606933594, "learning_rate": 1.5041536156884109e-05, "loss": 4.0841, "step": 26170 }, { "epoch": 0.7033454252317614, "grad_norm": 2.5209779739379883, "learning_rate": 1.5034726950837533e-05, "loss": 4.0818, "step": 26175 }, { "epoch": 0.7034797796587398, "grad_norm": 2.980149030685425, "learning_rate": 1.502791774479096e-05, "loss": 4.1858, "step": 26180 }, { "epoch": 0.7036141340857182, "grad_norm": 2.374828338623047, "learning_rate": 1.5021108538744382e-05, "loss": 3.852, "step": 26185 }, { "epoch": 0.7037484885126964, "grad_norm": 2.4109325408935547, "learning_rate": 1.501429933269781e-05, "loss": 4.0448, "step": 26190 }, { "epoch": 0.7038828429396748, "grad_norm": 2.5528323650360107, "learning_rate": 1.5007490126651234e-05, "loss": 4.0493, "step": 26195 }, { "epoch": 0.7040171973666532, "grad_norm": 2.489856243133545, "learning_rate": 1.500068092060466e-05, "loss": 4.1052, "step": 26200 }, { "epoch": 0.7041515517936316, "grad_norm": 2.6666648387908936, "learning_rate": 1.4993871714558083e-05, "loss": 3.9554, "step": 26205 }, { "epoch": 0.7042859062206099, "grad_norm": 2.6465401649475098, "learning_rate": 1.4987062508511509e-05, "loss": 4.1362, "step": 26210 }, { "epoch": 0.7044202606475883, "grad_norm": 2.885828971862793, "learning_rate": 1.4980253302464933e-05, "loss": 4.0758, "step": 26215 }, { "epoch": 0.7045546150745667, "grad_norm": 2.9625277519226074, "learning_rate": 1.4973444096418357e-05, "loss": 4.1831, "step": 26220 }, { "epoch": 0.704688969501545, "grad_norm": 2.434601068496704, "learning_rate": 1.4966634890371784e-05, "loss": 4.0336, "step": 26225 }, { "epoch": 0.7048233239285234, "grad_norm": 2.815753221511841, "learning_rate": 1.4959825684325206e-05, "loss": 4.0622, "step": 26230 }, { "epoch": 0.7049576783555018, "grad_norm": 2.665599822998047, "learning_rate": 1.4953016478278634e-05, "loss": 4.0688, "step": 26235 }, { "epoch": 0.7050920327824802, "grad_norm": 2.629580497741699, "learning_rate": 1.4946207272232058e-05, "loss": 4.0754, "step": 26240 }, { "epoch": 0.7052263872094585, "grad_norm": 2.5625669956207275, "learning_rate": 1.4939398066185483e-05, "loss": 4.1611, "step": 26245 }, { "epoch": 0.7053607416364369, "grad_norm": 2.6608223915100098, "learning_rate": 1.4932588860138907e-05, "loss": 4.095, "step": 26250 }, { "epoch": 0.7054950960634153, "grad_norm": 2.640458106994629, "learning_rate": 1.4925779654092335e-05, "loss": 4.1411, "step": 26255 }, { "epoch": 0.7056294504903936, "grad_norm": 2.2115797996520996, "learning_rate": 1.4918970448045757e-05, "loss": 4.1492, "step": 26260 }, { "epoch": 0.705763804917372, "grad_norm": 2.2571756839752197, "learning_rate": 1.4912161241999184e-05, "loss": 4.1457, "step": 26265 }, { "epoch": 0.7058981593443504, "grad_norm": 2.726890802383423, "learning_rate": 1.4905352035952608e-05, "loss": 4.1387, "step": 26270 }, { "epoch": 0.7060325137713288, "grad_norm": 2.4840593338012695, "learning_rate": 1.4898542829906034e-05, "loss": 4.1677, "step": 26275 }, { "epoch": 0.7061668681983071, "grad_norm": 2.315808057785034, "learning_rate": 1.4891733623859458e-05, "loss": 4.0852, "step": 26280 }, { "epoch": 0.7063012226252855, "grad_norm": 2.670393228530884, "learning_rate": 1.4884924417812885e-05, "loss": 4.0576, "step": 26285 }, { "epoch": 0.7064355770522639, "grad_norm": 2.3993144035339355, "learning_rate": 1.4878115211766307e-05, "loss": 4.0056, "step": 26290 }, { "epoch": 0.7065699314792422, "grad_norm": 2.2575385570526123, "learning_rate": 1.4871306005719735e-05, "loss": 4.0221, "step": 26295 }, { "epoch": 0.7067042859062206, "grad_norm": 2.685051202774048, "learning_rate": 1.4864496799673159e-05, "loss": 4.1057, "step": 26300 }, { "epoch": 0.706838640333199, "grad_norm": 2.6557161808013916, "learning_rate": 1.4857687593626584e-05, "loss": 4.0754, "step": 26305 }, { "epoch": 0.7069729947601774, "grad_norm": 2.5684964656829834, "learning_rate": 1.4850878387580008e-05, "loss": 3.9826, "step": 26310 }, { "epoch": 0.7071073491871557, "grad_norm": 2.6620099544525146, "learning_rate": 1.4844069181533434e-05, "loss": 4.0753, "step": 26315 }, { "epoch": 0.7072417036141341, "grad_norm": 2.7526049613952637, "learning_rate": 1.4837259975486858e-05, "loss": 3.9656, "step": 26320 }, { "epoch": 0.7073760580411125, "grad_norm": 2.5699222087860107, "learning_rate": 1.4830450769440285e-05, "loss": 4.0007, "step": 26325 }, { "epoch": 0.7075104124680909, "grad_norm": 2.7119340896606445, "learning_rate": 1.482364156339371e-05, "loss": 4.138, "step": 26330 }, { "epoch": 0.7076447668950692, "grad_norm": 2.333024024963379, "learning_rate": 1.4816832357347135e-05, "loss": 4.0829, "step": 26335 }, { "epoch": 0.7077791213220476, "grad_norm": 2.4139792919158936, "learning_rate": 1.4810023151300559e-05, "loss": 3.9464, "step": 26340 }, { "epoch": 0.707913475749026, "grad_norm": 2.683587074279785, "learning_rate": 1.4803213945253985e-05, "loss": 3.9938, "step": 26345 }, { "epoch": 0.7080478301760043, "grad_norm": 2.499746799468994, "learning_rate": 1.4796404739207409e-05, "loss": 4.1731, "step": 26350 }, { "epoch": 0.7081821846029827, "grad_norm": 2.4002585411071777, "learning_rate": 1.4789595533160836e-05, "loss": 4.0827, "step": 26355 }, { "epoch": 0.7083165390299611, "grad_norm": 2.405299186706543, "learning_rate": 1.478278632711426e-05, "loss": 4.0191, "step": 26360 }, { "epoch": 0.7084508934569395, "grad_norm": 2.2736575603485107, "learning_rate": 1.4775977121067682e-05, "loss": 3.9334, "step": 26365 }, { "epoch": 0.7085852478839177, "grad_norm": 2.3930933475494385, "learning_rate": 1.476916791502111e-05, "loss": 4.1344, "step": 26370 }, { "epoch": 0.7087196023108961, "grad_norm": 2.4402098655700684, "learning_rate": 1.4762358708974533e-05, "loss": 4.0797, "step": 26375 }, { "epoch": 0.7088539567378745, "grad_norm": 2.5580220222473145, "learning_rate": 1.4755549502927959e-05, "loss": 4.0266, "step": 26380 }, { "epoch": 0.7089883111648528, "grad_norm": 2.6297922134399414, "learning_rate": 1.4748740296881383e-05, "loss": 4.1007, "step": 26385 }, { "epoch": 0.7091226655918312, "grad_norm": 2.739976167678833, "learning_rate": 1.474193109083481e-05, "loss": 4.0292, "step": 26390 }, { "epoch": 0.7092570200188096, "grad_norm": 2.9241387844085693, "learning_rate": 1.4735121884788233e-05, "loss": 4.0865, "step": 26395 }, { "epoch": 0.709391374445788, "grad_norm": 2.477668285369873, "learning_rate": 1.472831267874166e-05, "loss": 4.0826, "step": 26400 }, { "epoch": 0.7095257288727663, "grad_norm": 2.4050114154815674, "learning_rate": 1.4721503472695084e-05, "loss": 4.2485, "step": 26405 }, { "epoch": 0.7096600832997447, "grad_norm": 2.5341122150421143, "learning_rate": 1.471469426664851e-05, "loss": 4.0752, "step": 26410 }, { "epoch": 0.7097944377267231, "grad_norm": 2.33636474609375, "learning_rate": 1.4707885060601934e-05, "loss": 4.0872, "step": 26415 }, { "epoch": 0.7099287921537014, "grad_norm": 2.5028128623962402, "learning_rate": 1.470107585455536e-05, "loss": 4.1374, "step": 26420 }, { "epoch": 0.7100631465806798, "grad_norm": 2.4000141620635986, "learning_rate": 1.4694266648508783e-05, "loss": 4.1175, "step": 26425 }, { "epoch": 0.7101975010076582, "grad_norm": 2.6969828605651855, "learning_rate": 1.468745744246221e-05, "loss": 4.0836, "step": 26430 }, { "epoch": 0.7103318554346366, "grad_norm": 2.5366203784942627, "learning_rate": 1.4680648236415635e-05, "loss": 4.1898, "step": 26435 }, { "epoch": 0.7104662098616149, "grad_norm": 2.7102112770080566, "learning_rate": 1.467383903036906e-05, "loss": 4.092, "step": 26440 }, { "epoch": 0.7106005642885933, "grad_norm": 2.5550217628479004, "learning_rate": 1.4667029824322484e-05, "loss": 4.1527, "step": 26445 }, { "epoch": 0.7107349187155717, "grad_norm": 2.520310640335083, "learning_rate": 1.466022061827591e-05, "loss": 4.0852, "step": 26450 }, { "epoch": 0.71086927314255, "grad_norm": 2.46159291267395, "learning_rate": 1.4653411412229334e-05, "loss": 4.0588, "step": 26455 }, { "epoch": 0.7110036275695284, "grad_norm": 2.423255205154419, "learning_rate": 1.4646602206182761e-05, "loss": 4.0273, "step": 26460 }, { "epoch": 0.7111379819965068, "grad_norm": 2.449784755706787, "learning_rate": 1.4639793000136185e-05, "loss": 4.0283, "step": 26465 }, { "epoch": 0.7112723364234852, "grad_norm": 2.699544906616211, "learning_rate": 1.463298379408961e-05, "loss": 4.0476, "step": 26470 }, { "epoch": 0.7114066908504635, "grad_norm": 2.4506659507751465, "learning_rate": 1.4626174588043035e-05, "loss": 4.0428, "step": 26475 }, { "epoch": 0.7115410452774419, "grad_norm": 2.6937453746795654, "learning_rate": 1.461936538199646e-05, "loss": 3.9574, "step": 26480 }, { "epoch": 0.7116753997044203, "grad_norm": 2.4880151748657227, "learning_rate": 1.4612556175949884e-05, "loss": 4.111, "step": 26485 }, { "epoch": 0.7118097541313986, "grad_norm": 2.4697813987731934, "learning_rate": 1.4605746969903312e-05, "loss": 4.0468, "step": 26490 }, { "epoch": 0.711944108558377, "grad_norm": 2.5098752975463867, "learning_rate": 1.4598937763856736e-05, "loss": 3.977, "step": 26495 }, { "epoch": 0.7120784629853554, "grad_norm": 2.6967036724090576, "learning_rate": 1.4592128557810161e-05, "loss": 4.1669, "step": 26500 }, { "epoch": 0.7122128174123338, "grad_norm": 2.4671475887298584, "learning_rate": 1.4585319351763585e-05, "loss": 4.051, "step": 26505 }, { "epoch": 0.7123471718393121, "grad_norm": 2.224522352218628, "learning_rate": 1.4578510145717011e-05, "loss": 4.1308, "step": 26510 }, { "epoch": 0.7124815262662905, "grad_norm": 2.4402294158935547, "learning_rate": 1.4571700939670435e-05, "loss": 4.0558, "step": 26515 }, { "epoch": 0.7126158806932689, "grad_norm": 2.3905768394470215, "learning_rate": 1.4564891733623859e-05, "loss": 4.1375, "step": 26520 }, { "epoch": 0.7127502351202473, "grad_norm": 2.386465311050415, "learning_rate": 1.4558082527577285e-05, "loss": 4.1022, "step": 26525 }, { "epoch": 0.7128845895472256, "grad_norm": 2.5660958290100098, "learning_rate": 1.4551273321530709e-05, "loss": 4.0988, "step": 26530 }, { "epoch": 0.713018943974204, "grad_norm": 2.4855527877807617, "learning_rate": 1.4544464115484136e-05, "loss": 4.0624, "step": 26535 }, { "epoch": 0.7131532984011824, "grad_norm": 2.485211133956909, "learning_rate": 1.453765490943756e-05, "loss": 4.1359, "step": 26540 }, { "epoch": 0.7132876528281606, "grad_norm": 2.645197868347168, "learning_rate": 1.4530845703390985e-05, "loss": 4.0461, "step": 26545 }, { "epoch": 0.713422007255139, "grad_norm": 2.536564826965332, "learning_rate": 1.452403649734441e-05, "loss": 4.0964, "step": 26550 }, { "epoch": 0.7135563616821174, "grad_norm": 2.5270562171936035, "learning_rate": 1.4517227291297835e-05, "loss": 4.0555, "step": 26555 }, { "epoch": 0.7136907161090958, "grad_norm": 2.8785104751586914, "learning_rate": 1.4510418085251259e-05, "loss": 4.0692, "step": 26560 }, { "epoch": 0.7138250705360741, "grad_norm": 2.7163147926330566, "learning_rate": 1.4503608879204686e-05, "loss": 4.0407, "step": 26565 }, { "epoch": 0.7139594249630525, "grad_norm": 2.8238093852996826, "learning_rate": 1.449679967315811e-05, "loss": 4.1241, "step": 26570 }, { "epoch": 0.7140937793900309, "grad_norm": 2.3929879665374756, "learning_rate": 1.4489990467111536e-05, "loss": 3.9612, "step": 26575 }, { "epoch": 0.7142281338170092, "grad_norm": 2.4446349143981934, "learning_rate": 1.448318126106496e-05, "loss": 3.9631, "step": 26580 }, { "epoch": 0.7143624882439876, "grad_norm": 2.558551073074341, "learning_rate": 1.4476372055018386e-05, "loss": 4.0756, "step": 26585 }, { "epoch": 0.714496842670966, "grad_norm": 2.3390660285949707, "learning_rate": 1.446956284897181e-05, "loss": 4.0166, "step": 26590 }, { "epoch": 0.7146311970979444, "grad_norm": 2.4889774322509766, "learning_rate": 1.4462753642925237e-05, "loss": 4.1378, "step": 26595 }, { "epoch": 0.7147655515249227, "grad_norm": 2.6086084842681885, "learning_rate": 1.445594443687866e-05, "loss": 4.0735, "step": 26600 }, { "epoch": 0.7148999059519011, "grad_norm": 2.5231826305389404, "learning_rate": 1.4449135230832087e-05, "loss": 3.9422, "step": 26605 }, { "epoch": 0.7150342603788795, "grad_norm": 2.4458184242248535, "learning_rate": 1.444232602478551e-05, "loss": 4.1077, "step": 26610 }, { "epoch": 0.7151686148058578, "grad_norm": 2.3456358909606934, "learning_rate": 1.4435516818738936e-05, "loss": 4.1583, "step": 26615 }, { "epoch": 0.7153029692328362, "grad_norm": 2.6229805946350098, "learning_rate": 1.442870761269236e-05, "loss": 3.9935, "step": 26620 }, { "epoch": 0.7154373236598146, "grad_norm": 2.239637613296509, "learning_rate": 1.4421898406645788e-05, "loss": 3.9939, "step": 26625 }, { "epoch": 0.715571678086793, "grad_norm": 2.418419122695923, "learning_rate": 1.441508920059921e-05, "loss": 4.1198, "step": 26630 }, { "epoch": 0.7157060325137713, "grad_norm": 2.3493337631225586, "learning_rate": 1.4408279994552637e-05, "loss": 4.1135, "step": 26635 }, { "epoch": 0.7158403869407497, "grad_norm": 2.7357707023620605, "learning_rate": 1.4401470788506061e-05, "loss": 4.1177, "step": 26640 }, { "epoch": 0.7159747413677281, "grad_norm": 2.4043729305267334, "learning_rate": 1.4394661582459487e-05, "loss": 4.1447, "step": 26645 }, { "epoch": 0.7161090957947064, "grad_norm": 2.467005491256714, "learning_rate": 1.438785237641291e-05, "loss": 4.0203, "step": 26650 }, { "epoch": 0.7162434502216848, "grad_norm": 2.7044520378112793, "learning_rate": 1.4381043170366338e-05, "loss": 4.0562, "step": 26655 }, { "epoch": 0.7163778046486632, "grad_norm": 2.4532554149627686, "learning_rate": 1.437423396431976e-05, "loss": 4.1097, "step": 26660 }, { "epoch": 0.7165121590756416, "grad_norm": 2.3541104793548584, "learning_rate": 1.4367424758273184e-05, "loss": 3.916, "step": 26665 }, { "epoch": 0.7166465135026199, "grad_norm": 2.4951586723327637, "learning_rate": 1.4360615552226612e-05, "loss": 4.1502, "step": 26670 }, { "epoch": 0.7167808679295983, "grad_norm": 2.4293692111968994, "learning_rate": 1.4353806346180036e-05, "loss": 4.1134, "step": 26675 }, { "epoch": 0.7169152223565767, "grad_norm": 2.527618169784546, "learning_rate": 1.4346997140133461e-05, "loss": 3.9361, "step": 26680 }, { "epoch": 0.717049576783555, "grad_norm": 2.3842506408691406, "learning_rate": 1.4340187934086885e-05, "loss": 4.0462, "step": 26685 }, { "epoch": 0.7171839312105334, "grad_norm": 2.7903149127960205, "learning_rate": 1.4333378728040311e-05, "loss": 4.1503, "step": 26690 }, { "epoch": 0.7173182856375118, "grad_norm": 2.3270418643951416, "learning_rate": 1.4326569521993735e-05, "loss": 3.9648, "step": 26695 }, { "epoch": 0.7174526400644902, "grad_norm": 2.5134284496307373, "learning_rate": 1.4319760315947162e-05, "loss": 4.0265, "step": 26700 }, { "epoch": 0.7175869944914685, "grad_norm": 2.501375675201416, "learning_rate": 1.4312951109900584e-05, "loss": 4.0328, "step": 26705 }, { "epoch": 0.7177213489184469, "grad_norm": 2.3473119735717773, "learning_rate": 1.4306141903854012e-05, "loss": 4.1044, "step": 26710 }, { "epoch": 0.7178557033454253, "grad_norm": 2.488936424255371, "learning_rate": 1.4299332697807436e-05, "loss": 4.1315, "step": 26715 }, { "epoch": 0.7179900577724037, "grad_norm": 2.3840649127960205, "learning_rate": 1.4292523491760861e-05, "loss": 4.0205, "step": 26720 }, { "epoch": 0.718124412199382, "grad_norm": 2.489004135131836, "learning_rate": 1.4285714285714285e-05, "loss": 3.9739, "step": 26725 }, { "epoch": 0.7182587666263603, "grad_norm": 2.424358367919922, "learning_rate": 1.4278905079667713e-05, "loss": 4.0712, "step": 26730 }, { "epoch": 0.7183931210533387, "grad_norm": 2.6073362827301025, "learning_rate": 1.4272095873621135e-05, "loss": 4.1718, "step": 26735 }, { "epoch": 0.718527475480317, "grad_norm": 2.295586585998535, "learning_rate": 1.4265286667574562e-05, "loss": 4.2611, "step": 26740 }, { "epoch": 0.7186618299072954, "grad_norm": 2.448373794555664, "learning_rate": 1.4258477461527986e-05, "loss": 4.1206, "step": 26745 }, { "epoch": 0.7187961843342738, "grad_norm": 2.617144823074341, "learning_rate": 1.4251668255481412e-05, "loss": 4.0857, "step": 26750 }, { "epoch": 0.7189305387612522, "grad_norm": 2.4683449268341064, "learning_rate": 1.4244859049434836e-05, "loss": 4.1375, "step": 26755 }, { "epoch": 0.7190648931882305, "grad_norm": 2.7070183753967285, "learning_rate": 1.4238049843388263e-05, "loss": 4.0269, "step": 26760 }, { "epoch": 0.7191992476152089, "grad_norm": 2.5308358669281006, "learning_rate": 1.4231240637341686e-05, "loss": 3.9775, "step": 26765 }, { "epoch": 0.7193336020421873, "grad_norm": 2.491135597229004, "learning_rate": 1.4224431431295113e-05, "loss": 4.087, "step": 26770 }, { "epoch": 0.7194679564691656, "grad_norm": 2.4731311798095703, "learning_rate": 1.4217622225248537e-05, "loss": 4.1474, "step": 26775 }, { "epoch": 0.719602310896144, "grad_norm": 2.589618444442749, "learning_rate": 1.4210813019201963e-05, "loss": 4.0977, "step": 26780 }, { "epoch": 0.7197366653231224, "grad_norm": 2.7208192348480225, "learning_rate": 1.4204003813155387e-05, "loss": 4.1437, "step": 26785 }, { "epoch": 0.7198710197501008, "grad_norm": 2.7720112800598145, "learning_rate": 1.4197194607108812e-05, "loss": 4.0763, "step": 26790 }, { "epoch": 0.7200053741770791, "grad_norm": 2.3922579288482666, "learning_rate": 1.4190385401062236e-05, "loss": 4.101, "step": 26795 }, { "epoch": 0.7201397286040575, "grad_norm": 2.4025979042053223, "learning_rate": 1.4183576195015664e-05, "loss": 4.0523, "step": 26800 }, { "epoch": 0.7202740830310359, "grad_norm": 2.4548211097717285, "learning_rate": 1.4176766988969087e-05, "loss": 4.055, "step": 26805 }, { "epoch": 0.7204084374580142, "grad_norm": 2.406764030456543, "learning_rate": 1.416995778292251e-05, "loss": 3.9302, "step": 26810 }, { "epoch": 0.7205427918849926, "grad_norm": 2.4489247798919678, "learning_rate": 1.4163148576875937e-05, "loss": 3.9484, "step": 26815 }, { "epoch": 0.720677146311971, "grad_norm": 2.570671796798706, "learning_rate": 1.4156339370829361e-05, "loss": 4.1053, "step": 26820 }, { "epoch": 0.7208115007389494, "grad_norm": 2.5745930671691895, "learning_rate": 1.4149530164782787e-05, "loss": 4.1426, "step": 26825 }, { "epoch": 0.7209458551659277, "grad_norm": 2.4958407878875732, "learning_rate": 1.414272095873621e-05, "loss": 4.0809, "step": 26830 }, { "epoch": 0.7210802095929061, "grad_norm": 2.6374850273132324, "learning_rate": 1.4135911752689638e-05, "loss": 4.1116, "step": 26835 }, { "epoch": 0.7212145640198845, "grad_norm": 2.7088441848754883, "learning_rate": 1.412910254664306e-05, "loss": 4.0093, "step": 26840 }, { "epoch": 0.7213489184468628, "grad_norm": 2.5646162033081055, "learning_rate": 1.4122293340596488e-05, "loss": 4.0006, "step": 26845 }, { "epoch": 0.7214832728738412, "grad_norm": 2.7098500728607178, "learning_rate": 1.4115484134549912e-05, "loss": 3.9412, "step": 26850 }, { "epoch": 0.7216176273008196, "grad_norm": 2.699536085128784, "learning_rate": 1.4108674928503337e-05, "loss": 4.0701, "step": 26855 }, { "epoch": 0.721751981727798, "grad_norm": 2.53094220161438, "learning_rate": 1.4101865722456761e-05, "loss": 4.0556, "step": 26860 }, { "epoch": 0.7218863361547763, "grad_norm": 2.6329643726348877, "learning_rate": 1.4095056516410189e-05, "loss": 3.9674, "step": 26865 }, { "epoch": 0.7220206905817547, "grad_norm": 2.528913736343384, "learning_rate": 1.4088247310363611e-05, "loss": 4.1399, "step": 26870 }, { "epoch": 0.7221550450087331, "grad_norm": 2.6272060871124268, "learning_rate": 1.4081438104317038e-05, "loss": 4.1591, "step": 26875 }, { "epoch": 0.7222893994357114, "grad_norm": 2.2884609699249268, "learning_rate": 1.4074628898270462e-05, "loss": 4.1442, "step": 26880 }, { "epoch": 0.7224237538626898, "grad_norm": 2.301384210586548, "learning_rate": 1.4067819692223888e-05, "loss": 4.1532, "step": 26885 }, { "epoch": 0.7225581082896682, "grad_norm": 2.676708221435547, "learning_rate": 1.4061010486177312e-05, "loss": 4.1725, "step": 26890 }, { "epoch": 0.7226924627166466, "grad_norm": 2.662069797515869, "learning_rate": 1.4054201280130737e-05, "loss": 4.0778, "step": 26895 }, { "epoch": 0.7228268171436248, "grad_norm": 2.5326523780822754, "learning_rate": 1.4047392074084161e-05, "loss": 3.9873, "step": 26900 }, { "epoch": 0.7229611715706032, "grad_norm": 2.6270782947540283, "learning_rate": 1.4040582868037589e-05, "loss": 4.1274, "step": 26905 }, { "epoch": 0.7230955259975816, "grad_norm": 2.7088165283203125, "learning_rate": 1.4033773661991013e-05, "loss": 4.0239, "step": 26910 }, { "epoch": 0.7232298804245599, "grad_norm": 2.6652817726135254, "learning_rate": 1.4026964455944438e-05, "loss": 4.1503, "step": 26915 }, { "epoch": 0.7233642348515383, "grad_norm": 2.4388930797576904, "learning_rate": 1.4020155249897862e-05, "loss": 4.0661, "step": 26920 }, { "epoch": 0.7234985892785167, "grad_norm": 2.6126644611358643, "learning_rate": 1.4013346043851288e-05, "loss": 4.015, "step": 26925 }, { "epoch": 0.7236329437054951, "grad_norm": 2.4859893321990967, "learning_rate": 1.4006536837804712e-05, "loss": 3.9891, "step": 26930 }, { "epoch": 0.7237672981324734, "grad_norm": 2.994967222213745, "learning_rate": 1.399972763175814e-05, "loss": 4.0666, "step": 26935 }, { "epoch": 0.7239016525594518, "grad_norm": 2.969782829284668, "learning_rate": 1.3992918425711563e-05, "loss": 3.9794, "step": 26940 }, { "epoch": 0.7240360069864302, "grad_norm": 2.380441665649414, "learning_rate": 1.3986109219664989e-05, "loss": 4.1918, "step": 26945 }, { "epoch": 0.7241703614134086, "grad_norm": 2.4934520721435547, "learning_rate": 1.3979300013618413e-05, "loss": 4.0658, "step": 26950 }, { "epoch": 0.7243047158403869, "grad_norm": 2.27193546295166, "learning_rate": 1.3972490807571839e-05, "loss": 4.1259, "step": 26955 }, { "epoch": 0.7244390702673653, "grad_norm": 2.8975205421447754, "learning_rate": 1.3965681601525263e-05, "loss": 4.1436, "step": 26960 }, { "epoch": 0.7245734246943437, "grad_norm": 2.549117088317871, "learning_rate": 1.3958872395478686e-05, "loss": 4.0436, "step": 26965 }, { "epoch": 0.724707779121322, "grad_norm": 2.5144097805023193, "learning_rate": 1.3952063189432112e-05, "loss": 4.048, "step": 26970 }, { "epoch": 0.7248421335483004, "grad_norm": 2.318542718887329, "learning_rate": 1.3945253983385536e-05, "loss": 3.9959, "step": 26975 }, { "epoch": 0.7249764879752788, "grad_norm": 2.5673329830169678, "learning_rate": 1.3938444777338963e-05, "loss": 4.1235, "step": 26980 }, { "epoch": 0.7251108424022572, "grad_norm": 2.5433247089385986, "learning_rate": 1.3931635571292387e-05, "loss": 4.1209, "step": 26985 }, { "epoch": 0.7252451968292355, "grad_norm": 2.7180373668670654, "learning_rate": 1.3924826365245813e-05, "loss": 4.015, "step": 26990 }, { "epoch": 0.7253795512562139, "grad_norm": 2.5770130157470703, "learning_rate": 1.3918017159199237e-05, "loss": 4.0642, "step": 26995 }, { "epoch": 0.7255139056831923, "grad_norm": 2.6936371326446533, "learning_rate": 1.3911207953152663e-05, "loss": 4.1594, "step": 27000 }, { "epoch": 0.7256482601101706, "grad_norm": 2.5869860649108887, "learning_rate": 1.3904398747106087e-05, "loss": 4.211, "step": 27005 }, { "epoch": 0.725782614537149, "grad_norm": 2.4290127754211426, "learning_rate": 1.3897589541059514e-05, "loss": 4.0083, "step": 27010 }, { "epoch": 0.7259169689641274, "grad_norm": 2.606985092163086, "learning_rate": 1.3890780335012938e-05, "loss": 4.0914, "step": 27015 }, { "epoch": 0.7260513233911058, "grad_norm": 2.483344316482544, "learning_rate": 1.3883971128966364e-05, "loss": 4.1123, "step": 27020 }, { "epoch": 0.7261856778180841, "grad_norm": 2.521674633026123, "learning_rate": 1.3877161922919788e-05, "loss": 4.0989, "step": 27025 }, { "epoch": 0.7263200322450625, "grad_norm": 2.3827970027923584, "learning_rate": 1.3870352716873213e-05, "loss": 4.0836, "step": 27030 }, { "epoch": 0.7264543866720409, "grad_norm": 2.3737032413482666, "learning_rate": 1.3863543510826637e-05, "loss": 4.0838, "step": 27035 }, { "epoch": 0.7265887410990192, "grad_norm": 2.5359184741973877, "learning_rate": 1.3856734304780065e-05, "loss": 4.0516, "step": 27040 }, { "epoch": 0.7267230955259976, "grad_norm": 2.4275460243225098, "learning_rate": 1.3849925098733489e-05, "loss": 4.0862, "step": 27045 }, { "epoch": 0.726857449952976, "grad_norm": 2.7106385231018066, "learning_rate": 1.3843115892686914e-05, "loss": 4.016, "step": 27050 }, { "epoch": 0.7269918043799544, "grad_norm": 2.7803778648376465, "learning_rate": 1.3836306686640338e-05, "loss": 3.9879, "step": 27055 }, { "epoch": 0.7271261588069327, "grad_norm": 2.810943126678467, "learning_rate": 1.3829497480593764e-05, "loss": 4.0587, "step": 27060 }, { "epoch": 0.727260513233911, "grad_norm": 2.1625187397003174, "learning_rate": 1.3822688274547188e-05, "loss": 3.9987, "step": 27065 }, { "epoch": 0.7273948676608895, "grad_norm": 2.5665905475616455, "learning_rate": 1.3815879068500615e-05, "loss": 4.031, "step": 27070 }, { "epoch": 0.7275292220878677, "grad_norm": 2.3750298023223877, "learning_rate": 1.3809069862454037e-05, "loss": 3.9724, "step": 27075 }, { "epoch": 0.7276635765148461, "grad_norm": 2.4392318725585938, "learning_rate": 1.3802260656407465e-05, "loss": 3.9896, "step": 27080 }, { "epoch": 0.7277979309418245, "grad_norm": 2.6792218685150146, "learning_rate": 1.3795451450360889e-05, "loss": 4.0385, "step": 27085 }, { "epoch": 0.727932285368803, "grad_norm": 2.608372688293457, "learning_rate": 1.3788642244314314e-05, "loss": 4.005, "step": 27090 }, { "epoch": 0.7280666397957812, "grad_norm": 2.510970115661621, "learning_rate": 1.3781833038267738e-05, "loss": 4.0427, "step": 27095 }, { "epoch": 0.7282009942227596, "grad_norm": 2.5644888877868652, "learning_rate": 1.3775023832221166e-05, "loss": 4.1408, "step": 27100 }, { "epoch": 0.728335348649738, "grad_norm": 2.51778507232666, "learning_rate": 1.3768214626174588e-05, "loss": 3.9722, "step": 27105 }, { "epoch": 0.7284697030767163, "grad_norm": 2.6161813735961914, "learning_rate": 1.3761405420128012e-05, "loss": 3.9461, "step": 27110 }, { "epoch": 0.7286040575036947, "grad_norm": 2.4981157779693604, "learning_rate": 1.375459621408144e-05, "loss": 4.16, "step": 27115 }, { "epoch": 0.7287384119306731, "grad_norm": 2.6664180755615234, "learning_rate": 1.3747787008034863e-05, "loss": 4.1732, "step": 27120 }, { "epoch": 0.7288727663576515, "grad_norm": 2.487860679626465, "learning_rate": 1.3740977801988289e-05, "loss": 4.0698, "step": 27125 }, { "epoch": 0.7290071207846298, "grad_norm": 2.5451912879943848, "learning_rate": 1.3734168595941713e-05, "loss": 4.1232, "step": 27130 }, { "epoch": 0.7291414752116082, "grad_norm": 2.696127414703369, "learning_rate": 1.3727359389895139e-05, "loss": 3.983, "step": 27135 }, { "epoch": 0.7292758296385866, "grad_norm": 2.427971839904785, "learning_rate": 1.3720550183848562e-05, "loss": 4.0678, "step": 27140 }, { "epoch": 0.729410184065565, "grad_norm": 2.5199203491210938, "learning_rate": 1.371374097780199e-05, "loss": 4.0019, "step": 27145 }, { "epoch": 0.7295445384925433, "grad_norm": 2.5085549354553223, "learning_rate": 1.3706931771755414e-05, "loss": 4.042, "step": 27150 }, { "epoch": 0.7296788929195217, "grad_norm": 2.4763777256011963, "learning_rate": 1.370012256570884e-05, "loss": 4.0221, "step": 27155 }, { "epoch": 0.7298132473465001, "grad_norm": 2.6997640132904053, "learning_rate": 1.3693313359662263e-05, "loss": 4.045, "step": 27160 }, { "epoch": 0.7299476017734784, "grad_norm": 2.5226073265075684, "learning_rate": 1.3686504153615689e-05, "loss": 4.0735, "step": 27165 }, { "epoch": 0.7300819562004568, "grad_norm": 2.5416927337646484, "learning_rate": 1.3679694947569113e-05, "loss": 4.0002, "step": 27170 }, { "epoch": 0.7302163106274352, "grad_norm": 2.5626394748687744, "learning_rate": 1.367288574152254e-05, "loss": 4.1077, "step": 27175 }, { "epoch": 0.7303506650544136, "grad_norm": 2.611567735671997, "learning_rate": 1.3666076535475963e-05, "loss": 4.0402, "step": 27180 }, { "epoch": 0.7304850194813919, "grad_norm": 2.5577335357666016, "learning_rate": 1.365926732942939e-05, "loss": 4.1411, "step": 27185 }, { "epoch": 0.7306193739083703, "grad_norm": 2.5707876682281494, "learning_rate": 1.3652458123382814e-05, "loss": 4.0449, "step": 27190 }, { "epoch": 0.7307537283353487, "grad_norm": 2.495161771774292, "learning_rate": 1.364564891733624e-05, "loss": 4.1005, "step": 27195 }, { "epoch": 0.730888082762327, "grad_norm": 2.575080156326294, "learning_rate": 1.3638839711289664e-05, "loss": 4.1017, "step": 27200 }, { "epoch": 0.7310224371893054, "grad_norm": 2.792287826538086, "learning_rate": 1.3632030505243091e-05, "loss": 4.0119, "step": 27205 }, { "epoch": 0.7311567916162838, "grad_norm": 2.409008741378784, "learning_rate": 1.3625221299196513e-05, "loss": 4.015, "step": 27210 }, { "epoch": 0.7312911460432622, "grad_norm": 2.53548264503479, "learning_rate": 1.361841209314994e-05, "loss": 3.9637, "step": 27215 }, { "epoch": 0.7314255004702405, "grad_norm": 2.6357340812683105, "learning_rate": 1.3611602887103365e-05, "loss": 4.1447, "step": 27220 }, { "epoch": 0.7315598548972189, "grad_norm": 2.479964256286621, "learning_rate": 1.360479368105679e-05, "loss": 4.0892, "step": 27225 }, { "epoch": 0.7316942093241973, "grad_norm": 2.343773365020752, "learning_rate": 1.3597984475010214e-05, "loss": 3.9581, "step": 27230 }, { "epoch": 0.7318285637511756, "grad_norm": 2.45793080329895, "learning_rate": 1.3591175268963641e-05, "loss": 4.0773, "step": 27235 }, { "epoch": 0.731962918178154, "grad_norm": 2.63311505317688, "learning_rate": 1.3584366062917064e-05, "loss": 4.2286, "step": 27240 }, { "epoch": 0.7320972726051324, "grad_norm": 2.664856433868408, "learning_rate": 1.3577556856870491e-05, "loss": 3.9846, "step": 27245 }, { "epoch": 0.7322316270321108, "grad_norm": 2.6219139099121094, "learning_rate": 1.3570747650823915e-05, "loss": 4.1253, "step": 27250 }, { "epoch": 0.732365981459089, "grad_norm": 2.734297037124634, "learning_rate": 1.356393844477734e-05, "loss": 4.0539, "step": 27255 }, { "epoch": 0.7325003358860674, "grad_norm": 2.7208316326141357, "learning_rate": 1.3557129238730765e-05, "loss": 4.0312, "step": 27260 }, { "epoch": 0.7326346903130458, "grad_norm": 2.797855854034424, "learning_rate": 1.3550320032684189e-05, "loss": 4.2847, "step": 27265 }, { "epoch": 0.7327690447400241, "grad_norm": 2.7639501094818115, "learning_rate": 1.3543510826637614e-05, "loss": 4.0207, "step": 27270 }, { "epoch": 0.7329033991670025, "grad_norm": 2.6977858543395996, "learning_rate": 1.3536701620591038e-05, "loss": 3.9776, "step": 27275 }, { "epoch": 0.7330377535939809, "grad_norm": 2.822171449661255, "learning_rate": 1.3529892414544466e-05, "loss": 3.996, "step": 27280 }, { "epoch": 0.7331721080209593, "grad_norm": 2.54716157913208, "learning_rate": 1.3523083208497888e-05, "loss": 4.0811, "step": 27285 }, { "epoch": 0.7333064624479376, "grad_norm": 2.502875804901123, "learning_rate": 1.3516274002451315e-05, "loss": 4.2152, "step": 27290 }, { "epoch": 0.733440816874916, "grad_norm": 2.431347370147705, "learning_rate": 1.350946479640474e-05, "loss": 4.0868, "step": 27295 }, { "epoch": 0.7335751713018944, "grad_norm": 2.706016778945923, "learning_rate": 1.3502655590358165e-05, "loss": 4.0415, "step": 27300 }, { "epoch": 0.7337095257288727, "grad_norm": 2.4048750400543213, "learning_rate": 1.3495846384311589e-05, "loss": 4.0433, "step": 27305 }, { "epoch": 0.7338438801558511, "grad_norm": 2.784902334213257, "learning_rate": 1.3489037178265016e-05, "loss": 4.1096, "step": 27310 }, { "epoch": 0.7339782345828295, "grad_norm": 2.674161195755005, "learning_rate": 1.3482227972218438e-05, "loss": 3.9948, "step": 27315 }, { "epoch": 0.7341125890098079, "grad_norm": 2.5665650367736816, "learning_rate": 1.3475418766171866e-05, "loss": 4.0708, "step": 27320 }, { "epoch": 0.7342469434367862, "grad_norm": 2.6523518562316895, "learning_rate": 1.346860956012529e-05, "loss": 4.0739, "step": 27325 }, { "epoch": 0.7343812978637646, "grad_norm": 2.5107228755950928, "learning_rate": 1.3461800354078715e-05, "loss": 3.995, "step": 27330 }, { "epoch": 0.734515652290743, "grad_norm": 2.6661760807037354, "learning_rate": 1.345499114803214e-05, "loss": 3.9873, "step": 27335 }, { "epoch": 0.7346500067177214, "grad_norm": 2.4277145862579346, "learning_rate": 1.3448181941985567e-05, "loss": 4.1612, "step": 27340 }, { "epoch": 0.7347843611446997, "grad_norm": 2.481402635574341, "learning_rate": 1.3441372735938989e-05, "loss": 4.1054, "step": 27345 }, { "epoch": 0.7349187155716781, "grad_norm": 2.4681496620178223, "learning_rate": 1.3434563529892416e-05, "loss": 4.0591, "step": 27350 }, { "epoch": 0.7350530699986565, "grad_norm": 2.743870258331299, "learning_rate": 1.342775432384584e-05, "loss": 4.0498, "step": 27355 }, { "epoch": 0.7351874244256348, "grad_norm": 2.7099568843841553, "learning_rate": 1.3420945117799266e-05, "loss": 3.8206, "step": 27360 }, { "epoch": 0.7353217788526132, "grad_norm": 2.474907159805298, "learning_rate": 1.341413591175269e-05, "loss": 4.1962, "step": 27365 }, { "epoch": 0.7354561332795916, "grad_norm": 2.406263589859009, "learning_rate": 1.3407326705706116e-05, "loss": 4.1776, "step": 27370 }, { "epoch": 0.73559048770657, "grad_norm": 2.7678208351135254, "learning_rate": 1.340051749965954e-05, "loss": 4.1382, "step": 27375 }, { "epoch": 0.7357248421335483, "grad_norm": 2.481567859649658, "learning_rate": 1.3393708293612967e-05, "loss": 4.0872, "step": 27380 }, { "epoch": 0.7358591965605267, "grad_norm": 2.584256649017334, "learning_rate": 1.3386899087566391e-05, "loss": 4.0516, "step": 27385 }, { "epoch": 0.7359935509875051, "grad_norm": 2.579026460647583, "learning_rate": 1.3380089881519817e-05, "loss": 4.0055, "step": 27390 }, { "epoch": 0.7361279054144834, "grad_norm": 2.5526628494262695, "learning_rate": 1.337328067547324e-05, "loss": 4.0874, "step": 27395 }, { "epoch": 0.7362622598414618, "grad_norm": 2.367049217224121, "learning_rate": 1.3366471469426666e-05, "loss": 3.9098, "step": 27400 }, { "epoch": 0.7363966142684402, "grad_norm": 2.5932586193084717, "learning_rate": 1.335966226338009e-05, "loss": 4.0575, "step": 27405 }, { "epoch": 0.7365309686954186, "grad_norm": 2.5667803287506104, "learning_rate": 1.3352853057333514e-05, "loss": 4.0633, "step": 27410 }, { "epoch": 0.7366653231223969, "grad_norm": 2.404111385345459, "learning_rate": 1.3346043851286941e-05, "loss": 4.0838, "step": 27415 }, { "epoch": 0.7367996775493753, "grad_norm": 2.534876823425293, "learning_rate": 1.3339234645240364e-05, "loss": 4.1252, "step": 27420 }, { "epoch": 0.7369340319763537, "grad_norm": 2.597630262374878, "learning_rate": 1.3332425439193791e-05, "loss": 4.0765, "step": 27425 }, { "epoch": 0.7370683864033319, "grad_norm": 2.4507393836975098, "learning_rate": 1.3325616233147215e-05, "loss": 4.1558, "step": 27430 }, { "epoch": 0.7372027408303103, "grad_norm": 2.5739331245422363, "learning_rate": 1.331880702710064e-05, "loss": 4.0139, "step": 27435 }, { "epoch": 0.7373370952572887, "grad_norm": 2.312662124633789, "learning_rate": 1.3311997821054065e-05, "loss": 4.0445, "step": 27440 }, { "epoch": 0.7374714496842671, "grad_norm": 2.679407835006714, "learning_rate": 1.330518861500749e-05, "loss": 4.1218, "step": 27445 }, { "epoch": 0.7376058041112454, "grad_norm": 2.493534803390503, "learning_rate": 1.3298379408960914e-05, "loss": 4.0074, "step": 27450 }, { "epoch": 0.7377401585382238, "grad_norm": 2.6585192680358887, "learning_rate": 1.3291570202914342e-05, "loss": 4.0521, "step": 27455 }, { "epoch": 0.7378745129652022, "grad_norm": 2.637596845626831, "learning_rate": 1.3284760996867766e-05, "loss": 4.1324, "step": 27460 }, { "epoch": 0.7380088673921805, "grad_norm": 2.4800329208374023, "learning_rate": 1.3277951790821191e-05, "loss": 3.9652, "step": 27465 }, { "epoch": 0.7381432218191589, "grad_norm": 2.598944664001465, "learning_rate": 1.3271142584774615e-05, "loss": 4.026, "step": 27470 }, { "epoch": 0.7382775762461373, "grad_norm": 2.6920406818389893, "learning_rate": 1.3264333378728041e-05, "loss": 4.1017, "step": 27475 }, { "epoch": 0.7384119306731157, "grad_norm": 2.6152634620666504, "learning_rate": 1.3257524172681465e-05, "loss": 4.0182, "step": 27480 }, { "epoch": 0.738546285100094, "grad_norm": 2.587390899658203, "learning_rate": 1.3250714966634892e-05, "loss": 4.0262, "step": 27485 }, { "epoch": 0.7386806395270724, "grad_norm": 2.7638044357299805, "learning_rate": 1.3243905760588316e-05, "loss": 4.205, "step": 27490 }, { "epoch": 0.7388149939540508, "grad_norm": 2.6114258766174316, "learning_rate": 1.3237096554541742e-05, "loss": 4.028, "step": 27495 }, { "epoch": 0.7389493483810291, "grad_norm": 2.3654181957244873, "learning_rate": 1.3230287348495166e-05, "loss": 4.0319, "step": 27500 } ], "logging_steps": 5, "max_steps": 37215, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0038542288754278e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }