{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 200245, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.993882493944918e-05, "grad_norm": 0.09688733518123627, "learning_rate": 6.000000000000001e-07, "loss": 10.7967, "step": 10 }, { "epoch": 9.987764987889836e-05, "grad_norm": 0.09242203086614609, "learning_rate": 1.2000000000000002e-06, "loss": 10.7959, "step": 20 }, { "epoch": 0.00014981647481834753, "grad_norm": 0.09640923142433167, "learning_rate": 1.8e-06, "loss": 10.7964, "step": 30 }, { "epoch": 0.0001997552997577967, "grad_norm": 0.09338584542274475, "learning_rate": 2.4000000000000003e-06, "loss": 10.7963, "step": 40 }, { "epoch": 0.00024969412469724587, "grad_norm": 0.09739788621664047, "learning_rate": 3e-06, "loss": 10.796, "step": 50 }, { "epoch": 0.00029963294963669505, "grad_norm": 0.09315921366214752, "learning_rate": 3.6e-06, "loss": 10.7954, "step": 60 }, { "epoch": 0.00034957177457614424, "grad_norm": 0.09130595624446869, "learning_rate": 4.2000000000000004e-06, "loss": 10.7956, "step": 70 }, { "epoch": 0.0003995105995155934, "grad_norm": 0.09694769978523254, "learning_rate": 4.800000000000001e-06, "loss": 10.7961, "step": 80 }, { "epoch": 0.00044944942445504255, "grad_norm": 0.09814093261957169, "learning_rate": 5.4e-06, "loss": 10.7956, "step": 90 }, { "epoch": 0.0004993882493944917, "grad_norm": 0.0940699502825737, "learning_rate": 6e-06, "loss": 10.7957, "step": 100 }, { "epoch": 0.000549327074333941, "grad_norm": 0.09281250834465027, "learning_rate": 6.6e-06, "loss": 10.7957, "step": 110 }, { "epoch": 0.0005992658992733901, "grad_norm": 0.09708160907030106, "learning_rate": 7.2e-06, "loss": 10.7961, "step": 120 }, { "epoch": 0.0006492047242128392, "grad_norm": 0.09620024263858795, "learning_rate": 7.8e-06, "loss": 10.7957, "step": 130 }, { "epoch": 0.0006991435491522885, "grad_norm": 0.09342040121555328, "learning_rate": 8.400000000000001e-06, "loss": 10.7958, "step": 140 }, { "epoch": 0.0007490823740917376, "grad_norm": 0.09522400051355362, "learning_rate": 9e-06, "loss": 10.7951, "step": 150 }, { "epoch": 0.0007990211990311868, "grad_norm": 0.09366803616285324, "learning_rate": 9.600000000000001e-06, "loss": 10.7963, "step": 160 }, { "epoch": 0.000848960023970636, "grad_norm": 0.10423287004232407, "learning_rate": 1.02e-05, "loss": 10.796, "step": 170 }, { "epoch": 0.0008988988489100851, "grad_norm": 0.09598524123430252, "learning_rate": 1.08e-05, "loss": 10.7951, "step": 180 }, { "epoch": 0.0009488376738495343, "grad_norm": 0.09360150247812271, "learning_rate": 1.1400000000000001e-05, "loss": 10.795, "step": 190 }, { "epoch": 0.0009987764987889835, "grad_norm": 0.09207968413829803, "learning_rate": 1.2e-05, "loss": 10.7947, "step": 200 }, { "epoch": 0.0010487153237284327, "grad_norm": 0.09178674221038818, "learning_rate": 1.26e-05, "loss": 10.7944, "step": 210 }, { "epoch": 0.001098654148667882, "grad_norm": 0.09687130898237228, "learning_rate": 1.32e-05, "loss": 10.7945, "step": 220 }, { "epoch": 0.001148592973607331, "grad_norm": 0.08996697515249252, "learning_rate": 1.3800000000000002e-05, "loss": 10.7939, "step": 230 }, { "epoch": 0.0011985317985467802, "grad_norm": 0.09358258545398712, "learning_rate": 1.44e-05, "loss": 10.7943, "step": 240 }, { "epoch": 0.0012484706234862294, "grad_norm": 0.09620951116085052, "learning_rate": 1.5e-05, "loss": 12.5216, "step": 250 }, { "epoch": 0.0012984094484256785, "grad_norm": 12312.0439453125, "learning_rate": 1.56e-05, "loss": 15.3101, "step": 260 }, { "epoch": 0.0013483482733651277, "grad_norm": 4373971.5, "learning_rate": 1.62e-05, "loss": 11.4047, "step": 270 }, { "epoch": 0.001398287098304577, "grad_norm": 0.09267476201057434, "learning_rate": 1.6800000000000002e-05, "loss": 13.7008, "step": 280 }, { "epoch": 0.001448225923244026, "grad_norm": 0.09616652131080627, "learning_rate": 1.74e-05, "loss": 10.7943, "step": 290 }, { "epoch": 0.0014981647481834752, "grad_norm": 0.09366879612207413, "learning_rate": 1.8e-05, "loss": 10.7936, "step": 300 }, { "epoch": 0.0015481035731229244, "grad_norm": 0.10064131766557693, "learning_rate": 1.86e-05, "loss": 10.7939, "step": 310 }, { "epoch": 0.0015980423980623737, "grad_norm": 0.09544051438570023, "learning_rate": 1.9200000000000003e-05, "loss": 10.7934, "step": 320 }, { "epoch": 0.0016479812230018227, "grad_norm": 0.09638150781393051, "learning_rate": 1.98e-05, "loss": 10.7934, "step": 330 }, { "epoch": 0.001697920047941272, "grad_norm": 0.09321698546409607, "learning_rate": 2.04e-05, "loss": 10.793, "step": 340 }, { "epoch": 0.0017478588728807212, "grad_norm": 0.09490924328565598, "learning_rate": 2.1e-05, "loss": 10.7924, "step": 350 }, { "epoch": 0.0017977976978201702, "grad_norm": 0.09169238060712814, "learning_rate": 2.16e-05, "loss": 10.7926, "step": 360 }, { "epoch": 0.0018477365227596194, "grad_norm": 0.09536827355623245, "learning_rate": 2.22e-05, "loss": 10.7916, "step": 370 }, { "epoch": 0.0018976753476990687, "grad_norm": 0.0935748964548111, "learning_rate": 2.2800000000000002e-05, "loss": 10.7922, "step": 380 }, { "epoch": 0.001947614172638518, "grad_norm": 0.09191051870584488, "learning_rate": 2.3400000000000003e-05, "loss": 10.7917, "step": 390 }, { "epoch": 0.001997552997577967, "grad_norm": 0.09062574058771133, "learning_rate": 2.4e-05, "loss": 10.7913, "step": 400 }, { "epoch": 0.002047491822517416, "grad_norm": 0.09686335176229477, "learning_rate": 2.4599999999999998e-05, "loss": 10.7911, "step": 410 }, { "epoch": 0.0020974306474568654, "grad_norm": 0.09458820521831512, "learning_rate": 2.52e-05, "loss": 10.7904, "step": 420 }, { "epoch": 0.0021473694723963144, "grad_norm": 0.09392788261175156, "learning_rate": 2.58e-05, "loss": 10.79, "step": 430 }, { "epoch": 0.002197308297335764, "grad_norm": 0.09329679608345032, "learning_rate": 2.64e-05, "loss": 10.7903, "step": 440 }, { "epoch": 0.002247247122275213, "grad_norm": 0.09585896879434586, "learning_rate": 2.7000000000000002e-05, "loss": 10.7897, "step": 450 }, { "epoch": 0.002297185947214662, "grad_norm": 0.0919712483882904, "learning_rate": 2.7600000000000003e-05, "loss": 10.7897, "step": 460 }, { "epoch": 0.0023471247721541114, "grad_norm": 0.09430967271327972, "learning_rate": 2.8199999999999998e-05, "loss": 10.7888, "step": 470 }, { "epoch": 0.0023970635970935604, "grad_norm": 0.09424952417612076, "learning_rate": 2.88e-05, "loss": 10.7897, "step": 480 }, { "epoch": 0.0024470024220330094, "grad_norm": 0.09542497992515564, "learning_rate": 2.94e-05, "loss": 10.7901, "step": 490 }, { "epoch": 0.002496941246972459, "grad_norm": 0.09828152507543564, "learning_rate": 3e-05, "loss": 11.0404, "step": 500 }, { "epoch": 0.002546880071911908, "grad_norm": 0.09262736141681671, "learning_rate": 2.9998498085058448e-05, "loss": 10.7884, "step": 510 }, { "epoch": 0.002596818896851357, "grad_norm": 0.0968865230679512, "learning_rate": 2.99969961701169e-05, "loss": 10.7878, "step": 520 }, { "epoch": 0.0026467577217908064, "grad_norm": 0.0930342897772789, "learning_rate": 2.9995494255175348e-05, "loss": 10.7871, "step": 530 }, { "epoch": 0.0026966965467302554, "grad_norm": 0.0979408249258995, "learning_rate": 2.99939923402338e-05, "loss": 10.7879, "step": 540 }, { "epoch": 0.0027466353716697044, "grad_norm": 0.09784050285816193, "learning_rate": 2.9992490425292248e-05, "loss": 10.7864, "step": 550 }, { "epoch": 0.002796574196609154, "grad_norm": 0.0974036231637001, "learning_rate": 2.9990988510350695e-05, "loss": 10.786, "step": 560 }, { "epoch": 0.002846513021548603, "grad_norm": 0.09453345090150833, "learning_rate": 2.998948659540915e-05, "loss": 10.786, "step": 570 }, { "epoch": 0.002896451846488052, "grad_norm": 0.09815464913845062, "learning_rate": 2.9987984680467595e-05, "loss": 10.7855, "step": 580 }, { "epoch": 0.0029463906714275014, "grad_norm": 0.0883261188864708, "learning_rate": 2.998648276552605e-05, "loss": 10.7854, "step": 590 }, { "epoch": 0.0029963294963669504, "grad_norm": 0.09347826987504959, "learning_rate": 2.9984980850584496e-05, "loss": 10.785, "step": 600 }, { "epoch": 0.0030462683213064, "grad_norm": 0.09312307834625244, "learning_rate": 2.9983478935642943e-05, "loss": 10.7847, "step": 610 }, { "epoch": 0.003096207146245849, "grad_norm": 0.09311032295227051, "learning_rate": 2.9981977020701396e-05, "loss": 10.785, "step": 620 }, { "epoch": 0.003146145971185298, "grad_norm": 0.09509855508804321, "learning_rate": 2.9980475105759843e-05, "loss": 10.7844, "step": 630 }, { "epoch": 0.0031960847961247474, "grad_norm": 0.09063342213630676, "learning_rate": 2.9978973190818296e-05, "loss": 10.7836, "step": 640 }, { "epoch": 0.0032460236210641964, "grad_norm": 0.095396488904953, "learning_rate": 2.9977471275876743e-05, "loss": 10.7838, "step": 650 }, { "epoch": 0.0032959624460036454, "grad_norm": 0.09543643146753311, "learning_rate": 2.997596936093519e-05, "loss": 10.7835, "step": 660 }, { "epoch": 0.003345901270943095, "grad_norm": 11308.7890625, "learning_rate": 2.9974467445993644e-05, "loss": 11.5043, "step": 670 }, { "epoch": 0.003395840095882544, "grad_norm": 0.09934733808040619, "learning_rate": 2.997296553105209e-05, "loss": 17.0433, "step": 680 }, { "epoch": 0.003445778920821993, "grad_norm": 0.09415256977081299, "learning_rate": 2.9971463616110544e-05, "loss": 10.9281, "step": 690 }, { "epoch": 0.0034957177457614424, "grad_norm": 0.09561091661453247, "learning_rate": 2.996996170116899e-05, "loss": 12.0749, "step": 700 }, { "epoch": 0.0035456565707008914, "grad_norm": 308366.0625, "learning_rate": 2.996845978622744e-05, "loss": 65.9464, "step": 710 }, { "epoch": 0.0035955953956403404, "grad_norm": 36107.77734375, "learning_rate": 2.996695787128589e-05, "loss": 87.2685, "step": 720 }, { "epoch": 0.00364553422057979, "grad_norm": 0.09354925155639648, "learning_rate": 2.9965455956344338e-05, "loss": 10.9749, "step": 730 }, { "epoch": 0.003695473045519239, "grad_norm": 0.09494537115097046, "learning_rate": 2.996395404140279e-05, "loss": 10.7808, "step": 740 }, { "epoch": 0.003745411870458688, "grad_norm": 0.10302335023880005, "learning_rate": 2.9962452126461238e-05, "loss": 13.2444, "step": 750 }, { "epoch": 0.0037953506953981374, "grad_norm": 0.09412359446287155, "learning_rate": 2.996095021151969e-05, "loss": 11.111, "step": 760 }, { "epoch": 0.0038452895203375864, "grad_norm": 0.10018070042133331, "learning_rate": 2.995944829657814e-05, "loss": 10.7808, "step": 770 }, { "epoch": 0.003895228345277036, "grad_norm": 0.09139321744441986, "learning_rate": 2.9957946381636585e-05, "loss": 10.7802, "step": 780 }, { "epoch": 0.003945167170216484, "grad_norm": 0.09905160218477249, "learning_rate": 2.995644446669504e-05, "loss": 10.7798, "step": 790 }, { "epoch": 0.003995105995155934, "grad_norm": 0.09502265602350235, "learning_rate": 2.9954942551753486e-05, "loss": 10.7806, "step": 800 }, { "epoch": 0.004045044820095383, "grad_norm": 0.09315773844718933, "learning_rate": 2.9953440636811936e-05, "loss": 10.7789, "step": 810 }, { "epoch": 0.004094983645034832, "grad_norm": 0.09650130569934845, "learning_rate": 2.9951938721870386e-05, "loss": 10.7788, "step": 820 }, { "epoch": 0.004144922469974281, "grad_norm": 0.09284522384405136, "learning_rate": 2.9950436806928833e-05, "loss": 10.7786, "step": 830 }, { "epoch": 0.004194861294913731, "grad_norm": 0.09788000583648682, "learning_rate": 2.9948934891987286e-05, "loss": 10.7783, "step": 840 }, { "epoch": 0.004244800119853179, "grad_norm": 0.09668333828449249, "learning_rate": 2.9947432977045733e-05, "loss": 10.7779, "step": 850 }, { "epoch": 0.004294738944792629, "grad_norm": 0.09662320464849472, "learning_rate": 2.9945931062104183e-05, "loss": 10.7769, "step": 860 }, { "epoch": 0.004344677769732078, "grad_norm": 0.09260370582342148, "learning_rate": 2.9944429147162634e-05, "loss": 10.7772, "step": 870 }, { "epoch": 0.004394616594671528, "grad_norm": 0.09133943170309067, "learning_rate": 2.994292723222108e-05, "loss": 10.7774, "step": 880 }, { "epoch": 0.004444555419610976, "grad_norm": 0.09417100250720978, "learning_rate": 2.9941425317279534e-05, "loss": 10.7768, "step": 890 }, { "epoch": 0.004494494244550426, "grad_norm": 0.09618920087814331, "learning_rate": 2.993992340233798e-05, "loss": 10.776, "step": 900 }, { "epoch": 0.004544433069489875, "grad_norm": 0.09578631818294525, "learning_rate": 2.993842148739643e-05, "loss": 10.7759, "step": 910 }, { "epoch": 0.004594371894429324, "grad_norm": 0.09697435051202774, "learning_rate": 2.993691957245488e-05, "loss": 10.7744, "step": 920 }, { "epoch": 0.004644310719368773, "grad_norm": 0.09273876994848251, "learning_rate": 2.9935417657513328e-05, "loss": 10.7755, "step": 930 }, { "epoch": 0.004694249544308223, "grad_norm": 0.0970180332660675, "learning_rate": 2.993391574257178e-05, "loss": 10.7751, "step": 940 }, { "epoch": 0.004744188369247671, "grad_norm": 0.0976632609963417, "learning_rate": 2.9932413827630228e-05, "loss": 10.7745, "step": 950 }, { "epoch": 0.004794127194187121, "grad_norm": 0.09234705567359924, "learning_rate": 2.993091191268868e-05, "loss": 10.7746, "step": 960 }, { "epoch": 0.00484406601912657, "grad_norm": 0.09593399614095688, "learning_rate": 2.992940999774713e-05, "loss": 10.7742, "step": 970 }, { "epoch": 0.004894004844066019, "grad_norm": 0.09624140709638596, "learning_rate": 2.9927908082805575e-05, "loss": 10.7732, "step": 980 }, { "epoch": 0.004943943669005468, "grad_norm": 0.09578274935483932, "learning_rate": 2.992640616786403e-05, "loss": 10.7728, "step": 990 }, { "epoch": 0.004993882493944918, "grad_norm": 0.10335574299097061, "learning_rate": 2.9924904252922476e-05, "loss": 10.7731, "step": 1000 }, { "epoch": 0.005043821318884366, "grad_norm": 0.09355924278497696, "learning_rate": 2.9923402337980926e-05, "loss": 10.7723, "step": 1010 }, { "epoch": 0.005093760143823816, "grad_norm": 0.09353188425302505, "learning_rate": 2.9921900423039376e-05, "loss": 10.773, "step": 1020 }, { "epoch": 0.005143698968763265, "grad_norm": 0.09189878404140472, "learning_rate": 2.9920398508097826e-05, "loss": 10.7719, "step": 1030 }, { "epoch": 0.005193637793702714, "grad_norm": 0.09627330303192139, "learning_rate": 2.9918896593156277e-05, "loss": 10.7721, "step": 1040 }, { "epoch": 0.005243576618642163, "grad_norm": 0.09078870713710785, "learning_rate": 2.9917394678214723e-05, "loss": 10.7711, "step": 1050 }, { "epoch": 0.005293515443581613, "grad_norm": 0.09166533499956131, "learning_rate": 2.9915892763273173e-05, "loss": 10.7711, "step": 1060 }, { "epoch": 0.005343454268521061, "grad_norm": 0.0925443097949028, "learning_rate": 2.9914390848331624e-05, "loss": 10.771, "step": 1070 }, { "epoch": 0.005393393093460511, "grad_norm": 0.09320785850286484, "learning_rate": 2.9912888933390074e-05, "loss": 10.7703, "step": 1080 }, { "epoch": 0.00544333191839996, "grad_norm": 0.09314128011465073, "learning_rate": 2.9911387018448524e-05, "loss": 10.7707, "step": 1090 }, { "epoch": 0.005493270743339409, "grad_norm": 0.09605088829994202, "learning_rate": 2.990988510350697e-05, "loss": 10.7705, "step": 1100 }, { "epoch": 0.005543209568278858, "grad_norm": 0.08931813389062881, "learning_rate": 2.990838318856542e-05, "loss": 10.7689, "step": 1110 }, { "epoch": 0.005593148393218308, "grad_norm": 0.09207310527563095, "learning_rate": 2.990688127362387e-05, "loss": 10.7691, "step": 1120 }, { "epoch": 0.005643087218157756, "grad_norm": 0.09695383906364441, "learning_rate": 2.990537935868232e-05, "loss": 10.7687, "step": 1130 }, { "epoch": 0.005693026043097206, "grad_norm": 0.09819135814905167, "learning_rate": 2.990387744374077e-05, "loss": 10.7679, "step": 1140 }, { "epoch": 0.005742964868036655, "grad_norm": 0.09095334261655807, "learning_rate": 2.990237552879922e-05, "loss": 10.7685, "step": 1150 }, { "epoch": 0.005792903692976104, "grad_norm": 0.08971603959798813, "learning_rate": 2.990087361385767e-05, "loss": 10.7678, "step": 1160 }, { "epoch": 0.005842842517915553, "grad_norm": 0.0930616483092308, "learning_rate": 2.989937169891612e-05, "loss": 10.7679, "step": 1170 }, { "epoch": 0.005892781342855003, "grad_norm": 0.09826360642910004, "learning_rate": 2.989786978397457e-05, "loss": 10.767, "step": 1180 }, { "epoch": 0.005942720167794451, "grad_norm": 0.09297499060630798, "learning_rate": 2.989636786903302e-05, "loss": 10.7671, "step": 1190 }, { "epoch": 0.005992658992733901, "grad_norm": 0.09357722103595734, "learning_rate": 2.9894865954091466e-05, "loss": 10.7661, "step": 1200 }, { "epoch": 0.00604259781767335, "grad_norm": 0.09486209601163864, "learning_rate": 2.9893364039149916e-05, "loss": 10.7658, "step": 1210 }, { "epoch": 0.0060925366426128, "grad_norm": 0.09341169148683548, "learning_rate": 2.9891862124208366e-05, "loss": 10.7663, "step": 1220 }, { "epoch": 0.006142475467552248, "grad_norm": 0.09570655226707458, "learning_rate": 2.9890360209266816e-05, "loss": 10.7658, "step": 1230 }, { "epoch": 0.006192414292491698, "grad_norm": 0.09547128528356552, "learning_rate": 2.9888858294325267e-05, "loss": 10.764, "step": 1240 }, { "epoch": 0.006242353117431147, "grad_norm": 0.08968691527843475, "learning_rate": 2.9887356379383713e-05, "loss": 10.7649, "step": 1250 }, { "epoch": 0.006292291942370596, "grad_norm": 0.09444384276866913, "learning_rate": 2.9885854464442163e-05, "loss": 10.7647, "step": 1260 }, { "epoch": 0.006342230767310045, "grad_norm": 0.0966639518737793, "learning_rate": 2.9884352549500614e-05, "loss": 10.764, "step": 1270 }, { "epoch": 0.006392169592249495, "grad_norm": 0.09153666347265244, "learning_rate": 2.9882850634559064e-05, "loss": 10.7646, "step": 1280 }, { "epoch": 0.006442108417188943, "grad_norm": 0.09613633900880814, "learning_rate": 2.9881348719617514e-05, "loss": 10.763, "step": 1290 }, { "epoch": 0.006492047242128393, "grad_norm": 0.09370370209217072, "learning_rate": 2.987984680467596e-05, "loss": 10.7634, "step": 1300 }, { "epoch": 0.006541986067067842, "grad_norm": 0.09263869374990463, "learning_rate": 2.987834488973441e-05, "loss": 10.7635, "step": 1310 }, { "epoch": 0.006591924892007291, "grad_norm": 0.09146302193403244, "learning_rate": 2.987684297479286e-05, "loss": 10.7622, "step": 1320 }, { "epoch": 0.00664186371694674, "grad_norm": 0.09400025755167007, "learning_rate": 2.987534105985131e-05, "loss": 10.7627, "step": 1330 }, { "epoch": 0.00669180254188619, "grad_norm": 0.09636477380990982, "learning_rate": 2.987383914490976e-05, "loss": 10.762, "step": 1340 }, { "epoch": 0.006741741366825638, "grad_norm": 0.08832681179046631, "learning_rate": 2.9872337229968212e-05, "loss": 10.7615, "step": 1350 }, { "epoch": 0.006791680191765088, "grad_norm": 0.10007186979055405, "learning_rate": 2.987083531502666e-05, "loss": 10.7612, "step": 1360 }, { "epoch": 0.006841619016704537, "grad_norm": 0.09131281077861786, "learning_rate": 2.986933340008511e-05, "loss": 10.761, "step": 1370 }, { "epoch": 0.006891557841643986, "grad_norm": 0.0982031375169754, "learning_rate": 2.986783148514356e-05, "loss": 10.7597, "step": 1380 }, { "epoch": 0.006941496666583435, "grad_norm": 0.10071565955877304, "learning_rate": 2.986632957020201e-05, "loss": 10.7601, "step": 1390 }, { "epoch": 0.006991435491522885, "grad_norm": 0.09393281489610672, "learning_rate": 2.986482765526046e-05, "loss": 10.7598, "step": 1400 }, { "epoch": 0.007041374316462333, "grad_norm": 0.09201990813016891, "learning_rate": 2.9863325740318906e-05, "loss": 10.7588, "step": 1410 }, { "epoch": 0.007091313141401783, "grad_norm": 0.09910114109516144, "learning_rate": 2.9861823825377356e-05, "loss": 10.7597, "step": 1420 }, { "epoch": 0.007141251966341232, "grad_norm": 0.09161806851625443, "learning_rate": 2.9860321910435806e-05, "loss": 10.7587, "step": 1430 }, { "epoch": 0.007191190791280681, "grad_norm": 0.09380662441253662, "learning_rate": 2.9858819995494257e-05, "loss": 10.7589, "step": 1440 }, { "epoch": 0.00724112961622013, "grad_norm": 0.09290491044521332, "learning_rate": 2.9857318080552707e-05, "loss": 10.7591, "step": 1450 }, { "epoch": 0.00729106844115958, "grad_norm": 0.09262260794639587, "learning_rate": 2.9855816165611154e-05, "loss": 10.7581, "step": 1460 }, { "epoch": 0.007341007266099028, "grad_norm": 698.4036865234375, "learning_rate": 2.9854314250669604e-05, "loss": 10.758, "step": 1470 }, { "epoch": 0.007390946091038478, "grad_norm": 0.09542746841907501, "learning_rate": 2.9852812335728054e-05, "loss": 12.0635, "step": 1480 }, { "epoch": 0.007440884915977927, "grad_norm": 0.09986045211553574, "learning_rate": 2.9851310420786504e-05, "loss": 10.7564, "step": 1490 }, { "epoch": 0.007490823740917376, "grad_norm": 0.09409373998641968, "learning_rate": 2.9849808505844954e-05, "loss": 10.7568, "step": 1500 }, { "epoch": 0.007540762565856825, "grad_norm": 0.092925526201725, "learning_rate": 2.98483065909034e-05, "loss": 10.7565, "step": 1510 }, { "epoch": 0.007590701390796275, "grad_norm": 0.0927615836262703, "learning_rate": 2.984680467596185e-05, "loss": 10.7556, "step": 1520 }, { "epoch": 0.007640640215735723, "grad_norm": 0.10118461400270462, "learning_rate": 2.98453027610203e-05, "loss": 10.7564, "step": 1530 }, { "epoch": 0.007690579040675173, "grad_norm": 0.09674585610628128, "learning_rate": 2.984380084607875e-05, "loss": 10.7554, "step": 1540 }, { "epoch": 0.007740517865614622, "grad_norm": 0.09508625417947769, "learning_rate": 2.9842298931137202e-05, "loss": 10.7555, "step": 1550 }, { "epoch": 0.007790456690554072, "grad_norm": 0.0912444218993187, "learning_rate": 2.984079701619565e-05, "loss": 10.7547, "step": 1560 }, { "epoch": 0.007840395515493521, "grad_norm": 0.09958932548761368, "learning_rate": 2.98392951012541e-05, "loss": 10.7539, "step": 1570 }, { "epoch": 0.007890334340432969, "grad_norm": 0.0945759117603302, "learning_rate": 2.983779318631255e-05, "loss": 10.7541, "step": 1580 }, { "epoch": 0.007940273165372418, "grad_norm": 0.09051518142223358, "learning_rate": 2.9836291271371e-05, "loss": 10.7533, "step": 1590 }, { "epoch": 0.007990211990311868, "grad_norm": 0.09735298156738281, "learning_rate": 2.983478935642945e-05, "loss": 10.753, "step": 1600 }, { "epoch": 0.008040150815251317, "grad_norm": 0.09062410145998001, "learning_rate": 2.9833287441487896e-05, "loss": 10.7529, "step": 1610 }, { "epoch": 0.008090089640190767, "grad_norm": 0.091838039457798, "learning_rate": 2.9831785526546346e-05, "loss": 10.7524, "step": 1620 }, { "epoch": 0.008140028465130216, "grad_norm": 0.0958361029624939, "learning_rate": 2.9830283611604796e-05, "loss": 10.7529, "step": 1630 }, { "epoch": 0.008189967290069664, "grad_norm": 0.09625451266765594, "learning_rate": 2.9828781696663247e-05, "loss": 10.7514, "step": 1640 }, { "epoch": 0.008239906115009113, "grad_norm": 0.0954735055565834, "learning_rate": 2.9827279781721697e-05, "loss": 10.7526, "step": 1650 }, { "epoch": 0.008289844939948563, "grad_norm": 0.09449174255132675, "learning_rate": 2.9825777866780144e-05, "loss": 10.7519, "step": 1660 }, { "epoch": 0.008339783764888012, "grad_norm": 0.08916933834552765, "learning_rate": 2.9824275951838597e-05, "loss": 10.7517, "step": 1670 }, { "epoch": 0.008389722589827462, "grad_norm": 0.0955243706703186, "learning_rate": 2.9822774036897044e-05, "loss": 10.7513, "step": 1680 }, { "epoch": 0.008439661414766911, "grad_norm": 0.0943785309791565, "learning_rate": 2.9821272121955494e-05, "loss": 10.7504, "step": 1690 }, { "epoch": 0.008489600239706359, "grad_norm": 0.09509387612342834, "learning_rate": 2.9819770207013944e-05, "loss": 10.7505, "step": 1700 }, { "epoch": 0.008539539064645808, "grad_norm": 0.09808336943387985, "learning_rate": 2.981826829207239e-05, "loss": 10.7496, "step": 1710 }, { "epoch": 0.008589477889585258, "grad_norm": 0.0928177759051323, "learning_rate": 2.9816766377130845e-05, "loss": 10.7501, "step": 1720 }, { "epoch": 0.008639416714524707, "grad_norm": 0.09282363951206207, "learning_rate": 2.981526446218929e-05, "loss": 10.7484, "step": 1730 }, { "epoch": 0.008689355539464157, "grad_norm": 0.09041490405797958, "learning_rate": 2.981376254724774e-05, "loss": 10.7492, "step": 1740 }, { "epoch": 0.008739294364403606, "grad_norm": 0.09924125671386719, "learning_rate": 2.9812260632306192e-05, "loss": 10.7486, "step": 1750 }, { "epoch": 0.008789233189343056, "grad_norm": 0.09147768467664719, "learning_rate": 2.981075871736464e-05, "loss": 10.7488, "step": 1760 }, { "epoch": 0.008839172014282503, "grad_norm": 0.09582570195198059, "learning_rate": 2.9809256802423092e-05, "loss": 10.7485, "step": 1770 }, { "epoch": 0.008889110839221953, "grad_norm": 0.09464222192764282, "learning_rate": 2.980775488748154e-05, "loss": 10.7473, "step": 1780 }, { "epoch": 0.008939049664161402, "grad_norm": 0.09430133551359177, "learning_rate": 2.980625297253999e-05, "loss": 10.7484, "step": 1790 }, { "epoch": 0.008988988489100852, "grad_norm": 0.09573784470558167, "learning_rate": 2.980475105759844e-05, "loss": 10.7467, "step": 1800 }, { "epoch": 0.009038927314040301, "grad_norm": 0.09440425038337708, "learning_rate": 2.9803249142656886e-05, "loss": 10.7473, "step": 1810 }, { "epoch": 0.00908886613897975, "grad_norm": 0.098334401845932, "learning_rate": 2.980174722771534e-05, "loss": 10.7465, "step": 1820 }, { "epoch": 0.009138804963919198, "grad_norm": 0.095046766102314, "learning_rate": 2.9800245312773786e-05, "loss": 10.7463, "step": 1830 }, { "epoch": 0.009188743788858648, "grad_norm": 0.09811218827962875, "learning_rate": 2.9798743397832237e-05, "loss": 10.7457, "step": 1840 }, { "epoch": 0.009238682613798097, "grad_norm": 0.09796342253684998, "learning_rate": 2.9797241482890687e-05, "loss": 10.7463, "step": 1850 }, { "epoch": 0.009288621438737547, "grad_norm": 0.09163960069417953, "learning_rate": 2.9795739567949134e-05, "loss": 10.7455, "step": 1860 }, { "epoch": 0.009338560263676996, "grad_norm": 0.09211383759975433, "learning_rate": 2.9794237653007587e-05, "loss": 10.7452, "step": 1870 }, { "epoch": 0.009388499088616446, "grad_norm": 0.1004691869020462, "learning_rate": 2.9792735738066034e-05, "loss": 10.7444, "step": 1880 }, { "epoch": 0.009438437913555893, "grad_norm": 0.09318966418504715, "learning_rate": 2.9791233823124484e-05, "loss": 10.744, "step": 1890 }, { "epoch": 0.009488376738495343, "grad_norm": 0.09770355373620987, "learning_rate": 2.9789731908182934e-05, "loss": 10.7435, "step": 1900 }, { "epoch": 0.009538315563434792, "grad_norm": 0.09747663885354996, "learning_rate": 2.978822999324138e-05, "loss": 10.7435, "step": 1910 }, { "epoch": 0.009588254388374242, "grad_norm": 0.0939902812242508, "learning_rate": 2.9786728078299835e-05, "loss": 10.7433, "step": 1920 }, { "epoch": 0.009638193213313691, "grad_norm": 0.09048355370759964, "learning_rate": 2.978522616335828e-05, "loss": 10.7432, "step": 1930 }, { "epoch": 0.00968813203825314, "grad_norm": 0.09295009821653366, "learning_rate": 2.978372424841673e-05, "loss": 10.743, "step": 1940 }, { "epoch": 0.009738070863192588, "grad_norm": 0.09584958851337433, "learning_rate": 2.9782222333475182e-05, "loss": 10.7431, "step": 1950 }, { "epoch": 0.009788009688132038, "grad_norm": 0.09165474027395248, "learning_rate": 2.978072041853363e-05, "loss": 10.7419, "step": 1960 }, { "epoch": 0.009837948513071487, "grad_norm": 0.0949409231543541, "learning_rate": 2.9779218503592082e-05, "loss": 10.7418, "step": 1970 }, { "epoch": 0.009887887338010937, "grad_norm": 0.09680484980344772, "learning_rate": 2.977771658865053e-05, "loss": 10.7418, "step": 1980 }, { "epoch": 0.009937826162950386, "grad_norm": 0.10440608114004135, "learning_rate": 2.9776214673708982e-05, "loss": 10.7418, "step": 1990 }, { "epoch": 0.009987764987889836, "grad_norm": 0.09684424102306366, "learning_rate": 2.977471275876743e-05, "loss": 10.741, "step": 2000 }, { "epoch": 0.010037703812829283, "grad_norm": 0.09417276084423065, "learning_rate": 2.9773210843825876e-05, "loss": 10.7413, "step": 2010 }, { "epoch": 0.010087642637768733, "grad_norm": 0.09294068813323975, "learning_rate": 2.977170892888433e-05, "loss": 10.74, "step": 2020 }, { "epoch": 0.010137581462708182, "grad_norm": 0.09408864378929138, "learning_rate": 2.9770207013942776e-05, "loss": 10.7388, "step": 2030 }, { "epoch": 0.010187520287647632, "grad_norm": 0.09455525130033493, "learning_rate": 2.976870509900123e-05, "loss": 10.7401, "step": 2040 }, { "epoch": 0.010237459112587081, "grad_norm": 0.09249377250671387, "learning_rate": 2.9767203184059677e-05, "loss": 10.7395, "step": 2050 }, { "epoch": 0.01028739793752653, "grad_norm": 0.09711704403162003, "learning_rate": 2.9765701269118124e-05, "loss": 10.7387, "step": 2060 }, { "epoch": 0.01033733676246598, "grad_norm": 0.09462075680494308, "learning_rate": 2.9764199354176577e-05, "loss": 10.7389, "step": 2070 }, { "epoch": 0.010387275587405428, "grad_norm": 0.09138345718383789, "learning_rate": 2.9762697439235024e-05, "loss": 10.7381, "step": 2080 }, { "epoch": 0.010437214412344877, "grad_norm": 0.0979338064789772, "learning_rate": 2.9761195524293477e-05, "loss": 10.7373, "step": 2090 }, { "epoch": 0.010487153237284327, "grad_norm": 428.1760559082031, "learning_rate": 2.9759693609351924e-05, "loss": 10.7368, "step": 2100 }, { "epoch": 0.010537092062223776, "grad_norm": 0.09210320562124252, "learning_rate": 2.975819169441037e-05, "loss": 11.136, "step": 2110 }, { "epoch": 0.010587030887163226, "grad_norm": 0.09596727043390274, "learning_rate": 2.9756689779468825e-05, "loss": 12.8231, "step": 2120 }, { "epoch": 0.010636969712102675, "grad_norm": 0.10231532901525497, "learning_rate": 2.975518786452727e-05, "loss": 10.8774, "step": 2130 }, { "epoch": 0.010686908537042123, "grad_norm": 0.09213022887706757, "learning_rate": 2.9753685949585725e-05, "loss": 10.7361, "step": 2140 }, { "epoch": 0.010736847361981572, "grad_norm": 0.0937543660402298, "learning_rate": 2.9752184034644172e-05, "loss": 10.7364, "step": 2150 }, { "epoch": 0.010786786186921022, "grad_norm": 0.09011025726795197, "learning_rate": 2.975068211970262e-05, "loss": 10.7361, "step": 2160 }, { "epoch": 0.010836725011860471, "grad_norm": 0.09514773637056351, "learning_rate": 2.9749180204761072e-05, "loss": 10.7348, "step": 2170 }, { "epoch": 0.01088666383679992, "grad_norm": 0.0912405401468277, "learning_rate": 2.974767828981952e-05, "loss": 10.7349, "step": 2180 }, { "epoch": 0.01093660266173937, "grad_norm": 0.10206656157970428, "learning_rate": 2.9746176374877972e-05, "loss": 10.7357, "step": 2190 }, { "epoch": 0.010986541486678818, "grad_norm": 0.10607360303401947, "learning_rate": 2.974467445993642e-05, "loss": 10.7347, "step": 2200 }, { "epoch": 0.011036480311618267, "grad_norm": 0.09714030474424362, "learning_rate": 2.9743172544994866e-05, "loss": 10.7333, "step": 2210 }, { "epoch": 0.011086419136557717, "grad_norm": 0.09556932747364044, "learning_rate": 2.974167063005332e-05, "loss": 10.7338, "step": 2220 }, { "epoch": 0.011136357961497166, "grad_norm": 0.09247563034296036, "learning_rate": 2.9740168715111766e-05, "loss": 10.7334, "step": 2230 }, { "epoch": 0.011186296786436616, "grad_norm": 0.08850020170211792, "learning_rate": 2.973866680017022e-05, "loss": 10.7334, "step": 2240 }, { "epoch": 0.011236235611376065, "grad_norm": 0.09381990134716034, "learning_rate": 2.9737164885228667e-05, "loss": 10.7324, "step": 2250 }, { "epoch": 0.011286174436315513, "grad_norm": 0.09170781821012497, "learning_rate": 2.9735662970287114e-05, "loss": 10.7327, "step": 2260 }, { "epoch": 0.011336113261254962, "grad_norm": 0.09936770796775818, "learning_rate": 2.9734161055345567e-05, "loss": 10.7321, "step": 2270 }, { "epoch": 0.011386052086194412, "grad_norm": 0.09585506469011307, "learning_rate": 2.9732659140404014e-05, "loss": 10.7322, "step": 2280 }, { "epoch": 0.011435990911133861, "grad_norm": 0.0958356037735939, "learning_rate": 2.9731157225462467e-05, "loss": 10.7323, "step": 2290 }, { "epoch": 0.01148592973607331, "grad_norm": 0.09194158017635345, "learning_rate": 2.9729655310520914e-05, "loss": 10.7318, "step": 2300 }, { "epoch": 0.01153586856101276, "grad_norm": 0.09054262191057205, "learning_rate": 2.9728153395579364e-05, "loss": 10.7313, "step": 2310 }, { "epoch": 0.011585807385952208, "grad_norm": 0.09786096960306168, "learning_rate": 2.9726651480637815e-05, "loss": 10.7306, "step": 2320 }, { "epoch": 0.011635746210891657, "grad_norm": 0.09685620665550232, "learning_rate": 2.972514956569626e-05, "loss": 10.731, "step": 2330 }, { "epoch": 0.011685685035831107, "grad_norm": 0.09197534620761871, "learning_rate": 2.9723647650754715e-05, "loss": 10.7293, "step": 2340 }, { "epoch": 0.011735623860770556, "grad_norm": 0.09799861162900925, "learning_rate": 2.9722145735813162e-05, "loss": 10.7295, "step": 2350 }, { "epoch": 0.011785562685710006, "grad_norm": 0.09268220514059067, "learning_rate": 2.9720643820871612e-05, "loss": 10.7298, "step": 2360 }, { "epoch": 0.011835501510649455, "grad_norm": 0.09487041085958481, "learning_rate": 2.9719141905930062e-05, "loss": 10.7286, "step": 2370 }, { "epoch": 0.011885440335588903, "grad_norm": 0.09200552850961685, "learning_rate": 2.971763999098851e-05, "loss": 10.7287, "step": 2380 }, { "epoch": 0.011935379160528352, "grad_norm": 0.09358713030815125, "learning_rate": 2.9716138076046962e-05, "loss": 10.7276, "step": 2390 }, { "epoch": 0.011985317985467802, "grad_norm": 0.09549114853143692, "learning_rate": 2.971463616110541e-05, "loss": 10.7284, "step": 2400 }, { "epoch": 0.012035256810407251, "grad_norm": 0.09079451113939285, "learning_rate": 2.971313424616386e-05, "loss": 10.7274, "step": 2410 }, { "epoch": 0.0120851956353467, "grad_norm": 0.09617462754249573, "learning_rate": 2.971163233122231e-05, "loss": 10.7274, "step": 2420 }, { "epoch": 0.01213513446028615, "grad_norm": 0.09577368944883347, "learning_rate": 2.9710130416280756e-05, "loss": 10.7275, "step": 2430 }, { "epoch": 0.0121850732852256, "grad_norm": 0.09303394705057144, "learning_rate": 2.970862850133921e-05, "loss": 10.7257, "step": 2440 }, { "epoch": 0.012235012110165047, "grad_norm": 0.09505324810743332, "learning_rate": 2.9707126586397657e-05, "loss": 10.726, "step": 2450 }, { "epoch": 0.012284950935104497, "grad_norm": 0.0933798998594284, "learning_rate": 2.9705624671456107e-05, "loss": 10.7261, "step": 2460 }, { "epoch": 0.012334889760043946, "grad_norm": 0.09893235564231873, "learning_rate": 2.9704122756514557e-05, "loss": 10.7264, "step": 2470 }, { "epoch": 0.012384828584983396, "grad_norm": 0.08968345075845718, "learning_rate": 2.9702620841573004e-05, "loss": 10.7259, "step": 2480 }, { "epoch": 0.012434767409922845, "grad_norm": 0.09330223500728607, "learning_rate": 2.9701118926631458e-05, "loss": 10.7266, "step": 2490 }, { "epoch": 0.012484706234862294, "grad_norm": 0.10579249262809753, "learning_rate": 2.9699617011689904e-05, "loss": 10.7249, "step": 2500 }, { "epoch": 0.012534645059801742, "grad_norm": 0.0928342342376709, "learning_rate": 2.9698115096748354e-05, "loss": 10.7238, "step": 2510 }, { "epoch": 0.012584583884741192, "grad_norm": 0.09858880937099457, "learning_rate": 2.9696613181806805e-05, "loss": 10.7247, "step": 2520 }, { "epoch": 0.012634522709680641, "grad_norm": 0.09675079584121704, "learning_rate": 2.969511126686525e-05, "loss": 10.7236, "step": 2530 }, { "epoch": 0.01268446153462009, "grad_norm": 0.09549619257450104, "learning_rate": 2.9693609351923705e-05, "loss": 10.7238, "step": 2540 }, { "epoch": 0.01273440035955954, "grad_norm": 0.09920501708984375, "learning_rate": 2.9692107436982152e-05, "loss": 10.7236, "step": 2550 }, { "epoch": 0.01278433918449899, "grad_norm": 0.09226509928703308, "learning_rate": 2.9690605522040602e-05, "loss": 10.7233, "step": 2560 }, { "epoch": 0.012834278009438437, "grad_norm": 0.09456513077020645, "learning_rate": 2.9689103607099052e-05, "loss": 10.7237, "step": 2570 }, { "epoch": 0.012884216834377887, "grad_norm": 0.09502000361680984, "learning_rate": 2.96876016921575e-05, "loss": 10.7231, "step": 2580 }, { "epoch": 0.012934155659317336, "grad_norm": 0.09463300555944443, "learning_rate": 2.9686099777215953e-05, "loss": 10.722, "step": 2590 }, { "epoch": 0.012984094484256786, "grad_norm": 0.09156177937984467, "learning_rate": 2.96845978622744e-05, "loss": 10.722, "step": 2600 }, { "epoch": 0.013034033309196235, "grad_norm": 0.09233105927705765, "learning_rate": 2.968309594733285e-05, "loss": 10.7217, "step": 2610 }, { "epoch": 0.013083972134135684, "grad_norm": 0.0961083248257637, "learning_rate": 2.96815940323913e-05, "loss": 10.7208, "step": 2620 }, { "epoch": 0.013133910959075132, "grad_norm": 0.0997881218791008, "learning_rate": 2.968009211744975e-05, "loss": 10.7205, "step": 2630 }, { "epoch": 0.013183849784014582, "grad_norm": 0.09116201847791672, "learning_rate": 2.96785902025082e-05, "loss": 10.72, "step": 2640 }, { "epoch": 0.013233788608954031, "grad_norm": 0.0893169716000557, "learning_rate": 2.9677088287566647e-05, "loss": 10.7206, "step": 2650 }, { "epoch": 0.01328372743389348, "grad_norm": 0.09925134479999542, "learning_rate": 2.9675586372625097e-05, "loss": 31.1372, "step": 2660 }, { "epoch": 0.01333366625883293, "grad_norm": 0.09098075330257416, "learning_rate": 2.9674084457683547e-05, "loss": 10.7191, "step": 2670 }, { "epoch": 0.01338360508377238, "grad_norm": 0.09981559216976166, "learning_rate": 2.9672582542741997e-05, "loss": 18.3812, "step": 2680 }, { "epoch": 0.013433543908711827, "grad_norm": 0.10291223227977753, "learning_rate": 2.9671080627800448e-05, "loss": 10.7193, "step": 2690 }, { "epoch": 0.013483482733651277, "grad_norm": 0.09718188643455505, "learning_rate": 2.9669578712858894e-05, "loss": 10.789, "step": 2700 }, { "epoch": 0.013533421558590726, "grad_norm": 0.1061011552810669, "learning_rate": 2.9668076797917344e-05, "loss": 10.7188, "step": 2710 }, { "epoch": 0.013583360383530176, "grad_norm": 65854.7578125, "learning_rate": 2.9666574882975795e-05, "loss": 11.0129, "step": 2720 }, { "epoch": 0.013633299208469625, "grad_norm": 743.2807006835938, "learning_rate": 2.9665072968034245e-05, "loss": 10.7305, "step": 2730 }, { "epoch": 0.013683238033409074, "grad_norm": 0.09718185663223267, "learning_rate": 2.9663571053092695e-05, "loss": 11.708, "step": 2740 }, { "epoch": 0.013733176858348524, "grad_norm": 0.09672287851572037, "learning_rate": 2.9662069138151142e-05, "loss": 10.7177, "step": 2750 }, { "epoch": 0.013783115683287972, "grad_norm": 0.093637615442276, "learning_rate": 2.9660567223209595e-05, "loss": 10.8841, "step": 2760 }, { "epoch": 0.013833054508227421, "grad_norm": 0.10001079738140106, "learning_rate": 2.9659065308268042e-05, "loss": 10.8078, "step": 2770 }, { "epoch": 0.01388299333316687, "grad_norm": 0.09189160168170929, "learning_rate": 2.9657563393326492e-05, "loss": 11.6597, "step": 2780 }, { "epoch": 0.01393293215810632, "grad_norm": 9842.095703125, "learning_rate": 2.9656061478384943e-05, "loss": 13.5412, "step": 2790 }, { "epoch": 0.01398287098304577, "grad_norm": 0.09831225126981735, "learning_rate": 2.965455956344339e-05, "loss": 10.7168, "step": 2800 }, { "epoch": 0.014032809807985219, "grad_norm": 0.09885016083717346, "learning_rate": 2.9653057648501843e-05, "loss": 10.7171, "step": 2810 }, { "epoch": 0.014082748632924667, "grad_norm": 0.09697610884904861, "learning_rate": 2.965155573356029e-05, "loss": 10.7158, "step": 2820 }, { "epoch": 0.014132687457864116, "grad_norm": 0.09410693496465683, "learning_rate": 2.965005381861874e-05, "loss": 10.7153, "step": 2830 }, { "epoch": 0.014182626282803566, "grad_norm": 0.09180265665054321, "learning_rate": 2.964855190367719e-05, "loss": 10.7161, "step": 2840 }, { "epoch": 0.014232565107743015, "grad_norm": 0.10008688271045685, "learning_rate": 2.9647049988735637e-05, "loss": 10.7156, "step": 2850 }, { "epoch": 0.014282503932682464, "grad_norm": 0.09007236361503601, "learning_rate": 2.964554807379409e-05, "loss": 10.7152, "step": 2860 }, { "epoch": 0.014332442757621914, "grad_norm": 0.09538665413856506, "learning_rate": 2.9644046158852537e-05, "loss": 10.7139, "step": 2870 }, { "epoch": 0.014382381582561362, "grad_norm": 0.09514635801315308, "learning_rate": 2.9642544243910987e-05, "loss": 10.7136, "step": 2880 }, { "epoch": 0.014432320407500811, "grad_norm": 0.09610595554113388, "learning_rate": 2.9641042328969438e-05, "loss": 10.7138, "step": 2890 }, { "epoch": 0.01448225923244026, "grad_norm": 0.09452015161514282, "learning_rate": 2.9639540414027884e-05, "loss": 10.7137, "step": 2900 }, { "epoch": 0.01453219805737971, "grad_norm": 0.09626973420381546, "learning_rate": 2.9638038499086338e-05, "loss": 10.7121, "step": 2910 }, { "epoch": 0.01458213688231916, "grad_norm": 0.0968800038099289, "learning_rate": 2.9636536584144785e-05, "loss": 10.7127, "step": 2920 }, { "epoch": 0.014632075707258609, "grad_norm": 0.09562117606401443, "learning_rate": 2.9635034669203235e-05, "loss": 10.7123, "step": 2930 }, { "epoch": 0.014682014532198057, "grad_norm": 0.09600751847028732, "learning_rate": 2.9633532754261685e-05, "loss": 10.7121, "step": 2940 }, { "epoch": 0.014731953357137506, "grad_norm": 0.09042752534151077, "learning_rate": 2.9632030839320135e-05, "loss": 10.7112, "step": 2950 }, { "epoch": 0.014781892182076956, "grad_norm": 0.09635340422391891, "learning_rate": 2.9630528924378585e-05, "loss": 10.7109, "step": 2960 }, { "epoch": 0.014831831007016405, "grad_norm": 0.09323673695325851, "learning_rate": 2.9629027009437032e-05, "loss": 10.7115, "step": 2970 }, { "epoch": 0.014881769831955854, "grad_norm": 0.09715006500482559, "learning_rate": 2.9627525094495482e-05, "loss": 10.7108, "step": 2980 }, { "epoch": 0.014931708656895304, "grad_norm": 0.09540782868862152, "learning_rate": 2.9626023179553933e-05, "loss": 10.7107, "step": 2990 }, { "epoch": 0.014981647481834752, "grad_norm": 0.09565411508083344, "learning_rate": 2.9624521264612383e-05, "loss": 10.7098, "step": 3000 }, { "epoch": 0.015031586306774201, "grad_norm": 0.09608177095651627, "learning_rate": 2.9623019349670833e-05, "loss": 10.7095, "step": 3010 }, { "epoch": 0.01508152513171365, "grad_norm": 0.09020178765058517, "learning_rate": 2.962151743472928e-05, "loss": 10.7091, "step": 3020 }, { "epoch": 0.0151314639566531, "grad_norm": 0.09340853989124298, "learning_rate": 2.962001551978773e-05, "loss": 10.7083, "step": 3030 }, { "epoch": 0.01518140278159255, "grad_norm": 0.09446629136800766, "learning_rate": 2.961851360484618e-05, "loss": 10.7087, "step": 3040 }, { "epoch": 0.015231341606531999, "grad_norm": 0.0978376492857933, "learning_rate": 2.961701168990463e-05, "loss": 10.7081, "step": 3050 }, { "epoch": 0.015281280431471447, "grad_norm": 0.09334233403205872, "learning_rate": 2.961550977496308e-05, "loss": 10.709, "step": 3060 }, { "epoch": 0.015331219256410896, "grad_norm": 0.098800890147686, "learning_rate": 2.9614007860021527e-05, "loss": 10.7079, "step": 3070 }, { "epoch": 0.015381158081350346, "grad_norm": 0.09233418852090836, "learning_rate": 2.9612505945079977e-05, "loss": 10.7071, "step": 3080 }, { "epoch": 0.015431096906289795, "grad_norm": 0.0931033119559288, "learning_rate": 2.9611004030138428e-05, "loss": 10.7077, "step": 3090 }, { "epoch": 0.015481035731229244, "grad_norm": 0.08983912318944931, "learning_rate": 2.9609502115196878e-05, "loss": 10.7069, "step": 3100 }, { "epoch": 0.015530974556168694, "grad_norm": 0.09569084644317627, "learning_rate": 2.9608000200255328e-05, "loss": 10.7061, "step": 3110 }, { "epoch": 0.015580913381108143, "grad_norm": 0.09349966049194336, "learning_rate": 2.9606498285313775e-05, "loss": 10.7061, "step": 3120 }, { "epoch": 0.015630852206047593, "grad_norm": 0.09354298561811447, "learning_rate": 2.9604996370372225e-05, "loss": 10.7061, "step": 3130 }, { "epoch": 0.015680791030987042, "grad_norm": 0.09295152127742767, "learning_rate": 2.9603494455430675e-05, "loss": 10.7059, "step": 3140 }, { "epoch": 0.01573072985592649, "grad_norm": 0.09449790418148041, "learning_rate": 2.9601992540489125e-05, "loss": 10.7059, "step": 3150 }, { "epoch": 0.015780668680865938, "grad_norm": 0.0972711518406868, "learning_rate": 2.9600490625547575e-05, "loss": 10.7049, "step": 3160 }, { "epoch": 0.015830607505805387, "grad_norm": 0.09353794902563095, "learning_rate": 2.9598988710606022e-05, "loss": 10.7047, "step": 3170 }, { "epoch": 0.015880546330744837, "grad_norm": 0.09542354941368103, "learning_rate": 2.9597486795664472e-05, "loss": 10.7048, "step": 3180 }, { "epoch": 0.015930485155684286, "grad_norm": 0.09505563974380493, "learning_rate": 2.9595984880722923e-05, "loss": 10.7042, "step": 3190 }, { "epoch": 0.015980423980623736, "grad_norm": 0.09707723557949066, "learning_rate": 2.9594482965781373e-05, "loss": 10.7039, "step": 3200 }, { "epoch": 0.016030362805563185, "grad_norm": 0.09463075548410416, "learning_rate": 2.9592981050839823e-05, "loss": 10.7039, "step": 3210 }, { "epoch": 0.016080301630502634, "grad_norm": 0.09552239626646042, "learning_rate": 2.959147913589827e-05, "loss": 10.7023, "step": 3220 }, { "epoch": 0.016130240455442084, "grad_norm": 0.0979151502251625, "learning_rate": 2.958997722095672e-05, "loss": 10.7026, "step": 3230 }, { "epoch": 0.016180179280381533, "grad_norm": 0.0954020619392395, "learning_rate": 2.958847530601517e-05, "loss": 10.702, "step": 3240 }, { "epoch": 0.016230118105320983, "grad_norm": 0.09606318175792694, "learning_rate": 2.958697339107362e-05, "loss": 10.702, "step": 3250 }, { "epoch": 0.016280056930260432, "grad_norm": 0.09881041944026947, "learning_rate": 2.958547147613207e-05, "loss": 10.7023, "step": 3260 }, { "epoch": 0.01632999575519988, "grad_norm": 0.09216438978910446, "learning_rate": 2.958396956119052e-05, "loss": 10.7006, "step": 3270 }, { "epoch": 0.016379934580139328, "grad_norm": 0.09119243174791336, "learning_rate": 2.9582467646248967e-05, "loss": 10.7009, "step": 3280 }, { "epoch": 0.016429873405078777, "grad_norm": 0.09380321204662323, "learning_rate": 2.9580965731307418e-05, "loss": 10.7007, "step": 3290 }, { "epoch": 0.016479812230018227, "grad_norm": 0.0956210270524025, "learning_rate": 2.9579463816365868e-05, "loss": 10.7, "step": 3300 }, { "epoch": 0.016529751054957676, "grad_norm": 0.0973009392619133, "learning_rate": 2.9577961901424318e-05, "loss": 10.6998, "step": 3310 }, { "epoch": 0.016579689879897126, "grad_norm": 0.09392161667346954, "learning_rate": 2.9576459986482768e-05, "loss": 10.7002, "step": 3320 }, { "epoch": 0.016629628704836575, "grad_norm": 0.09009403735399246, "learning_rate": 2.9574958071541215e-05, "loss": 10.7001, "step": 3330 }, { "epoch": 0.016679567529776024, "grad_norm": 0.09653884917497635, "learning_rate": 2.9573456156599665e-05, "loss": 10.6986, "step": 3340 }, { "epoch": 0.016729506354715474, "grad_norm": 0.09588049352169037, "learning_rate": 2.9571954241658115e-05, "loss": 10.6994, "step": 3350 }, { "epoch": 0.016779445179654923, "grad_norm": 0.09776197373867035, "learning_rate": 2.9570452326716565e-05, "loss": 10.6991, "step": 3360 }, { "epoch": 0.016829384004594373, "grad_norm": 0.09142053127288818, "learning_rate": 2.9568950411775016e-05, "loss": 10.6981, "step": 3370 }, { "epoch": 0.016879322829533822, "grad_norm": 0.09754452109336853, "learning_rate": 2.9567448496833462e-05, "loss": 10.6978, "step": 3380 }, { "epoch": 0.01692926165447327, "grad_norm": 0.09043591469526291, "learning_rate": 2.9565946581891913e-05, "loss": 10.6968, "step": 3390 }, { "epoch": 0.016979200479412718, "grad_norm": 0.09461095184087753, "learning_rate": 2.9564444666950363e-05, "loss": 10.6971, "step": 3400 }, { "epoch": 0.017029139304352167, "grad_norm": 0.10008252412080765, "learning_rate": 2.9562942752008813e-05, "loss": 10.6968, "step": 3410 }, { "epoch": 0.017079078129291617, "grad_norm": 0.09465460479259491, "learning_rate": 2.9561440837067263e-05, "loss": 10.6963, "step": 3420 }, { "epoch": 0.017129016954231066, "grad_norm": 0.09323673695325851, "learning_rate": 2.955993892212571e-05, "loss": 10.6959, "step": 3430 }, { "epoch": 0.017178955779170516, "grad_norm": 0.09518439322710037, "learning_rate": 2.955843700718416e-05, "loss": 10.6961, "step": 3440 }, { "epoch": 0.017228894604109965, "grad_norm": 0.09833721816539764, "learning_rate": 2.955693509224261e-05, "loss": 10.6954, "step": 3450 }, { "epoch": 0.017278833429049414, "grad_norm": 0.09518902748823166, "learning_rate": 2.955543317730106e-05, "loss": 10.6944, "step": 3460 }, { "epoch": 0.017328772253988864, "grad_norm": 0.10006536543369293, "learning_rate": 2.955393126235951e-05, "loss": 10.6956, "step": 3470 }, { "epoch": 0.017378711078928313, "grad_norm": 0.08888301998376846, "learning_rate": 2.9552429347417957e-05, "loss": 10.6955, "step": 3480 }, { "epoch": 0.017428649903867763, "grad_norm": 0.09507909417152405, "learning_rate": 2.9550927432476408e-05, "loss": 10.695, "step": 3490 }, { "epoch": 0.017478588728807212, "grad_norm": 0.09857828170061111, "learning_rate": 2.9549425517534858e-05, "loss": 10.6974, "step": 3500 }, { "epoch": 0.01752852755374666, "grad_norm": 0.09427110850811005, "learning_rate": 2.9547923602593308e-05, "loss": 10.6932, "step": 3510 }, { "epoch": 0.01757846637868611, "grad_norm": 0.08865749090909958, "learning_rate": 2.9546421687651758e-05, "loss": 10.6936, "step": 3520 }, { "epoch": 0.017628405203625557, "grad_norm": 0.09774231165647507, "learning_rate": 2.9544919772710205e-05, "loss": 10.6936, "step": 3530 }, { "epoch": 0.017678344028565007, "grad_norm": 0.10259677469730377, "learning_rate": 2.9543417857768655e-05, "loss": 10.6929, "step": 3540 }, { "epoch": 0.017728282853504456, "grad_norm": 0.09243781864643097, "learning_rate": 2.9541915942827105e-05, "loss": 10.6924, "step": 3550 }, { "epoch": 0.017778221678443906, "grad_norm": 0.08847206085920334, "learning_rate": 2.9540414027885555e-05, "loss": 10.6917, "step": 3560 }, { "epoch": 0.017828160503383355, "grad_norm": 0.09328203648328781, "learning_rate": 2.9538912112944006e-05, "loss": 10.692, "step": 3570 }, { "epoch": 0.017878099328322804, "grad_norm": 0.09655408561229706, "learning_rate": 2.9537410198002452e-05, "loss": 10.6916, "step": 3580 }, { "epoch": 0.017928038153262254, "grad_norm": 0.09533986449241638, "learning_rate": 2.9535908283060903e-05, "loss": 10.6912, "step": 3590 }, { "epoch": 0.017977976978201703, "grad_norm": 0.09806837886571884, "learning_rate": 2.9534406368119353e-05, "loss": 10.6905, "step": 3600 }, { "epoch": 0.018027915803141153, "grad_norm": 0.09532346576452255, "learning_rate": 2.9532904453177803e-05, "loss": 10.6909, "step": 3610 }, { "epoch": 0.018077854628080602, "grad_norm": 0.09362402558326721, "learning_rate": 2.9531402538236253e-05, "loss": 10.6905, "step": 3620 }, { "epoch": 0.01812779345302005, "grad_norm": 0.09821821749210358, "learning_rate": 2.95299006232947e-05, "loss": 10.6909, "step": 3630 }, { "epoch": 0.0181777322779595, "grad_norm": 0.09857241809368134, "learning_rate": 2.9528398708353153e-05, "loss": 10.6896, "step": 3640 }, { "epoch": 0.018227671102898947, "grad_norm": 0.0939745232462883, "learning_rate": 2.95268967934116e-05, "loss": 10.6893, "step": 3650 }, { "epoch": 0.018277609927838397, "grad_norm": 0.09561046957969666, "learning_rate": 2.952539487847005e-05, "loss": 10.6889, "step": 3660 }, { "epoch": 0.018327548752777846, "grad_norm": 0.09186465293169022, "learning_rate": 2.95238929635285e-05, "loss": 10.6892, "step": 3670 }, { "epoch": 0.018377487577717296, "grad_norm": 0.1008329913020134, "learning_rate": 2.9522391048586947e-05, "loss": 10.6882, "step": 3680 }, { "epoch": 0.018427426402656745, "grad_norm": 0.09180609881877899, "learning_rate": 2.95208891336454e-05, "loss": 10.6886, "step": 3690 }, { "epoch": 0.018477365227596194, "grad_norm": 0.09365356713533401, "learning_rate": 2.9519387218703848e-05, "loss": 10.6877, "step": 3700 }, { "epoch": 0.018527304052535644, "grad_norm": 0.09670042246580124, "learning_rate": 2.9517885303762298e-05, "loss": 10.6865, "step": 3710 }, { "epoch": 0.018577242877475093, "grad_norm": 0.1001424565911293, "learning_rate": 2.9516383388820748e-05, "loss": 10.6872, "step": 3720 }, { "epoch": 0.018627181702414543, "grad_norm": 0.0908576175570488, "learning_rate": 2.9514881473879195e-05, "loss": 10.6879, "step": 3730 }, { "epoch": 0.018677120527353992, "grad_norm": 0.0932522565126419, "learning_rate": 2.951337955893765e-05, "loss": 10.6877, "step": 3740 }, { "epoch": 0.01872705935229344, "grad_norm": 0.0923110842704773, "learning_rate": 2.9511877643996095e-05, "loss": 10.6855, "step": 3750 }, { "epoch": 0.01877699817723289, "grad_norm": 0.09654710441827774, "learning_rate": 2.9510375729054545e-05, "loss": 10.6861, "step": 3760 }, { "epoch": 0.01882693700217234, "grad_norm": 0.09733393788337708, "learning_rate": 2.9508873814112996e-05, "loss": 10.6865, "step": 3770 }, { "epoch": 0.018876875827111787, "grad_norm": 0.09079433232545853, "learning_rate": 2.9507371899171442e-05, "loss": 10.6857, "step": 3780 }, { "epoch": 0.018926814652051236, "grad_norm": 0.09449440240859985, "learning_rate": 2.9505869984229896e-05, "loss": 10.6846, "step": 3790 }, { "epoch": 0.018976753476990686, "grad_norm": 0.0952952429652214, "learning_rate": 2.9504368069288343e-05, "loss": 10.6846, "step": 3800 }, { "epoch": 0.019026692301930135, "grad_norm": 0.09154447913169861, "learning_rate": 2.9502866154346793e-05, "loss": 10.6848, "step": 3810 }, { "epoch": 0.019076631126869584, "grad_norm": 0.09125672280788422, "learning_rate": 2.9501364239405243e-05, "loss": 10.6841, "step": 3820 }, { "epoch": 0.019126569951809034, "grad_norm": 0.09873983263969421, "learning_rate": 2.949986232446369e-05, "loss": 10.6839, "step": 3830 }, { "epoch": 0.019176508776748483, "grad_norm": 0.09625153988599777, "learning_rate": 2.9498360409522143e-05, "loss": 10.6842, "step": 3840 }, { "epoch": 0.019226447601687933, "grad_norm": 0.09592974185943604, "learning_rate": 2.949685849458059e-05, "loss": 10.6827, "step": 3850 }, { "epoch": 0.019276386426627382, "grad_norm": 0.10091127455234528, "learning_rate": 2.949535657963904e-05, "loss": 10.684, "step": 3860 }, { "epoch": 0.01932632525156683, "grad_norm": 0.09118268638849258, "learning_rate": 2.949385466469749e-05, "loss": 10.6836, "step": 3870 }, { "epoch": 0.01937626407650628, "grad_norm": 0.09394898265600204, "learning_rate": 2.9492352749755937e-05, "loss": 10.6824, "step": 3880 }, { "epoch": 0.01942620290144573, "grad_norm": 0.08936263620853424, "learning_rate": 2.949085083481439e-05, "loss": 10.6821, "step": 3890 }, { "epoch": 0.019476141726385177, "grad_norm": 0.09612365067005157, "learning_rate": 2.9489348919872838e-05, "loss": 10.6827, "step": 3900 }, { "epoch": 0.019526080551324626, "grad_norm": 0.09695883840322495, "learning_rate": 2.9487847004931288e-05, "loss": 10.6824, "step": 3910 }, { "epoch": 0.019576019376264076, "grad_norm": 0.0948113203048706, "learning_rate": 2.9486345089989738e-05, "loss": 10.6813, "step": 3920 }, { "epoch": 0.019625958201203525, "grad_norm": 0.09577417373657227, "learning_rate": 2.9484843175048185e-05, "loss": 10.6822, "step": 3930 }, { "epoch": 0.019675897026142974, "grad_norm": 0.09592152386903763, "learning_rate": 2.948334126010664e-05, "loss": 10.6808, "step": 3940 }, { "epoch": 0.019725835851082424, "grad_norm": 0.09353554248809814, "learning_rate": 2.9481839345165085e-05, "loss": 10.68, "step": 3950 }, { "epoch": 0.019775774676021873, "grad_norm": 0.09733685851097107, "learning_rate": 2.948033743022354e-05, "loss": 10.6801, "step": 3960 }, { "epoch": 0.019825713500961323, "grad_norm": 0.09616942703723907, "learning_rate": 2.9478835515281986e-05, "loss": 10.6782, "step": 3970 }, { "epoch": 0.019875652325900772, "grad_norm": 0.091241754591465, "learning_rate": 2.9477333600340432e-05, "loss": 10.6792, "step": 3980 }, { "epoch": 0.01992559115084022, "grad_norm": 0.09594473242759705, "learning_rate": 2.9475831685398886e-05, "loss": 10.6788, "step": 3990 }, { "epoch": 0.01997552997577967, "grad_norm": 0.09622420370578766, "learning_rate": 2.9474329770457333e-05, "loss": 10.679, "step": 4000 }, { "epoch": 0.02002546880071912, "grad_norm": 0.09455501288175583, "learning_rate": 2.9472827855515786e-05, "loss": 10.6778, "step": 4010 }, { "epoch": 0.020075407625658567, "grad_norm": 0.08896750956773758, "learning_rate": 2.9471325940574233e-05, "loss": 10.679, "step": 4020 }, { "epoch": 0.020125346450598016, "grad_norm": 0.09691905975341797, "learning_rate": 2.946982402563268e-05, "loss": 10.6772, "step": 4030 }, { "epoch": 0.020175285275537466, "grad_norm": 0.09754662215709686, "learning_rate": 2.9468322110691134e-05, "loss": 10.6766, "step": 4040 }, { "epoch": 0.020225224100476915, "grad_norm": 0.09269405156373978, "learning_rate": 2.946682019574958e-05, "loss": 10.6776, "step": 4050 }, { "epoch": 0.020275162925416364, "grad_norm": 0.08767396956682205, "learning_rate": 2.9465318280808034e-05, "loss": 10.6769, "step": 4060 }, { "epoch": 0.020325101750355814, "grad_norm": 0.09328394383192062, "learning_rate": 2.946381636586648e-05, "loss": 10.6763, "step": 4070 }, { "epoch": 0.020375040575295263, "grad_norm": 0.09484425187110901, "learning_rate": 2.9462314450924927e-05, "loss": 10.6768, "step": 4080 }, { "epoch": 0.020424979400234713, "grad_norm": 0.09627150744199753, "learning_rate": 2.946081253598338e-05, "loss": 10.6757, "step": 4090 }, { "epoch": 0.020474918225174162, "grad_norm": 0.093561552464962, "learning_rate": 2.9459310621041828e-05, "loss": 10.6754, "step": 4100 }, { "epoch": 0.02052485705011361, "grad_norm": 0.0967199057340622, "learning_rate": 2.945780870610028e-05, "loss": 10.6758, "step": 4110 }, { "epoch": 0.02057479587505306, "grad_norm": 0.09370432049036026, "learning_rate": 2.9456306791158728e-05, "loss": 10.6746, "step": 4120 }, { "epoch": 0.02062473469999251, "grad_norm": 0.10350590944290161, "learning_rate": 2.9454804876217175e-05, "loss": 10.6754, "step": 4130 }, { "epoch": 0.02067467352493196, "grad_norm": 0.09343244880437851, "learning_rate": 2.945330296127563e-05, "loss": 10.6739, "step": 4140 }, { "epoch": 0.020724612349871406, "grad_norm": 0.09434296935796738, "learning_rate": 2.9451801046334075e-05, "loss": 10.6729, "step": 4150 }, { "epoch": 0.020774551174810856, "grad_norm": 0.09708379209041595, "learning_rate": 2.945029913139253e-05, "loss": 10.6736, "step": 4160 }, { "epoch": 0.020824489999750305, "grad_norm": 0.0962655320763588, "learning_rate": 2.9448797216450976e-05, "loss": 10.6728, "step": 4170 }, { "epoch": 0.020874428824689754, "grad_norm": 0.10440271347761154, "learning_rate": 2.9447295301509422e-05, "loss": 10.6742, "step": 4180 }, { "epoch": 0.020924367649629204, "grad_norm": 0.10102055221796036, "learning_rate": 2.9445793386567876e-05, "loss": 10.6736, "step": 4190 }, { "epoch": 0.020974306474568653, "grad_norm": 0.10021764785051346, "learning_rate": 2.9444291471626323e-05, "loss": 10.6728, "step": 4200 }, { "epoch": 0.021024245299508103, "grad_norm": 0.09241849929094315, "learning_rate": 2.9442789556684776e-05, "loss": 10.6724, "step": 4210 }, { "epoch": 0.021074184124447552, "grad_norm": 0.10048867762088776, "learning_rate": 2.9441287641743223e-05, "loss": 10.6714, "step": 4220 }, { "epoch": 0.021124122949387, "grad_norm": 0.09459193050861359, "learning_rate": 2.943978572680167e-05, "loss": 10.6714, "step": 4230 }, { "epoch": 0.02117406177432645, "grad_norm": 0.09565653651952744, "learning_rate": 2.9438283811860124e-05, "loss": 10.6698, "step": 4240 }, { "epoch": 0.0212240005992659, "grad_norm": 0.09747759997844696, "learning_rate": 2.943678189691857e-05, "loss": 10.6708, "step": 4250 }, { "epoch": 0.02127393942420535, "grad_norm": 0.09385592490434647, "learning_rate": 2.9435279981977024e-05, "loss": 10.6712, "step": 4260 }, { "epoch": 0.021323878249144796, "grad_norm": 0.09437979012727737, "learning_rate": 2.943377806703547e-05, "loss": 10.672, "step": 4270 }, { "epoch": 0.021373817074084245, "grad_norm": 0.09702524542808533, "learning_rate": 2.943227615209392e-05, "loss": 10.6702, "step": 4280 }, { "epoch": 0.021423755899023695, "grad_norm": 0.09239441156387329, "learning_rate": 2.943077423715237e-05, "loss": 10.6703, "step": 4290 }, { "epoch": 0.021473694723963144, "grad_norm": 0.09539250284433365, "learning_rate": 2.9429272322210818e-05, "loss": 10.6698, "step": 4300 }, { "epoch": 0.021523633548902594, "grad_norm": 0.09849133342504501, "learning_rate": 2.942777040726927e-05, "loss": 10.6696, "step": 4310 }, { "epoch": 0.021573572373842043, "grad_norm": 0.09273874014616013, "learning_rate": 2.9426268492327718e-05, "loss": 10.6689, "step": 4320 }, { "epoch": 0.021623511198781493, "grad_norm": 0.09297140687704086, "learning_rate": 2.942476657738617e-05, "loss": 10.6676, "step": 4330 }, { "epoch": 0.021673450023720942, "grad_norm": 0.09432685375213623, "learning_rate": 2.942326466244462e-05, "loss": 10.6677, "step": 4340 }, { "epoch": 0.02172338884866039, "grad_norm": 3660.670654296875, "learning_rate": 2.9421762747503065e-05, "loss": 10.674, "step": 4350 }, { "epoch": 0.02177332767359984, "grad_norm": 0.09602420032024384, "learning_rate": 2.942026083256152e-05, "loss": 16.9632, "step": 4360 }, { "epoch": 0.02182326649853929, "grad_norm": 0.09601210057735443, "learning_rate": 2.9418758917619966e-05, "loss": 10.6676, "step": 4370 }, { "epoch": 0.02187320532347874, "grad_norm": 0.08743233233690262, "learning_rate": 2.9417257002678416e-05, "loss": 10.6673, "step": 4380 }, { "epoch": 0.021923144148418186, "grad_norm": 0.10627735406160355, "learning_rate": 2.9415755087736866e-05, "loss": 10.8175, "step": 4390 }, { "epoch": 0.021973082973357635, "grad_norm": 0.09581291675567627, "learning_rate": 2.9414253172795313e-05, "loss": 10.6657, "step": 4400 }, { "epoch": 0.022023021798297085, "grad_norm": 0.09985919296741486, "learning_rate": 2.9412751257853766e-05, "loss": 10.6651, "step": 4410 }, { "epoch": 0.022072960623236534, "grad_norm": 0.09798286855220795, "learning_rate": 2.9411249342912213e-05, "loss": 10.6658, "step": 4420 }, { "epoch": 0.022122899448175984, "grad_norm": 0.09146228432655334, "learning_rate": 2.9409747427970663e-05, "loss": 10.6667, "step": 4430 }, { "epoch": 0.022172838273115433, "grad_norm": 0.10088124871253967, "learning_rate": 2.9408245513029114e-05, "loss": 10.6659, "step": 4440 }, { "epoch": 0.022222777098054883, "grad_norm": 0.09959380328655243, "learning_rate": 2.940674359808756e-05, "loss": 10.666, "step": 4450 }, { "epoch": 0.022272715922994332, "grad_norm": 0.09625295549631119, "learning_rate": 2.9405241683146014e-05, "loss": 10.6653, "step": 4460 }, { "epoch": 0.02232265474793378, "grad_norm": 0.09174207597970963, "learning_rate": 2.940373976820446e-05, "loss": 10.6652, "step": 4470 }, { "epoch": 0.02237259357287323, "grad_norm": 0.09853401780128479, "learning_rate": 2.940223785326291e-05, "loss": 10.6626, "step": 4480 }, { "epoch": 0.02242253239781268, "grad_norm": 0.09681205451488495, "learning_rate": 2.940073593832136e-05, "loss": 10.6643, "step": 4490 }, { "epoch": 0.02247247122275213, "grad_norm": 0.0938250795006752, "learning_rate": 2.9399234023379808e-05, "loss": 10.6627, "step": 4500 }, { "epoch": 0.02252241004769158, "grad_norm": 0.09339260309934616, "learning_rate": 2.939773210843826e-05, "loss": 10.6642, "step": 4510 }, { "epoch": 0.022572348872631025, "grad_norm": 0.0960143581032753, "learning_rate": 2.9396230193496708e-05, "loss": 10.6622, "step": 4520 }, { "epoch": 0.022622287697570475, "grad_norm": 0.09255195409059525, "learning_rate": 2.939472827855516e-05, "loss": 10.6628, "step": 4530 }, { "epoch": 0.022672226522509924, "grad_norm": 0.0997820645570755, "learning_rate": 2.939322636361361e-05, "loss": 10.661, "step": 4540 }, { "epoch": 0.022722165347449374, "grad_norm": 0.09495710581541061, "learning_rate": 2.9391724448672055e-05, "loss": 10.6627, "step": 4550 }, { "epoch": 0.022772104172388823, "grad_norm": 0.09391613304615021, "learning_rate": 2.939022253373051e-05, "loss": 10.6619, "step": 4560 }, { "epoch": 0.022822042997328273, "grad_norm": 0.09510276466608047, "learning_rate": 2.9388720618788956e-05, "loss": 10.6613, "step": 4570 }, { "epoch": 0.022871981822267722, "grad_norm": 0.09492931514978409, "learning_rate": 2.9387218703847406e-05, "loss": 10.6618, "step": 4580 }, { "epoch": 0.02292192064720717, "grad_norm": 0.0956352949142456, "learning_rate": 2.9385716788905856e-05, "loss": 10.6594, "step": 4590 }, { "epoch": 0.02297185947214662, "grad_norm": 0.09539540112018585, "learning_rate": 2.9384214873964306e-05, "loss": 10.6604, "step": 4600 }, { "epoch": 0.02302179829708607, "grad_norm": 0.09625688195228577, "learning_rate": 2.9382712959022756e-05, "loss": 10.6606, "step": 4610 }, { "epoch": 0.02307173712202552, "grad_norm": 0.09467311948537827, "learning_rate": 2.9381211044081203e-05, "loss": 10.6607, "step": 4620 }, { "epoch": 0.02312167594696497, "grad_norm": 0.10024222731590271, "learning_rate": 2.9379709129139653e-05, "loss": 10.6599, "step": 4630 }, { "epoch": 0.023171614771904415, "grad_norm": 0.09454458951950073, "learning_rate": 2.9378207214198104e-05, "loss": 10.6598, "step": 4640 }, { "epoch": 0.023221553596843865, "grad_norm": 0.10061488300561905, "learning_rate": 2.9376705299256554e-05, "loss": 10.6583, "step": 4650 }, { "epoch": 0.023271492421783314, "grad_norm": 0.09349951148033142, "learning_rate": 2.9375203384315004e-05, "loss": 10.6581, "step": 4660 }, { "epoch": 0.023321431246722764, "grad_norm": 0.09272319078445435, "learning_rate": 2.937370146937345e-05, "loss": 10.6588, "step": 4670 }, { "epoch": 0.023371370071662213, "grad_norm": 0.09603145718574524, "learning_rate": 2.93721995544319e-05, "loss": 10.6568, "step": 4680 }, { "epoch": 0.023421308896601663, "grad_norm": 0.09523800760507584, "learning_rate": 2.937069763949035e-05, "loss": 10.6573, "step": 4690 }, { "epoch": 0.023471247721541112, "grad_norm": 0.09391947090625763, "learning_rate": 2.93691957245488e-05, "loss": 10.6567, "step": 4700 }, { "epoch": 0.02352118654648056, "grad_norm": 0.09547725319862366, "learning_rate": 2.936769380960725e-05, "loss": 10.656, "step": 4710 }, { "epoch": 0.02357112537142001, "grad_norm": 0.09450241923332214, "learning_rate": 2.9366191894665698e-05, "loss": 10.6568, "step": 4720 }, { "epoch": 0.02362106419635946, "grad_norm": 0.09558774530887604, "learning_rate": 2.936468997972415e-05, "loss": 10.6559, "step": 4730 }, { "epoch": 0.02367100302129891, "grad_norm": 0.09340886026620865, "learning_rate": 2.93631880647826e-05, "loss": 10.6566, "step": 4740 }, { "epoch": 0.02372094184623836, "grad_norm": 0.09513580799102783, "learning_rate": 2.936168614984105e-05, "loss": 10.6557, "step": 4750 }, { "epoch": 0.023770880671177805, "grad_norm": 0.09321212023496628, "learning_rate": 2.93601842348995e-05, "loss": 10.6559, "step": 4760 }, { "epoch": 0.023820819496117255, "grad_norm": 0.10149511694908142, "learning_rate": 2.9358682319957946e-05, "loss": 10.6545, "step": 4770 }, { "epoch": 0.023870758321056704, "grad_norm": 0.09594745188951492, "learning_rate": 2.9357180405016396e-05, "loss": 10.6542, "step": 4780 }, { "epoch": 0.023920697145996154, "grad_norm": 0.09818349033594131, "learning_rate": 2.9355678490074846e-05, "loss": 10.6542, "step": 4790 }, { "epoch": 0.023970635970935603, "grad_norm": 0.09233896434307098, "learning_rate": 2.9354176575133296e-05, "loss": 10.654, "step": 4800 }, { "epoch": 0.024020574795875053, "grad_norm": 0.09461528062820435, "learning_rate": 2.9352674660191746e-05, "loss": 10.6537, "step": 4810 }, { "epoch": 0.024070513620814502, "grad_norm": 0.10067877173423767, "learning_rate": 2.9351172745250193e-05, "loss": 10.6535, "step": 4820 }, { "epoch": 0.02412045244575395, "grad_norm": 0.08958308398723602, "learning_rate": 2.9349670830308643e-05, "loss": 10.6532, "step": 4830 }, { "epoch": 0.0241703912706934, "grad_norm": 0.09203724563121796, "learning_rate": 2.9348168915367094e-05, "loss": 10.652, "step": 4840 }, { "epoch": 0.02422033009563285, "grad_norm": 0.11031246185302734, "learning_rate": 2.9346667000425544e-05, "loss": 10.6523, "step": 4850 }, { "epoch": 0.0242702689205723, "grad_norm": 0.09454483538866043, "learning_rate": 2.9345165085483994e-05, "loss": 10.6523, "step": 4860 }, { "epoch": 0.02432020774551175, "grad_norm": 0.0910816341638565, "learning_rate": 2.934366317054244e-05, "loss": 10.6521, "step": 4870 }, { "epoch": 0.0243701465704512, "grad_norm": 0.09349098056554794, "learning_rate": 2.934216125560089e-05, "loss": 10.6514, "step": 4880 }, { "epoch": 0.024420085395390645, "grad_norm": 0.0930526852607727, "learning_rate": 2.934065934065934e-05, "loss": 10.6526, "step": 4890 }, { "epoch": 0.024470024220330094, "grad_norm": 0.09221765398979187, "learning_rate": 2.933915742571779e-05, "loss": 10.652, "step": 4900 }, { "epoch": 0.024519963045269544, "grad_norm": 0.08790691196918488, "learning_rate": 2.933765551077624e-05, "loss": 10.6504, "step": 4910 }, { "epoch": 0.024569901870208993, "grad_norm": 0.09350787848234177, "learning_rate": 2.933615359583469e-05, "loss": 10.6491, "step": 4920 }, { "epoch": 0.024619840695148443, "grad_norm": 0.10474139451980591, "learning_rate": 2.933465168089314e-05, "loss": 10.651, "step": 4930 }, { "epoch": 0.024669779520087892, "grad_norm": 0.09355979412794113, "learning_rate": 2.933314976595159e-05, "loss": 10.6504, "step": 4940 }, { "epoch": 0.02471971834502734, "grad_norm": 0.09748471528291702, "learning_rate": 2.933164785101004e-05, "loss": 10.6502, "step": 4950 }, { "epoch": 0.02476965716996679, "grad_norm": 0.09395548701286316, "learning_rate": 2.933014593606849e-05, "loss": 10.6488, "step": 4960 }, { "epoch": 0.02481959599490624, "grad_norm": 0.09552334249019623, "learning_rate": 2.932864402112694e-05, "loss": 10.6497, "step": 4970 }, { "epoch": 0.02486953481984569, "grad_norm": 0.0992099866271019, "learning_rate": 2.9327142106185386e-05, "loss": 10.6502, "step": 4980 }, { "epoch": 0.02491947364478514, "grad_norm": 0.09515640139579773, "learning_rate": 2.9325640191243836e-05, "loss": 10.6497, "step": 4990 }, { "epoch": 0.02496941246972459, "grad_norm": 0.09259581565856934, "learning_rate": 2.9324138276302286e-05, "loss": 10.648, "step": 5000 }, { "epoch": 0.025019351294664035, "grad_norm": 0.09485942125320435, "learning_rate": 2.9322636361360736e-05, "loss": 10.6474, "step": 5010 }, { "epoch": 0.025069290119603484, "grad_norm": 0.09455141425132751, "learning_rate": 2.9321134446419187e-05, "loss": 10.6474, "step": 5020 }, { "epoch": 0.025119228944542934, "grad_norm": 0.09555475413799286, "learning_rate": 2.9319632531477633e-05, "loss": 10.6471, "step": 5030 }, { "epoch": 0.025169167769482383, "grad_norm": 0.09917616844177246, "learning_rate": 2.9318130616536084e-05, "loss": 10.646, "step": 5040 }, { "epoch": 0.025219106594421833, "grad_norm": 0.09640449285507202, "learning_rate": 2.9316628701594534e-05, "loss": 10.6467, "step": 5050 }, { "epoch": 0.025269045419361282, "grad_norm": 0.09314526617527008, "learning_rate": 2.9315126786652984e-05, "loss": 10.6474, "step": 5060 }, { "epoch": 0.02531898424430073, "grad_norm": 0.09635156393051147, "learning_rate": 2.9313624871711434e-05, "loss": 10.6464, "step": 5070 }, { "epoch": 0.02536892306924018, "grad_norm": 0.09240912646055222, "learning_rate": 2.931212295676988e-05, "loss": 10.6449, "step": 5080 }, { "epoch": 0.02541886189417963, "grad_norm": 0.09308675676584244, "learning_rate": 2.931062104182833e-05, "loss": 10.6455, "step": 5090 }, { "epoch": 0.02546880071911908, "grad_norm": 0.10017043352127075, "learning_rate": 2.930911912688678e-05, "loss": 10.6449, "step": 5100 }, { "epoch": 0.02551873954405853, "grad_norm": 0.1008182093501091, "learning_rate": 2.930761721194523e-05, "loss": 10.6452, "step": 5110 }, { "epoch": 0.02556867836899798, "grad_norm": 0.09373480826616287, "learning_rate": 2.930611529700368e-05, "loss": 10.6442, "step": 5120 }, { "epoch": 0.025618617193937425, "grad_norm": 0.0935322567820549, "learning_rate": 2.930461338206213e-05, "loss": 10.6445, "step": 5130 }, { "epoch": 0.025668556018876874, "grad_norm": 0.08992544561624527, "learning_rate": 2.930311146712058e-05, "loss": 10.6437, "step": 5140 }, { "epoch": 0.025718494843816324, "grad_norm": 0.09083853662014008, "learning_rate": 2.930160955217903e-05, "loss": 10.6428, "step": 5150 }, { "epoch": 0.025768433668755773, "grad_norm": 0.09427718818187714, "learning_rate": 2.930010763723748e-05, "loss": 10.642, "step": 5160 }, { "epoch": 0.025818372493695223, "grad_norm": 0.09764672070741653, "learning_rate": 2.929860572229593e-05, "loss": 10.6418, "step": 5170 }, { "epoch": 0.025868311318634672, "grad_norm": 0.09804851561784744, "learning_rate": 2.9297103807354376e-05, "loss": 10.6429, "step": 5180 }, { "epoch": 0.02591825014357412, "grad_norm": 0.095367930829525, "learning_rate": 2.9295601892412826e-05, "loss": 10.6437, "step": 5190 }, { "epoch": 0.02596818896851357, "grad_norm": 0.09083083271980286, "learning_rate": 2.9294099977471276e-05, "loss": 10.6429, "step": 5200 }, { "epoch": 0.02601812779345302, "grad_norm": 0.09228214621543884, "learning_rate": 2.9292598062529726e-05, "loss": 10.6408, "step": 5210 }, { "epoch": 0.02606806661839247, "grad_norm": 0.09770570695400238, "learning_rate": 2.9291096147588177e-05, "loss": 10.6422, "step": 5220 }, { "epoch": 0.02611800544333192, "grad_norm": 0.0948592945933342, "learning_rate": 2.9289594232646623e-05, "loss": 10.6408, "step": 5230 }, { "epoch": 0.02616794426827137, "grad_norm": 0.09406052529811859, "learning_rate": 2.9288092317705077e-05, "loss": 10.6412, "step": 5240 }, { "epoch": 0.02621788309321082, "grad_norm": 0.10018353909254074, "learning_rate": 2.9286590402763524e-05, "loss": 10.6396, "step": 5250 }, { "epoch": 0.026267821918150264, "grad_norm": 0.08951601386070251, "learning_rate": 2.9285088487821974e-05, "loss": 10.642, "step": 5260 }, { "epoch": 0.026317760743089714, "grad_norm": 0.10409606993198395, "learning_rate": 2.9283586572880424e-05, "loss": 10.6401, "step": 5270 }, { "epoch": 0.026367699568029163, "grad_norm": 0.09470842033624649, "learning_rate": 2.928208465793887e-05, "loss": 10.6407, "step": 5280 }, { "epoch": 0.026417638392968613, "grad_norm": 0.09823372960090637, "learning_rate": 2.9280582742997324e-05, "loss": 10.6392, "step": 5290 }, { "epoch": 0.026467577217908062, "grad_norm": 0.10075034201145172, "learning_rate": 2.927908082805577e-05, "loss": 10.6392, "step": 5300 }, { "epoch": 0.02651751604284751, "grad_norm": 0.09643511474132538, "learning_rate": 2.927757891311422e-05, "loss": 10.6394, "step": 5310 }, { "epoch": 0.02656745486778696, "grad_norm": 0.09283813089132309, "learning_rate": 2.927607699817267e-05, "loss": 10.6378, "step": 5320 }, { "epoch": 0.02661739369272641, "grad_norm": 0.09951145201921463, "learning_rate": 2.927457508323112e-05, "loss": 10.6374, "step": 5330 }, { "epoch": 0.02666733251766586, "grad_norm": 0.0960063636302948, "learning_rate": 2.9273073168289572e-05, "loss": 10.6374, "step": 5340 }, { "epoch": 0.02671727134260531, "grad_norm": 0.09506009519100189, "learning_rate": 2.927157125334802e-05, "loss": 10.6369, "step": 5350 }, { "epoch": 0.02676721016754476, "grad_norm": 0.09488440304994583, "learning_rate": 2.927006933840647e-05, "loss": 10.6387, "step": 5360 }, { "epoch": 0.02681714899248421, "grad_norm": 0.09644606709480286, "learning_rate": 2.926856742346492e-05, "loss": 10.6359, "step": 5370 }, { "epoch": 0.026867087817423654, "grad_norm": 0.09125857800245285, "learning_rate": 2.9267065508523366e-05, "loss": 10.6361, "step": 5380 }, { "epoch": 0.026917026642363104, "grad_norm": 0.09280921518802643, "learning_rate": 2.926556359358182e-05, "loss": 10.6364, "step": 5390 }, { "epoch": 0.026966965467302553, "grad_norm": 0.09589015692472458, "learning_rate": 2.9264061678640266e-05, "loss": 10.6376, "step": 5400 }, { "epoch": 0.027016904292242003, "grad_norm": 0.0969088077545166, "learning_rate": 2.9262559763698716e-05, "loss": 10.6358, "step": 5410 }, { "epoch": 0.027066843117181452, "grad_norm": 0.09490292519330978, "learning_rate": 2.9261057848757167e-05, "loss": 10.6357, "step": 5420 }, { "epoch": 0.0271167819421209, "grad_norm": 0.09488876163959503, "learning_rate": 2.9259555933815613e-05, "loss": 10.6352, "step": 5430 }, { "epoch": 0.02716672076706035, "grad_norm": 0.09537888318300247, "learning_rate": 2.9258054018874067e-05, "loss": 10.6348, "step": 5440 }, { "epoch": 0.0272166595919998, "grad_norm": 0.0909004881978035, "learning_rate": 2.9256552103932514e-05, "loss": 10.6343, "step": 5450 }, { "epoch": 0.02726659841693925, "grad_norm": 0.0989050418138504, "learning_rate": 2.9255050188990964e-05, "loss": 10.6339, "step": 5460 }, { "epoch": 0.0273165372418787, "grad_norm": 0.09519681334495544, "learning_rate": 2.9253548274049414e-05, "loss": 10.6333, "step": 5470 }, { "epoch": 0.02736647606681815, "grad_norm": 0.09233660250902176, "learning_rate": 2.925204635910786e-05, "loss": 10.6336, "step": 5480 }, { "epoch": 0.0274164148917576, "grad_norm": 0.09617756307125092, "learning_rate": 2.9250544444166314e-05, "loss": 10.6345, "step": 5490 }, { "epoch": 0.027466353716697048, "grad_norm": 0.09878206998109818, "learning_rate": 2.924904252922476e-05, "loss": 10.6326, "step": 5500 }, { "epoch": 0.027516292541636494, "grad_norm": 0.10213974118232727, "learning_rate": 2.924754061428321e-05, "loss": 10.6315, "step": 5510 }, { "epoch": 0.027566231366575943, "grad_norm": 0.09810013324022293, "learning_rate": 2.924603869934166e-05, "loss": 10.6324, "step": 5520 }, { "epoch": 0.027616170191515393, "grad_norm": 0.0984368622303009, "learning_rate": 2.924453678440011e-05, "loss": 10.6327, "step": 5530 }, { "epoch": 0.027666109016454842, "grad_norm": 0.094744011759758, "learning_rate": 2.9243034869458562e-05, "loss": 10.6325, "step": 5540 }, { "epoch": 0.02771604784139429, "grad_norm": 0.09556036442518234, "learning_rate": 2.924153295451701e-05, "loss": 10.6324, "step": 5550 }, { "epoch": 0.02776598666633374, "grad_norm": 0.09775315970182419, "learning_rate": 2.9240031039575462e-05, "loss": 10.6308, "step": 5560 }, { "epoch": 0.02781592549127319, "grad_norm": 0.09518972039222717, "learning_rate": 2.923852912463391e-05, "loss": 10.6312, "step": 5570 }, { "epoch": 0.02786586431621264, "grad_norm": 0.10233526676893234, "learning_rate": 2.9237027209692356e-05, "loss": 10.631, "step": 5580 }, { "epoch": 0.02791580314115209, "grad_norm": 0.09543079882860184, "learning_rate": 2.923552529475081e-05, "loss": 10.6295, "step": 5590 }, { "epoch": 0.02796574196609154, "grad_norm": 0.08888249844312668, "learning_rate": 2.9234023379809256e-05, "loss": 10.6316, "step": 5600 }, { "epoch": 0.02801568079103099, "grad_norm": 0.09589431434869766, "learning_rate": 2.923252146486771e-05, "loss": 10.6294, "step": 5610 }, { "epoch": 0.028065619615970438, "grad_norm": 0.09286142885684967, "learning_rate": 2.9231019549926157e-05, "loss": 10.6302, "step": 5620 }, { "epoch": 0.028115558440909884, "grad_norm": 0.09479831159114838, "learning_rate": 2.9229517634984603e-05, "loss": 10.629, "step": 5630 }, { "epoch": 0.028165497265849333, "grad_norm": 0.09502566605806351, "learning_rate": 2.9228015720043057e-05, "loss": 10.6301, "step": 5640 }, { "epoch": 0.028215436090788783, "grad_norm": 0.0879800021648407, "learning_rate": 2.9226513805101504e-05, "loss": 10.6291, "step": 5650 }, { "epoch": 0.028265374915728232, "grad_norm": 0.09621425718069077, "learning_rate": 2.9225011890159957e-05, "loss": 10.6279, "step": 5660 }, { "epoch": 0.02831531374066768, "grad_norm": 0.0952867642045021, "learning_rate": 2.9223509975218404e-05, "loss": 10.629, "step": 5670 }, { "epoch": 0.02836525256560713, "grad_norm": 0.09745540469884872, "learning_rate": 2.922200806027685e-05, "loss": 10.6263, "step": 5680 }, { "epoch": 0.02841519139054658, "grad_norm": 0.10703998804092407, "learning_rate": 2.9220506145335305e-05, "loss": 10.6267, "step": 5690 }, { "epoch": 0.02846513021548603, "grad_norm": 0.09526613354682922, "learning_rate": 2.921900423039375e-05, "loss": 10.6286, "step": 5700 }, { "epoch": 0.02851506904042548, "grad_norm": 0.09355132281780243, "learning_rate": 2.9217502315452205e-05, "loss": 10.6258, "step": 5710 }, { "epoch": 0.02856500786536493, "grad_norm": 0.09581465274095535, "learning_rate": 2.921600040051065e-05, "loss": 10.6266, "step": 5720 }, { "epoch": 0.02861494669030438, "grad_norm": 0.09699860215187073, "learning_rate": 2.92144984855691e-05, "loss": 10.6275, "step": 5730 }, { "epoch": 0.028664885515243828, "grad_norm": 0.09118512272834778, "learning_rate": 2.9212996570627552e-05, "loss": 10.6246, "step": 5740 }, { "epoch": 0.028714824340183274, "grad_norm": 0.09603618830442429, "learning_rate": 2.9211494655686e-05, "loss": 10.6266, "step": 5750 }, { "epoch": 0.028764763165122723, "grad_norm": 0.09384205937385559, "learning_rate": 2.9209992740744452e-05, "loss": 10.6246, "step": 5760 }, { "epoch": 0.028814701990062173, "grad_norm": 0.09352291375398636, "learning_rate": 2.92084908258029e-05, "loss": 10.6245, "step": 5770 }, { "epoch": 0.028864640815001622, "grad_norm": 0.09608279168605804, "learning_rate": 2.9206988910861346e-05, "loss": 10.6247, "step": 5780 }, { "epoch": 0.02891457963994107, "grad_norm": 0.09599608927965164, "learning_rate": 2.92054869959198e-05, "loss": 10.6243, "step": 5790 }, { "epoch": 0.02896451846488052, "grad_norm": 0.10410738736391068, "learning_rate": 2.9203985080978246e-05, "loss": 10.6244, "step": 5800 }, { "epoch": 0.02901445728981997, "grad_norm": 0.10078850388526917, "learning_rate": 2.92024831660367e-05, "loss": 10.6234, "step": 5810 }, { "epoch": 0.02906439611475942, "grad_norm": 0.09706875681877136, "learning_rate": 2.9200981251095147e-05, "loss": 10.6225, "step": 5820 }, { "epoch": 0.02911433493969887, "grad_norm": 0.09206659346818924, "learning_rate": 2.9199479336153593e-05, "loss": 10.6223, "step": 5830 }, { "epoch": 0.02916427376463832, "grad_norm": 0.09456048905849457, "learning_rate": 2.9197977421212047e-05, "loss": 10.6215, "step": 5840 }, { "epoch": 0.02921421258957777, "grad_norm": 0.09819670021533966, "learning_rate": 2.9196475506270494e-05, "loss": 10.6221, "step": 5850 }, { "epoch": 0.029264151414517218, "grad_norm": 0.09750770032405853, "learning_rate": 2.9194973591328947e-05, "loss": 10.6216, "step": 5860 }, { "epoch": 0.029314090239456667, "grad_norm": 0.09041731804609299, "learning_rate": 2.9193471676387394e-05, "loss": 10.6213, "step": 5870 }, { "epoch": 0.029364029064396113, "grad_norm": 0.09223254024982452, "learning_rate": 2.9191969761445844e-05, "loss": 10.6214, "step": 5880 }, { "epoch": 0.029413967889335563, "grad_norm": 0.09175052493810654, "learning_rate": 2.9190467846504295e-05, "loss": 10.62, "step": 5890 }, { "epoch": 0.029463906714275012, "grad_norm": 0.0979679524898529, "learning_rate": 2.918896593156274e-05, "loss": 10.6207, "step": 5900 }, { "epoch": 0.02951384553921446, "grad_norm": 0.0917288288474083, "learning_rate": 2.9187464016621195e-05, "loss": 10.6207, "step": 5910 }, { "epoch": 0.02956378436415391, "grad_norm": 0.09289643913507462, "learning_rate": 2.918596210167964e-05, "loss": 10.6209, "step": 5920 }, { "epoch": 0.02961372318909336, "grad_norm": 0.09195461124181747, "learning_rate": 2.9184460186738092e-05, "loss": 10.6197, "step": 5930 }, { "epoch": 0.02966366201403281, "grad_norm": 0.09683648496866226, "learning_rate": 2.9182958271796542e-05, "loss": 10.6192, "step": 5940 }, { "epoch": 0.02971360083897226, "grad_norm": 0.08719411492347717, "learning_rate": 2.918145635685499e-05, "loss": 10.619, "step": 5950 }, { "epoch": 0.02976353966391171, "grad_norm": 0.0998111441731453, "learning_rate": 2.9179954441913442e-05, "loss": 10.6193, "step": 5960 }, { "epoch": 0.02981347848885116, "grad_norm": 0.09359613060951233, "learning_rate": 2.917845252697189e-05, "loss": 10.618, "step": 5970 }, { "epoch": 0.029863417313790608, "grad_norm": 0.09427807480096817, "learning_rate": 2.917695061203034e-05, "loss": 10.6174, "step": 5980 }, { "epoch": 0.029913356138730057, "grad_norm": 0.09695295989513397, "learning_rate": 2.917544869708879e-05, "loss": 10.6173, "step": 5990 }, { "epoch": 0.029963294963669503, "grad_norm": 0.10022547841072083, "learning_rate": 2.9173946782147236e-05, "loss": 10.6184, "step": 6000 }, { "epoch": 0.030013233788608953, "grad_norm": 0.09429972618818283, "learning_rate": 2.917244486720569e-05, "loss": 10.6166, "step": 6010 }, { "epoch": 0.030063172613548402, "grad_norm": 0.09150966256856918, "learning_rate": 2.9170942952264137e-05, "loss": 10.6176, "step": 6020 }, { "epoch": 0.03011311143848785, "grad_norm": 0.09376371651887894, "learning_rate": 2.9169441037322587e-05, "loss": 10.6167, "step": 6030 }, { "epoch": 0.0301630502634273, "grad_norm": 0.09400255978107452, "learning_rate": 2.9167939122381037e-05, "loss": 10.6147, "step": 6040 }, { "epoch": 0.03021298908836675, "grad_norm": 0.10719768702983856, "learning_rate": 2.9166437207439484e-05, "loss": 10.6168, "step": 6050 }, { "epoch": 0.0302629279133062, "grad_norm": 0.0984666720032692, "learning_rate": 2.9164935292497937e-05, "loss": 10.6148, "step": 6060 }, { "epoch": 0.03031286673824565, "grad_norm": 0.09107008576393127, "learning_rate": 2.9163433377556384e-05, "loss": 10.6148, "step": 6070 }, { "epoch": 0.0303628055631851, "grad_norm": 0.09866014868021011, "learning_rate": 2.9161931462614834e-05, "loss": 10.6146, "step": 6080 }, { "epoch": 0.03041274438812455, "grad_norm": 0.0949750542640686, "learning_rate": 2.9160429547673285e-05, "loss": 10.6148, "step": 6090 }, { "epoch": 0.030462683213063998, "grad_norm": 0.09634987264871597, "learning_rate": 2.915892763273173e-05, "loss": 10.6139, "step": 6100 }, { "epoch": 0.030512622038003447, "grad_norm": 0.09872616082429886, "learning_rate": 2.9157425717790185e-05, "loss": 10.615, "step": 6110 }, { "epoch": 0.030562560862942893, "grad_norm": 0.09585590660572052, "learning_rate": 2.915592380284863e-05, "loss": 10.6138, "step": 6120 }, { "epoch": 0.030612499687882343, "grad_norm": 0.09388266503810883, "learning_rate": 2.9154421887907082e-05, "loss": 10.614, "step": 6130 }, { "epoch": 0.030662438512821792, "grad_norm": 0.10067202895879745, "learning_rate": 2.9152919972965532e-05, "loss": 10.6142, "step": 6140 }, { "epoch": 0.03071237733776124, "grad_norm": 0.09900005161762238, "learning_rate": 2.915141805802398e-05, "loss": 10.6135, "step": 6150 }, { "epoch": 0.03076231616270069, "grad_norm": 0.09707643836736679, "learning_rate": 2.9149916143082432e-05, "loss": 10.6129, "step": 6160 }, { "epoch": 0.03081225498764014, "grad_norm": 0.09792651236057281, "learning_rate": 2.914841422814088e-05, "loss": 10.6117, "step": 6170 }, { "epoch": 0.03086219381257959, "grad_norm": 0.09310603886842728, "learning_rate": 2.914691231319933e-05, "loss": 10.6116, "step": 6180 }, { "epoch": 0.03091213263751904, "grad_norm": 0.09561890363693237, "learning_rate": 2.914541039825778e-05, "loss": 10.6131, "step": 6190 }, { "epoch": 0.03096207146245849, "grad_norm": 0.09028071165084839, "learning_rate": 2.914390848331623e-05, "loss": 10.6135, "step": 6200 }, { "epoch": 0.03101201028739794, "grad_norm": 0.09210582822561264, "learning_rate": 2.914240656837468e-05, "loss": 10.6127, "step": 6210 }, { "epoch": 0.031061949112337388, "grad_norm": 0.09679680317640305, "learning_rate": 2.9140904653433127e-05, "loss": 10.611, "step": 6220 }, { "epoch": 0.031111887937276837, "grad_norm": 0.09834344685077667, "learning_rate": 2.9139402738491577e-05, "loss": 10.6099, "step": 6230 }, { "epoch": 0.031161826762216287, "grad_norm": 0.09306725114583969, "learning_rate": 2.9137900823550027e-05, "loss": 10.6113, "step": 6240 }, { "epoch": 0.031211765587155733, "grad_norm": 0.09439186751842499, "learning_rate": 2.9136398908608477e-05, "loss": 10.6104, "step": 6250 }, { "epoch": 0.031261704412095186, "grad_norm": 0.10274600237607956, "learning_rate": 2.9134896993666927e-05, "loss": 10.6088, "step": 6260 }, { "epoch": 0.031311643237034635, "grad_norm": 0.10353299975395203, "learning_rate": 2.9133395078725374e-05, "loss": 10.6096, "step": 6270 }, { "epoch": 0.031361582061974085, "grad_norm": 0.09838922321796417, "learning_rate": 2.9131893163783824e-05, "loss": 10.6104, "step": 6280 }, { "epoch": 0.031411520886913534, "grad_norm": 0.0952482670545578, "learning_rate": 2.9130391248842275e-05, "loss": 10.6079, "step": 6290 }, { "epoch": 0.03146145971185298, "grad_norm": 0.0897747352719307, "learning_rate": 2.9128889333900725e-05, "loss": 10.6092, "step": 6300 }, { "epoch": 0.031511398536792426, "grad_norm": 0.08722511678934097, "learning_rate": 2.9127387418959175e-05, "loss": 10.611, "step": 6310 }, { "epoch": 0.031561337361731875, "grad_norm": 0.09219997376203537, "learning_rate": 2.9125885504017622e-05, "loss": 10.6096, "step": 6320 }, { "epoch": 0.031611276186671325, "grad_norm": 0.08804293721914291, "learning_rate": 2.9124383589076072e-05, "loss": 10.6068, "step": 6330 }, { "epoch": 0.031661215011610774, "grad_norm": 0.09116089344024658, "learning_rate": 2.9122881674134522e-05, "loss": 10.6073, "step": 6340 }, { "epoch": 0.031711153836550224, "grad_norm": 0.10001252591609955, "learning_rate": 2.9121379759192972e-05, "loss": 10.6077, "step": 6350 }, { "epoch": 0.03176109266148967, "grad_norm": 0.0931568518280983, "learning_rate": 2.9119877844251422e-05, "loss": 10.6072, "step": 6360 }, { "epoch": 0.03181103148642912, "grad_norm": 0.09584708511829376, "learning_rate": 2.911837592930987e-05, "loss": 10.6061, "step": 6370 }, { "epoch": 0.03186097031136857, "grad_norm": 0.09385517984628677, "learning_rate": 2.911687401436832e-05, "loss": 10.6075, "step": 6380 }, { "epoch": 0.03191090913630802, "grad_norm": 0.09303200244903564, "learning_rate": 2.911537209942677e-05, "loss": 10.607, "step": 6390 }, { "epoch": 0.03196084796124747, "grad_norm": 0.09322622418403625, "learning_rate": 2.911387018448522e-05, "loss": 10.6056, "step": 6400 }, { "epoch": 0.03201078678618692, "grad_norm": 0.09804125130176544, "learning_rate": 2.911236826954367e-05, "loss": 10.6035, "step": 6410 }, { "epoch": 0.03206072561112637, "grad_norm": 0.09895630925893784, "learning_rate": 2.9110866354602117e-05, "loss": 10.6053, "step": 6420 }, { "epoch": 0.03211066443606582, "grad_norm": 0.09591073542833328, "learning_rate": 2.9109364439660567e-05, "loss": 10.6044, "step": 6430 }, { "epoch": 0.03216060326100527, "grad_norm": 0.08683682233095169, "learning_rate": 2.9107862524719017e-05, "loss": 10.6046, "step": 6440 }, { "epoch": 0.03221054208594472, "grad_norm": 0.09420903772115707, "learning_rate": 2.9106360609777467e-05, "loss": 10.6029, "step": 6450 }, { "epoch": 0.03226048091088417, "grad_norm": 0.09442789852619171, "learning_rate": 2.9104858694835917e-05, "loss": 10.6037, "step": 6460 }, { "epoch": 0.03231041973582362, "grad_norm": 0.09980372339487076, "learning_rate": 2.9103356779894364e-05, "loss": 10.6028, "step": 6470 }, { "epoch": 0.03236035856076307, "grad_norm": 0.0942731723189354, "learning_rate": 2.9101854864952814e-05, "loss": 10.603, "step": 6480 }, { "epoch": 0.032410297385702516, "grad_norm": 0.09348214417695999, "learning_rate": 2.9100352950011265e-05, "loss": 10.6036, "step": 6490 }, { "epoch": 0.032460236210641966, "grad_norm": 0.09484933316707611, "learning_rate": 2.9098851035069715e-05, "loss": 10.6022, "step": 6500 }, { "epoch": 0.032510175035581415, "grad_norm": 0.09451691806316376, "learning_rate": 2.9097349120128165e-05, "loss": 10.6036, "step": 6510 }, { "epoch": 0.032560113860520865, "grad_norm": 0.0937938317656517, "learning_rate": 2.9095847205186612e-05, "loss": 10.6008, "step": 6520 }, { "epoch": 0.032610052685460314, "grad_norm": 0.09523924440145493, "learning_rate": 2.9094345290245062e-05, "loss": 10.601, "step": 6530 }, { "epoch": 0.03265999151039976, "grad_norm": 0.09146000444889069, "learning_rate": 2.9092843375303512e-05, "loss": 10.601, "step": 6540 }, { "epoch": 0.03270993033533921, "grad_norm": 0.09606587141752243, "learning_rate": 2.9091341460361962e-05, "loss": 10.5993, "step": 6550 }, { "epoch": 0.032759869160278655, "grad_norm": 0.09281494468450546, "learning_rate": 2.9089839545420412e-05, "loss": 10.6012, "step": 6560 }, { "epoch": 0.032809807985218105, "grad_norm": 0.09290701895952225, "learning_rate": 2.9088337630478863e-05, "loss": 10.6008, "step": 6570 }, { "epoch": 0.032859746810157554, "grad_norm": 0.10137023776769638, "learning_rate": 2.908683571553731e-05, "loss": 10.6012, "step": 6580 }, { "epoch": 0.032909685635097004, "grad_norm": 0.09537509083747864, "learning_rate": 2.908533380059576e-05, "loss": 10.599, "step": 6590 }, { "epoch": 0.03295962446003645, "grad_norm": 0.0944240391254425, "learning_rate": 2.908383188565421e-05, "loss": 10.6006, "step": 6600 }, { "epoch": 0.0330095632849759, "grad_norm": 0.09300459921360016, "learning_rate": 2.908232997071266e-05, "loss": 10.6005, "step": 6610 }, { "epoch": 0.03305950210991535, "grad_norm": 0.09325224906206131, "learning_rate": 2.908082805577111e-05, "loss": 10.5999, "step": 6620 }, { "epoch": 0.0331094409348548, "grad_norm": 0.09946674108505249, "learning_rate": 2.9079326140829557e-05, "loss": 10.597, "step": 6630 }, { "epoch": 0.03315937975979425, "grad_norm": 0.09831472486257553, "learning_rate": 2.9077824225888007e-05, "loss": 10.5973, "step": 6640 }, { "epoch": 0.0332093185847337, "grad_norm": 0.09227102994918823, "learning_rate": 2.9076322310946457e-05, "loss": 10.5992, "step": 6650 }, { "epoch": 0.03325925740967315, "grad_norm": 0.094718337059021, "learning_rate": 2.9074820396004907e-05, "loss": 10.5978, "step": 6660 }, { "epoch": 0.0333091962346126, "grad_norm": 0.09251998364925385, "learning_rate": 2.9073318481063358e-05, "loss": 10.5985, "step": 6670 }, { "epoch": 0.03335913505955205, "grad_norm": 0.0993405282497406, "learning_rate": 2.9071816566121804e-05, "loss": 10.599, "step": 6680 }, { "epoch": 0.0334090738844915, "grad_norm": 0.09692016988992691, "learning_rate": 2.9070314651180255e-05, "loss": 10.5975, "step": 6690 }, { "epoch": 0.03345901270943095, "grad_norm": 0.0930512323975563, "learning_rate": 2.9068812736238705e-05, "loss": 10.5965, "step": 6700 }, { "epoch": 0.0335089515343704, "grad_norm": 0.09981938451528549, "learning_rate": 2.9067310821297155e-05, "loss": 10.5969, "step": 6710 }, { "epoch": 0.03355889035930985, "grad_norm": 0.09343113005161285, "learning_rate": 2.9065808906355605e-05, "loss": 10.5961, "step": 6720 }, { "epoch": 0.033608829184249296, "grad_norm": 0.09517756849527359, "learning_rate": 2.9064306991414052e-05, "loss": 10.5965, "step": 6730 }, { "epoch": 0.033658768009188746, "grad_norm": 0.090797059237957, "learning_rate": 2.9062805076472502e-05, "loss": 10.5962, "step": 6740 }, { "epoch": 0.033708706834128195, "grad_norm": 0.09465719759464264, "learning_rate": 2.9061303161530952e-05, "loss": 10.5961, "step": 6750 }, { "epoch": 0.033758645659067645, "grad_norm": 0.09611517190933228, "learning_rate": 2.9059801246589402e-05, "loss": 10.5927, "step": 6760 }, { "epoch": 0.033808584484007094, "grad_norm": 0.10133732110261917, "learning_rate": 2.9058299331647853e-05, "loss": 10.5945, "step": 6770 }, { "epoch": 0.03385852330894654, "grad_norm": 0.09177147597074509, "learning_rate": 2.90567974167063e-05, "loss": 10.5955, "step": 6780 }, { "epoch": 0.03390846213388599, "grad_norm": 0.09687118232250214, "learning_rate": 2.905529550176475e-05, "loss": 10.5931, "step": 6790 }, { "epoch": 0.033958400958825435, "grad_norm": 0.09665414690971375, "learning_rate": 2.90537935868232e-05, "loss": 10.5954, "step": 6800 }, { "epoch": 0.034008339783764885, "grad_norm": 0.09662660956382751, "learning_rate": 2.905229167188165e-05, "loss": 10.5924, "step": 6810 }, { "epoch": 0.034058278608704334, "grad_norm": 0.09507598727941513, "learning_rate": 2.90507897569401e-05, "loss": 10.5926, "step": 6820 }, { "epoch": 0.034108217433643784, "grad_norm": 0.09256746619939804, "learning_rate": 2.9049287841998547e-05, "loss": 10.5938, "step": 6830 }, { "epoch": 0.03415815625858323, "grad_norm": 0.09468525648117065, "learning_rate": 2.9047785927056997e-05, "loss": 10.5927, "step": 6840 }, { "epoch": 0.03420809508352268, "grad_norm": 0.09714210778474808, "learning_rate": 2.9046284012115447e-05, "loss": 10.5932, "step": 6850 }, { "epoch": 0.03425803390846213, "grad_norm": 0.09986428171396255, "learning_rate": 2.9044782097173897e-05, "loss": 10.5918, "step": 6860 }, { "epoch": 0.03430797273340158, "grad_norm": 0.09874702244997025, "learning_rate": 2.9043280182232348e-05, "loss": 10.5937, "step": 6870 }, { "epoch": 0.03435791155834103, "grad_norm": 0.09810672700405121, "learning_rate": 2.9041778267290794e-05, "loss": 10.591, "step": 6880 }, { "epoch": 0.03440785038328048, "grad_norm": 0.09897971153259277, "learning_rate": 2.9040276352349248e-05, "loss": 10.5932, "step": 6890 }, { "epoch": 0.03445778920821993, "grad_norm": 0.09357500821352005, "learning_rate": 2.9038774437407695e-05, "loss": 10.5916, "step": 6900 }, { "epoch": 0.03450772803315938, "grad_norm": 0.09426924586296082, "learning_rate": 2.9037272522466145e-05, "loss": 10.5915, "step": 6910 }, { "epoch": 0.03455766685809883, "grad_norm": 0.09487421810626984, "learning_rate": 2.9035770607524595e-05, "loss": 10.5889, "step": 6920 }, { "epoch": 0.03460760568303828, "grad_norm": 0.08992663025856018, "learning_rate": 2.9034268692583042e-05, "loss": 10.5891, "step": 6930 }, { "epoch": 0.03465754450797773, "grad_norm": 0.0984937772154808, "learning_rate": 2.9032766777641495e-05, "loss": 10.5908, "step": 6940 }, { "epoch": 0.03470748333291718, "grad_norm": 0.08962874859571457, "learning_rate": 2.9031264862699942e-05, "loss": 10.59, "step": 6950 }, { "epoch": 0.03475742215785663, "grad_norm": 0.09817676246166229, "learning_rate": 2.9029762947758392e-05, "loss": 10.5901, "step": 6960 }, { "epoch": 0.034807360982796076, "grad_norm": 0.09869422018527985, "learning_rate": 2.9028261032816843e-05, "loss": 10.589, "step": 6970 }, { "epoch": 0.034857299807735526, "grad_norm": 0.09235729277133942, "learning_rate": 2.9026759117875293e-05, "loss": 10.588, "step": 6980 }, { "epoch": 0.034907238632674975, "grad_norm": 0.0891089141368866, "learning_rate": 2.9025257202933743e-05, "loss": 16.5832, "step": 6990 }, { "epoch": 0.034957177457614425, "grad_norm": 0.0962633341550827, "learning_rate": 2.902375528799219e-05, "loss": 22.1721, "step": 7000 }, { "epoch": 0.035007116282553874, "grad_norm": 0.09975840896368027, "learning_rate": 2.902225337305064e-05, "loss": 10.604, "step": 7010 }, { "epoch": 0.03505705510749332, "grad_norm": 0.09406078606843948, "learning_rate": 2.902075145810909e-05, "loss": 12.82, "step": 7020 }, { "epoch": 0.03510699393243277, "grad_norm": 0.09256692975759506, "learning_rate": 2.901924954316754e-05, "loss": 10.5856, "step": 7030 }, { "epoch": 0.03515693275737222, "grad_norm": 0.09028827399015427, "learning_rate": 2.901774762822599e-05, "loss": 10.5869, "step": 7040 }, { "epoch": 0.035206871582311665, "grad_norm": 0.0957457572221756, "learning_rate": 2.9016245713284437e-05, "loss": 10.5873, "step": 7050 }, { "epoch": 0.035256810407251114, "grad_norm": 0.09316723793745041, "learning_rate": 2.9014743798342887e-05, "loss": 10.5875, "step": 7060 }, { "epoch": 0.035306749232190564, "grad_norm": 0.09424072504043579, "learning_rate": 2.9013241883401338e-05, "loss": 10.5852, "step": 7070 }, { "epoch": 0.03535668805713001, "grad_norm": 0.09654801338911057, "learning_rate": 2.9011739968459788e-05, "loss": 12.5143, "step": 7080 }, { "epoch": 0.03540662688206946, "grad_norm": 0.10188226401805878, "learning_rate": 2.9010238053518238e-05, "loss": 10.5845, "step": 7090 }, { "epoch": 0.03545656570700891, "grad_norm": 0.0997641459107399, "learning_rate": 2.9008736138576685e-05, "loss": 12.043, "step": 7100 }, { "epoch": 0.03550650453194836, "grad_norm": 0.09502726793289185, "learning_rate": 2.9007234223635135e-05, "loss": 10.5843, "step": 7110 }, { "epoch": 0.03555644335688781, "grad_norm": 0.09691105782985687, "learning_rate": 2.9005732308693585e-05, "loss": 10.5821, "step": 7120 }, { "epoch": 0.03560638218182726, "grad_norm": 0.09250783175230026, "learning_rate": 2.9004230393752035e-05, "loss": 10.5865, "step": 7130 }, { "epoch": 0.03565632100676671, "grad_norm": 0.09487821161746979, "learning_rate": 2.9002728478810486e-05, "loss": 10.5849, "step": 7140 }, { "epoch": 0.03570625983170616, "grad_norm": 3425.232666015625, "learning_rate": 2.9001226563868932e-05, "loss": 10.598, "step": 7150 }, { "epoch": 0.03575619865664561, "grad_norm": 0.10075180232524872, "learning_rate": 2.8999724648927382e-05, "loss": 10.5865, "step": 7160 }, { "epoch": 0.03580613748158506, "grad_norm": 0.09755649417638779, "learning_rate": 2.8998222733985833e-05, "loss": 10.5843, "step": 7170 }, { "epoch": 0.03585607630652451, "grad_norm": 0.09459280222654343, "learning_rate": 2.8996720819044283e-05, "loss": 10.5831, "step": 7180 }, { "epoch": 0.03590601513146396, "grad_norm": 0.09937462210655212, "learning_rate": 2.8995218904102733e-05, "loss": 10.5837, "step": 7190 }, { "epoch": 0.03595595395640341, "grad_norm": 0.09443425387144089, "learning_rate": 2.899371698916118e-05, "loss": 10.5812, "step": 7200 }, { "epoch": 0.036005892781342856, "grad_norm": 0.0933324322104454, "learning_rate": 2.8992215074219633e-05, "loss": 10.5829, "step": 7210 }, { "epoch": 0.036055831606282306, "grad_norm": 0.09167325496673584, "learning_rate": 2.899071315927808e-05, "loss": 10.5823, "step": 7220 }, { "epoch": 0.036105770431221755, "grad_norm": 0.09948615729808807, "learning_rate": 2.898921124433653e-05, "loss": 10.5817, "step": 7230 }, { "epoch": 0.036155709256161205, "grad_norm": 0.09661941975355148, "learning_rate": 2.898770932939498e-05, "loss": 10.58, "step": 7240 }, { "epoch": 0.036205648081100654, "grad_norm": 0.09533500671386719, "learning_rate": 2.8986207414453427e-05, "loss": 10.5822, "step": 7250 }, { "epoch": 0.0362555869060401, "grad_norm": 0.09911171346902847, "learning_rate": 2.898470549951188e-05, "loss": 10.5812, "step": 7260 }, { "epoch": 0.03630552573097955, "grad_norm": 0.09520751982927322, "learning_rate": 2.8983203584570328e-05, "loss": 10.5781, "step": 7270 }, { "epoch": 0.036355464555919, "grad_norm": 0.09380774945020676, "learning_rate": 2.8981701669628778e-05, "loss": 10.5798, "step": 7280 }, { "epoch": 0.03640540338085845, "grad_norm": 0.09551588445901871, "learning_rate": 2.8980199754687228e-05, "loss": 10.5789, "step": 7290 }, { "epoch": 0.036455342205797894, "grad_norm": 0.09562008827924728, "learning_rate": 2.8978697839745675e-05, "loss": 10.5788, "step": 7300 }, { "epoch": 0.036505281030737344, "grad_norm": 0.09640656411647797, "learning_rate": 2.897719592480413e-05, "loss": 10.5793, "step": 7310 }, { "epoch": 0.03655521985567679, "grad_norm": 0.09420552849769592, "learning_rate": 2.8975694009862575e-05, "loss": 10.5776, "step": 7320 }, { "epoch": 0.03660515868061624, "grad_norm": 0.09690862894058228, "learning_rate": 2.8974192094921025e-05, "loss": 10.577, "step": 7330 }, { "epoch": 0.03665509750555569, "grad_norm": 0.09202788025140762, "learning_rate": 2.8972690179979476e-05, "loss": 10.5785, "step": 7340 }, { "epoch": 0.03670503633049514, "grad_norm": 0.08765114843845367, "learning_rate": 2.8971188265037922e-05, "loss": 10.5781, "step": 7350 }, { "epoch": 0.03675497515543459, "grad_norm": 0.09262295067310333, "learning_rate": 2.8969686350096376e-05, "loss": 10.5772, "step": 7360 }, { "epoch": 0.03680491398037404, "grad_norm": 0.09478264302015305, "learning_rate": 2.8968184435154823e-05, "loss": 10.5783, "step": 7370 }, { "epoch": 0.03685485280531349, "grad_norm": 0.09246762841939926, "learning_rate": 2.8966682520213273e-05, "loss": 10.5775, "step": 7380 }, { "epoch": 0.03690479163025294, "grad_norm": 0.0920056626200676, "learning_rate": 2.8965180605271723e-05, "loss": 10.5771, "step": 7390 }, { "epoch": 0.03695473045519239, "grad_norm": 0.09541013091802597, "learning_rate": 2.896367869033017e-05, "loss": 10.5757, "step": 7400 }, { "epoch": 0.03700466928013184, "grad_norm": 0.0953700914978981, "learning_rate": 2.8962176775388623e-05, "loss": 10.5765, "step": 7410 }, { "epoch": 0.03705460810507129, "grad_norm": 0.09888526797294617, "learning_rate": 2.896067486044707e-05, "loss": 10.5757, "step": 7420 }, { "epoch": 0.03710454693001074, "grad_norm": 0.09751054644584656, "learning_rate": 2.895917294550552e-05, "loss": 10.5733, "step": 7430 }, { "epoch": 0.03715448575495019, "grad_norm": 0.095875583589077, "learning_rate": 2.895767103056397e-05, "loss": 10.5762, "step": 7440 }, { "epoch": 0.037204424579889636, "grad_norm": 0.0908399224281311, "learning_rate": 2.8956169115622417e-05, "loss": 10.5755, "step": 7450 }, { "epoch": 0.037254363404829086, "grad_norm": 0.09418118000030518, "learning_rate": 2.895466720068087e-05, "loss": 10.5738, "step": 7460 }, { "epoch": 0.037304302229768535, "grad_norm": 0.09714969247579575, "learning_rate": 2.8953165285739318e-05, "loss": 10.5742, "step": 7470 }, { "epoch": 0.037354241054707985, "grad_norm": 0.09826507419347763, "learning_rate": 2.8951663370797768e-05, "loss": 10.5731, "step": 7480 }, { "epoch": 0.037404179879647434, "grad_norm": 0.0948546826839447, "learning_rate": 2.8950161455856218e-05, "loss": 10.5723, "step": 7490 }, { "epoch": 0.03745411870458688, "grad_norm": 0.09687359631061554, "learning_rate": 2.8948659540914665e-05, "loss": 10.5733, "step": 7500 }, { "epoch": 0.03750405752952633, "grad_norm": 0.10149165242910385, "learning_rate": 2.894715762597312e-05, "loss": 10.5732, "step": 7510 }, { "epoch": 0.03755399635446578, "grad_norm": 0.09481348842382431, "learning_rate": 2.8945655711031565e-05, "loss": 10.5732, "step": 7520 }, { "epoch": 0.03760393517940523, "grad_norm": 0.09526333957910538, "learning_rate": 2.894415379609002e-05, "loss": 10.5736, "step": 7530 }, { "epoch": 0.03765387400434468, "grad_norm": 0.09095079451799393, "learning_rate": 2.8942651881148466e-05, "loss": 10.5709, "step": 7540 }, { "epoch": 0.037703812829284124, "grad_norm": 0.09532736986875534, "learning_rate": 2.8941149966206912e-05, "loss": 10.5712, "step": 7550 }, { "epoch": 0.03775375165422357, "grad_norm": 0.09308824688196182, "learning_rate": 2.8939648051265366e-05, "loss": 10.5711, "step": 7560 }, { "epoch": 0.03780369047916302, "grad_norm": 0.0935669094324112, "learning_rate": 2.8938146136323813e-05, "loss": 10.5715, "step": 7570 }, { "epoch": 0.03785362930410247, "grad_norm": 0.09036646783351898, "learning_rate": 2.8936644221382266e-05, "loss": 10.5704, "step": 7580 }, { "epoch": 0.03790356812904192, "grad_norm": 0.09510631859302521, "learning_rate": 2.8935142306440713e-05, "loss": 10.5709, "step": 7590 }, { "epoch": 0.03795350695398137, "grad_norm": 0.09474785625934601, "learning_rate": 2.893364039149916e-05, "loss": 10.571, "step": 7600 }, { "epoch": 0.03800344577892082, "grad_norm": 0.09734784811735153, "learning_rate": 2.8932138476557613e-05, "loss": 10.5693, "step": 7610 }, { "epoch": 0.03805338460386027, "grad_norm": 0.10142108052968979, "learning_rate": 2.893063656161606e-05, "loss": 10.5701, "step": 7620 }, { "epoch": 0.03810332342879972, "grad_norm": 0.09453453868627548, "learning_rate": 2.8929134646674514e-05, "loss": 10.5697, "step": 7630 }, { "epoch": 0.03815326225373917, "grad_norm": 0.09844522923231125, "learning_rate": 2.892763273173296e-05, "loss": 10.5688, "step": 7640 }, { "epoch": 0.03820320107867862, "grad_norm": 0.10033461451530457, "learning_rate": 2.8926130816791407e-05, "loss": 10.5689, "step": 7650 }, { "epoch": 0.03825313990361807, "grad_norm": 0.10299541801214218, "learning_rate": 2.892462890184986e-05, "loss": 10.5693, "step": 7660 }, { "epoch": 0.03830307872855752, "grad_norm": 0.09430062025785446, "learning_rate": 2.8923126986908308e-05, "loss": 10.5672, "step": 7670 }, { "epoch": 0.03835301755349697, "grad_norm": 0.10111190378665924, "learning_rate": 2.892162507196676e-05, "loss": 10.5674, "step": 7680 }, { "epoch": 0.038402956378436416, "grad_norm": 0.09100598096847534, "learning_rate": 2.8920123157025208e-05, "loss": 10.5685, "step": 7690 }, { "epoch": 0.038452895203375866, "grad_norm": 0.09059002995491028, "learning_rate": 2.8918621242083655e-05, "loss": 10.5676, "step": 7700 }, { "epoch": 0.038502834028315315, "grad_norm": 0.09399350732564926, "learning_rate": 2.891711932714211e-05, "loss": 10.568, "step": 7710 }, { "epoch": 0.038552772853254764, "grad_norm": 0.09312798827886581, "learning_rate": 2.8915617412200555e-05, "loss": 10.5672, "step": 7720 }, { "epoch": 0.038602711678194214, "grad_norm": 0.0949626937508583, "learning_rate": 2.891411549725901e-05, "loss": 10.5663, "step": 7730 }, { "epoch": 0.03865265050313366, "grad_norm": 0.08603295683860779, "learning_rate": 2.8912613582317456e-05, "loss": 10.5665, "step": 7740 }, { "epoch": 0.03870258932807311, "grad_norm": 0.09113872051239014, "learning_rate": 2.8911111667375902e-05, "loss": 10.567, "step": 7750 }, { "epoch": 0.03875252815301256, "grad_norm": 0.09675870090723038, "learning_rate": 2.8909609752434356e-05, "loss": 10.5649, "step": 7760 }, { "epoch": 0.03880246697795201, "grad_norm": 0.09332741051912308, "learning_rate": 2.8908107837492803e-05, "loss": 10.5656, "step": 7770 }, { "epoch": 0.03885240580289146, "grad_norm": 0.09331821650266647, "learning_rate": 2.8906605922551256e-05, "loss": 10.5659, "step": 7780 }, { "epoch": 0.038902344627830904, "grad_norm": 0.09395716339349747, "learning_rate": 2.8905104007609703e-05, "loss": 10.5656, "step": 7790 }, { "epoch": 0.03895228345277035, "grad_norm": 0.09877283871173859, "learning_rate": 2.890360209266815e-05, "loss": 10.5653, "step": 7800 }, { "epoch": 0.0390022222777098, "grad_norm": 0.09474792331457138, "learning_rate": 2.8902100177726603e-05, "loss": 10.5637, "step": 7810 }, { "epoch": 0.03905216110264925, "grad_norm": 0.09844552725553513, "learning_rate": 2.890059826278505e-05, "loss": 10.5633, "step": 7820 }, { "epoch": 0.0391020999275887, "grad_norm": 0.09523854404687881, "learning_rate": 2.8899096347843504e-05, "loss": 10.5637, "step": 7830 }, { "epoch": 0.03915203875252815, "grad_norm": 0.09699324518442154, "learning_rate": 2.889759443290195e-05, "loss": 10.5631, "step": 7840 }, { "epoch": 0.0392019775774676, "grad_norm": 0.09227604418992996, "learning_rate": 2.88960925179604e-05, "loss": 10.5623, "step": 7850 }, { "epoch": 0.03925191640240705, "grad_norm": 0.0924190804362297, "learning_rate": 2.889459060301885e-05, "loss": 10.5623, "step": 7860 }, { "epoch": 0.0393018552273465, "grad_norm": 0.10132279247045517, "learning_rate": 2.8893088688077298e-05, "loss": 10.5635, "step": 7870 }, { "epoch": 0.03935179405228595, "grad_norm": 0.09170647710561752, "learning_rate": 2.889158677313575e-05, "loss": 10.5616, "step": 7880 }, { "epoch": 0.0394017328772254, "grad_norm": 0.09620120376348495, "learning_rate": 2.8890084858194198e-05, "loss": 10.5616, "step": 7890 }, { "epoch": 0.03945167170216485, "grad_norm": 0.09894801676273346, "learning_rate": 2.8888582943252648e-05, "loss": 10.5605, "step": 7900 }, { "epoch": 0.0395016105271043, "grad_norm": 0.09626492857933044, "learning_rate": 2.88870810283111e-05, "loss": 10.5602, "step": 7910 }, { "epoch": 0.03955154935204375, "grad_norm": 0.09040050953626633, "learning_rate": 2.8885579113369545e-05, "loss": 10.5595, "step": 7920 }, { "epoch": 0.039601488176983196, "grad_norm": 0.09787009656429291, "learning_rate": 2.8884077198428e-05, "loss": 10.5598, "step": 7930 }, { "epoch": 0.039651427001922646, "grad_norm": 0.09217660874128342, "learning_rate": 2.8882575283486446e-05, "loss": 10.5596, "step": 7940 }, { "epoch": 0.039701365826862095, "grad_norm": 0.09433452039957047, "learning_rate": 2.8881073368544896e-05, "loss": 10.56, "step": 7950 }, { "epoch": 0.039751304651801544, "grad_norm": 0.09549769759178162, "learning_rate": 2.8879571453603346e-05, "loss": 10.5576, "step": 7960 }, { "epoch": 0.039801243476740994, "grad_norm": 0.09538113325834274, "learning_rate": 2.8878069538661793e-05, "loss": 10.56, "step": 7970 }, { "epoch": 0.03985118230168044, "grad_norm": 0.09417406469583511, "learning_rate": 2.8876567623720246e-05, "loss": 10.5598, "step": 7980 }, { "epoch": 0.03990112112661989, "grad_norm": 0.09565738588571548, "learning_rate": 2.8875065708778693e-05, "loss": 10.5599, "step": 7990 }, { "epoch": 0.03995105995155934, "grad_norm": 0.09348537772893906, "learning_rate": 2.8873563793837143e-05, "loss": 10.5605, "step": 8000 }, { "epoch": 0.04000099877649879, "grad_norm": 0.0924496129155159, "learning_rate": 2.8872061878895593e-05, "loss": 10.5592, "step": 8010 }, { "epoch": 0.04005093760143824, "grad_norm": 0.09314726293087006, "learning_rate": 2.887055996395404e-05, "loss": 10.5568, "step": 8020 }, { "epoch": 0.04010087642637769, "grad_norm": 0.09801434725522995, "learning_rate": 2.8869058049012494e-05, "loss": 10.558, "step": 8030 }, { "epoch": 0.04015081525131713, "grad_norm": 0.09441787749528885, "learning_rate": 2.886755613407094e-05, "loss": 10.5562, "step": 8040 }, { "epoch": 0.04020075407625658, "grad_norm": 0.09265992045402527, "learning_rate": 2.886605421912939e-05, "loss": 10.5588, "step": 8050 }, { "epoch": 0.04025069290119603, "grad_norm": 0.09449957311153412, "learning_rate": 2.886455230418784e-05, "loss": 10.5578, "step": 8060 }, { "epoch": 0.04030063172613548, "grad_norm": 0.09760826081037521, "learning_rate": 2.8863050389246288e-05, "loss": 10.5543, "step": 8070 }, { "epoch": 0.04035057055107493, "grad_norm": 0.09547002613544464, "learning_rate": 2.886154847430474e-05, "loss": 10.5575, "step": 8080 }, { "epoch": 0.04040050937601438, "grad_norm": 0.09818068146705627, "learning_rate": 2.8860046559363188e-05, "loss": 10.5569, "step": 8090 }, { "epoch": 0.04045044820095383, "grad_norm": 0.09117206931114197, "learning_rate": 2.8858544644421638e-05, "loss": 10.5529, "step": 8100 }, { "epoch": 0.04050038702589328, "grad_norm": 0.10111954063177109, "learning_rate": 2.885704272948009e-05, "loss": 10.5541, "step": 8110 }, { "epoch": 0.04055032585083273, "grad_norm": 0.09272032976150513, "learning_rate": 2.8855540814538535e-05, "loss": 10.5528, "step": 8120 }, { "epoch": 0.04060026467577218, "grad_norm": 0.09511199593544006, "learning_rate": 2.885403889959699e-05, "loss": 10.5561, "step": 8130 }, { "epoch": 0.04065020350071163, "grad_norm": 0.09244706481695175, "learning_rate": 2.8852536984655436e-05, "loss": 10.5553, "step": 8140 }, { "epoch": 0.04070014232565108, "grad_norm": 0.09505173563957214, "learning_rate": 2.8851035069713886e-05, "loss": 10.5548, "step": 8150 }, { "epoch": 0.04075008115059053, "grad_norm": 0.09145089983940125, "learning_rate": 2.8849533154772336e-05, "loss": 10.5544, "step": 8160 }, { "epoch": 0.040800019975529976, "grad_norm": 0.09063593298196793, "learning_rate": 2.8848031239830786e-05, "loss": 10.5549, "step": 8170 }, { "epoch": 0.040849958800469426, "grad_norm": 0.09323187917470932, "learning_rate": 2.8846529324889236e-05, "loss": 10.5522, "step": 8180 }, { "epoch": 0.040899897625408875, "grad_norm": 0.09502808004617691, "learning_rate": 2.8845027409947683e-05, "loss": 10.5537, "step": 8190 }, { "epoch": 0.040949836450348324, "grad_norm": 0.10582366585731506, "learning_rate": 2.8843525495006133e-05, "loss": 10.5538, "step": 8200 }, { "epoch": 0.040999775275287774, "grad_norm": 0.09141421318054199, "learning_rate": 2.8842023580064583e-05, "loss": 10.5523, "step": 8210 }, { "epoch": 0.04104971410022722, "grad_norm": 0.10094629228115082, "learning_rate": 2.8840521665123034e-05, "loss": 10.5521, "step": 8220 }, { "epoch": 0.04109965292516667, "grad_norm": 0.08866948634386063, "learning_rate": 2.8839019750181484e-05, "loss": 10.5503, "step": 8230 }, { "epoch": 0.04114959175010612, "grad_norm": 0.09741657972335815, "learning_rate": 2.883751783523993e-05, "loss": 10.5508, "step": 8240 }, { "epoch": 0.04119953057504557, "grad_norm": 0.09136499464511871, "learning_rate": 2.883601592029838e-05, "loss": 10.5507, "step": 8250 }, { "epoch": 0.04124946939998502, "grad_norm": 0.09198024123907089, "learning_rate": 2.883451400535683e-05, "loss": 10.5514, "step": 8260 }, { "epoch": 0.04129940822492447, "grad_norm": 0.09833436459302902, "learning_rate": 2.883301209041528e-05, "loss": 10.5492, "step": 8270 }, { "epoch": 0.04134934704986392, "grad_norm": 0.09501312673091888, "learning_rate": 2.883151017547373e-05, "loss": 10.5505, "step": 8280 }, { "epoch": 0.04139928587480336, "grad_norm": 0.08871445059776306, "learning_rate": 2.8830008260532178e-05, "loss": 10.5488, "step": 8290 }, { "epoch": 0.04144922469974281, "grad_norm": 0.09673582017421722, "learning_rate": 2.8828506345590628e-05, "loss": 10.55, "step": 8300 }, { "epoch": 0.04149916352468226, "grad_norm": 0.09481798112392426, "learning_rate": 2.882700443064908e-05, "loss": 10.5475, "step": 8310 }, { "epoch": 0.04154910234962171, "grad_norm": 0.09376543760299683, "learning_rate": 2.882550251570753e-05, "loss": 10.5485, "step": 8320 }, { "epoch": 0.04159904117456116, "grad_norm": 0.09397636353969574, "learning_rate": 2.882400060076598e-05, "loss": 10.5472, "step": 8330 }, { "epoch": 0.04164897999950061, "grad_norm": 0.09375918656587601, "learning_rate": 2.8822498685824426e-05, "loss": 10.5469, "step": 8340 }, { "epoch": 0.04169891882444006, "grad_norm": 0.09085606038570404, "learning_rate": 2.8820996770882876e-05, "loss": 10.5496, "step": 8350 }, { "epoch": 0.04174885764937951, "grad_norm": 0.0982041284441948, "learning_rate": 2.8819494855941326e-05, "loss": 10.5468, "step": 8360 }, { "epoch": 0.04179879647431896, "grad_norm": 0.09510507434606552, "learning_rate": 2.8817992940999776e-05, "loss": 10.5458, "step": 8370 }, { "epoch": 0.04184873529925841, "grad_norm": 0.09728366136550903, "learning_rate": 2.8816491026058226e-05, "loss": 10.5469, "step": 8380 }, { "epoch": 0.04189867412419786, "grad_norm": 0.10339502990245819, "learning_rate": 2.8814989111116673e-05, "loss": 10.5475, "step": 8390 }, { "epoch": 0.04194861294913731, "grad_norm": 0.09696449339389801, "learning_rate": 2.8813487196175123e-05, "loss": 10.5457, "step": 8400 }, { "epoch": 0.041998551774076756, "grad_norm": 0.09432040899991989, "learning_rate": 2.8811985281233573e-05, "loss": 10.5464, "step": 8410 }, { "epoch": 0.042048490599016206, "grad_norm": 0.09533160924911499, "learning_rate": 2.8810483366292024e-05, "loss": 10.5458, "step": 8420 }, { "epoch": 0.042098429423955655, "grad_norm": 0.09491016715765, "learning_rate": 2.8808981451350474e-05, "loss": 10.546, "step": 8430 }, { "epoch": 0.042148368248895104, "grad_norm": 0.0958162397146225, "learning_rate": 2.880747953640892e-05, "loss": 10.5436, "step": 8440 }, { "epoch": 0.042198307073834554, "grad_norm": 0.10142121464014053, "learning_rate": 2.880597762146737e-05, "loss": 10.5453, "step": 8450 }, { "epoch": 0.042248245898774, "grad_norm": 0.09626547992229462, "learning_rate": 2.880447570652582e-05, "loss": 10.5443, "step": 8460 }, { "epoch": 0.04229818472371345, "grad_norm": 0.09666489064693451, "learning_rate": 2.880297379158427e-05, "loss": 10.5445, "step": 8470 }, { "epoch": 0.0423481235486529, "grad_norm": 0.0963977724313736, "learning_rate": 2.880147187664272e-05, "loss": 10.545, "step": 8480 }, { "epoch": 0.04239806237359235, "grad_norm": 0.09442082047462463, "learning_rate": 2.879996996170117e-05, "loss": 10.5455, "step": 8490 }, { "epoch": 0.0424480011985318, "grad_norm": 0.09085852652788162, "learning_rate": 2.8798468046759618e-05, "loss": 10.5439, "step": 8500 }, { "epoch": 0.04249794002347125, "grad_norm": 0.0952218547463417, "learning_rate": 2.879696613181807e-05, "loss": 10.5411, "step": 8510 }, { "epoch": 0.0425478788484107, "grad_norm": 0.09868868440389633, "learning_rate": 2.879546421687652e-05, "loss": 10.5424, "step": 8520 }, { "epoch": 0.04259781767335014, "grad_norm": 0.09193531423807144, "learning_rate": 2.879396230193497e-05, "loss": 10.5434, "step": 8530 }, { "epoch": 0.04264775649828959, "grad_norm": 0.09298193454742432, "learning_rate": 2.879246038699342e-05, "loss": 10.5418, "step": 8540 }, { "epoch": 0.04269769532322904, "grad_norm": 0.09158916771411896, "learning_rate": 2.8790958472051866e-05, "loss": 10.5422, "step": 8550 }, { "epoch": 0.04274763414816849, "grad_norm": 0.09237027168273926, "learning_rate": 2.8789456557110316e-05, "loss": 10.5407, "step": 8560 }, { "epoch": 0.04279757297310794, "grad_norm": 0.1070277988910675, "learning_rate": 2.8787954642168766e-05, "loss": 10.5417, "step": 8570 }, { "epoch": 0.04284751179804739, "grad_norm": 0.0924166664481163, "learning_rate": 2.8786452727227216e-05, "loss": 10.5391, "step": 8580 }, { "epoch": 0.04289745062298684, "grad_norm": 0.09547102451324463, "learning_rate": 2.8784950812285667e-05, "loss": 10.5386, "step": 8590 }, { "epoch": 0.04294738944792629, "grad_norm": 0.09805493801832199, "learning_rate": 2.8783448897344113e-05, "loss": 10.5404, "step": 8600 }, { "epoch": 0.04299732827286574, "grad_norm": 0.09734487533569336, "learning_rate": 2.8781946982402563e-05, "loss": 10.5402, "step": 8610 }, { "epoch": 0.04304726709780519, "grad_norm": 0.09356056153774261, "learning_rate": 2.8780445067461014e-05, "loss": 10.5394, "step": 8620 }, { "epoch": 0.04309720592274464, "grad_norm": 0.09781930595636368, "learning_rate": 2.8778943152519464e-05, "loss": 10.5389, "step": 8630 }, { "epoch": 0.04314714474768409, "grad_norm": 0.0970260426402092, "learning_rate": 2.8777441237577914e-05, "loss": 10.5402, "step": 8640 }, { "epoch": 0.043197083572623536, "grad_norm": 0.09671913832426071, "learning_rate": 2.877593932263636e-05, "loss": 10.5387, "step": 8650 }, { "epoch": 0.043247022397562986, "grad_norm": 0.0935794860124588, "learning_rate": 2.877443740769481e-05, "loss": 10.5378, "step": 8660 }, { "epoch": 0.043296961222502435, "grad_norm": 0.0933162197470665, "learning_rate": 2.877293549275326e-05, "loss": 10.5399, "step": 8670 }, { "epoch": 0.043346900047441884, "grad_norm": 0.09513531625270844, "learning_rate": 2.877143357781171e-05, "loss": 10.5379, "step": 8680 }, { "epoch": 0.043396838872381334, "grad_norm": 0.10161089897155762, "learning_rate": 2.876993166287016e-05, "loss": 10.5386, "step": 8690 }, { "epoch": 0.04344677769732078, "grad_norm": 0.09620557725429535, "learning_rate": 2.8768429747928608e-05, "loss": 10.5381, "step": 8700 }, { "epoch": 0.04349671652226023, "grad_norm": 0.09468070417642593, "learning_rate": 2.876692783298706e-05, "loss": 10.5371, "step": 8710 }, { "epoch": 0.04354665534719968, "grad_norm": 0.09541548788547516, "learning_rate": 2.876542591804551e-05, "loss": 10.5374, "step": 8720 }, { "epoch": 0.04359659417213913, "grad_norm": 0.09611213952302933, "learning_rate": 2.876392400310396e-05, "loss": 10.5382, "step": 8730 }, { "epoch": 0.04364653299707858, "grad_norm": 0.08991603553295135, "learning_rate": 2.876242208816241e-05, "loss": 10.536, "step": 8740 }, { "epoch": 0.04369647182201803, "grad_norm": 0.09493478387594223, "learning_rate": 2.8760920173220856e-05, "loss": 10.5358, "step": 8750 }, { "epoch": 0.04374641064695748, "grad_norm": 0.09500102698802948, "learning_rate": 2.8759418258279306e-05, "loss": 10.5345, "step": 8760 }, { "epoch": 0.04379634947189693, "grad_norm": 0.09735292196273804, "learning_rate": 2.8757916343337756e-05, "loss": 10.5329, "step": 8770 }, { "epoch": 0.04384628829683637, "grad_norm": 0.09669680148363113, "learning_rate": 2.8756414428396206e-05, "loss": 10.5363, "step": 8780 }, { "epoch": 0.04389622712177582, "grad_norm": 0.09384785592556, "learning_rate": 2.8754912513454657e-05, "loss": 10.5342, "step": 8790 }, { "epoch": 0.04394616594671527, "grad_norm": 0.09973820298910141, "learning_rate": 2.8753410598513103e-05, "loss": 10.5346, "step": 8800 }, { "epoch": 0.04399610477165472, "grad_norm": 0.09043479710817337, "learning_rate": 2.8751908683571557e-05, "loss": 10.5345, "step": 8810 }, { "epoch": 0.04404604359659417, "grad_norm": 0.09604961425065994, "learning_rate": 2.8750406768630004e-05, "loss": 10.5338, "step": 8820 }, { "epoch": 0.04409598242153362, "grad_norm": 0.09182944893836975, "learning_rate": 2.8748904853688454e-05, "loss": 10.5345, "step": 8830 }, { "epoch": 0.04414592124647307, "grad_norm": 0.09448856860399246, "learning_rate": 2.8747402938746904e-05, "loss": 10.5337, "step": 8840 }, { "epoch": 0.04419586007141252, "grad_norm": 0.09963658452033997, "learning_rate": 2.874590102380535e-05, "loss": 10.5313, "step": 8850 }, { "epoch": 0.04424579889635197, "grad_norm": 0.09832189232110977, "learning_rate": 2.8744399108863804e-05, "loss": 10.5341, "step": 8860 }, { "epoch": 0.04429573772129142, "grad_norm": 0.0896415188908577, "learning_rate": 2.874289719392225e-05, "loss": 10.5319, "step": 8870 }, { "epoch": 0.04434567654623087, "grad_norm": 0.09824098646640778, "learning_rate": 2.87413952789807e-05, "loss": 10.5328, "step": 8880 }, { "epoch": 0.044395615371170316, "grad_norm": 0.09288741648197174, "learning_rate": 2.873989336403915e-05, "loss": 10.5329, "step": 8890 }, { "epoch": 0.044445554196109766, "grad_norm": 0.09383495151996613, "learning_rate": 2.87383914490976e-05, "loss": 10.5323, "step": 8900 }, { "epoch": 0.044495493021049215, "grad_norm": 0.09476141631603241, "learning_rate": 2.8736889534156052e-05, "loss": 10.5316, "step": 8910 }, { "epoch": 0.044545431845988664, "grad_norm": 0.0921645388007164, "learning_rate": 2.87353876192145e-05, "loss": 10.5313, "step": 8920 }, { "epoch": 0.044595370670928114, "grad_norm": 0.09352537989616394, "learning_rate": 2.873388570427295e-05, "loss": 10.5309, "step": 8930 }, { "epoch": 0.04464530949586756, "grad_norm": 0.08910872787237167, "learning_rate": 2.87323837893314e-05, "loss": 10.5303, "step": 8940 }, { "epoch": 0.04469524832080701, "grad_norm": 0.09727400541305542, "learning_rate": 2.8730881874389846e-05, "loss": 10.5306, "step": 8950 }, { "epoch": 0.04474518714574646, "grad_norm": 0.09605511277914047, "learning_rate": 2.87293799594483e-05, "loss": 10.5287, "step": 8960 }, { "epoch": 0.04479512597068591, "grad_norm": 0.0909186601638794, "learning_rate": 2.8727878044506746e-05, "loss": 10.5279, "step": 8970 }, { "epoch": 0.04484506479562536, "grad_norm": 0.0998016744852066, "learning_rate": 2.8726376129565196e-05, "loss": 10.5307, "step": 8980 }, { "epoch": 0.04489500362056481, "grad_norm": 0.09300219267606735, "learning_rate": 2.8724874214623647e-05, "loss": 10.5304, "step": 8990 }, { "epoch": 0.04494494244550426, "grad_norm": 0.09390337765216827, "learning_rate": 2.8723372299682093e-05, "loss": 10.5274, "step": 9000 }, { "epoch": 0.04499488127044371, "grad_norm": 0.09505102783441544, "learning_rate": 2.8721870384740547e-05, "loss": 10.529, "step": 9010 }, { "epoch": 0.04504482009538316, "grad_norm": 0.09317026287317276, "learning_rate": 2.8720368469798994e-05, "loss": 10.5281, "step": 9020 }, { "epoch": 0.0450947589203226, "grad_norm": 0.09734094887971878, "learning_rate": 2.8718866554857444e-05, "loss": 10.5281, "step": 9030 }, { "epoch": 0.04514469774526205, "grad_norm": 0.09334099292755127, "learning_rate": 2.8717364639915894e-05, "loss": 10.5263, "step": 9040 }, { "epoch": 0.0451946365702015, "grad_norm": 0.0930711030960083, "learning_rate": 2.871586272497434e-05, "loss": 10.526, "step": 9050 }, { "epoch": 0.04524457539514095, "grad_norm": 0.09420018643140793, "learning_rate": 2.8714360810032794e-05, "loss": 10.527, "step": 9060 }, { "epoch": 0.0452945142200804, "grad_norm": 0.0920024886727333, "learning_rate": 2.871285889509124e-05, "loss": 10.5282, "step": 9070 }, { "epoch": 0.04534445304501985, "grad_norm": 0.1000896766781807, "learning_rate": 2.871135698014969e-05, "loss": 10.5276, "step": 9080 }, { "epoch": 0.0453943918699593, "grad_norm": 0.09131770581007004, "learning_rate": 2.870985506520814e-05, "loss": 10.5263, "step": 9090 }, { "epoch": 0.04544433069489875, "grad_norm": 0.09568016231060028, "learning_rate": 2.870835315026659e-05, "loss": 10.5241, "step": 9100 }, { "epoch": 0.0454942695198382, "grad_norm": 0.1060924157500267, "learning_rate": 2.8706851235325042e-05, "loss": 10.5241, "step": 9110 }, { "epoch": 0.04554420834477765, "grad_norm": 0.10000130534172058, "learning_rate": 2.870534932038349e-05, "loss": 10.5277, "step": 9120 }, { "epoch": 0.045594147169717096, "grad_norm": 0.09147665649652481, "learning_rate": 2.8703847405441942e-05, "loss": 10.5265, "step": 9130 }, { "epoch": 0.045644085994656546, "grad_norm": 0.09924129396677017, "learning_rate": 2.870234549050039e-05, "loss": 10.5251, "step": 9140 }, { "epoch": 0.045694024819595995, "grad_norm": 0.09299519658088684, "learning_rate": 2.8700843575558836e-05, "loss": 10.5242, "step": 9150 }, { "epoch": 0.045743963644535444, "grad_norm": 0.09638933092355728, "learning_rate": 2.869934166061729e-05, "loss": 10.5243, "step": 9160 }, { "epoch": 0.045793902469474894, "grad_norm": 0.10021718591451645, "learning_rate": 2.8697839745675736e-05, "loss": 10.5254, "step": 9170 }, { "epoch": 0.04584384129441434, "grad_norm": 0.09364977478981018, "learning_rate": 2.869633783073419e-05, "loss": 10.524, "step": 9180 }, { "epoch": 0.04589378011935379, "grad_norm": 0.09493537992238998, "learning_rate": 2.8694835915792637e-05, "loss": 10.524, "step": 9190 }, { "epoch": 0.04594371894429324, "grad_norm": 0.09197970479726791, "learning_rate": 2.8693334000851083e-05, "loss": 10.5239, "step": 9200 }, { "epoch": 0.04599365776923269, "grad_norm": 0.09489205479621887, "learning_rate": 2.8691832085909537e-05, "loss": 10.5234, "step": 9210 }, { "epoch": 0.04604359659417214, "grad_norm": 0.09727194160223007, "learning_rate": 2.8690330170967984e-05, "loss": 10.5224, "step": 9220 }, { "epoch": 0.04609353541911159, "grad_norm": 0.09218050539493561, "learning_rate": 2.8688828256026437e-05, "loss": 10.5209, "step": 9230 }, { "epoch": 0.04614347424405104, "grad_norm": 0.08994846791028976, "learning_rate": 2.8687326341084884e-05, "loss": 10.5225, "step": 9240 }, { "epoch": 0.04619341306899049, "grad_norm": 0.09504954516887665, "learning_rate": 2.868582442614333e-05, "loss": 10.5213, "step": 9250 }, { "epoch": 0.04624335189392994, "grad_norm": 0.0972573459148407, "learning_rate": 2.8684322511201784e-05, "loss": 10.522, "step": 9260 }, { "epoch": 0.04629329071886939, "grad_norm": 0.09305988252162933, "learning_rate": 2.868282059626023e-05, "loss": 10.5211, "step": 9270 }, { "epoch": 0.04634322954380883, "grad_norm": 0.09756896644830704, "learning_rate": 2.8681318681318685e-05, "loss": 10.5218, "step": 9280 }, { "epoch": 0.04639316836874828, "grad_norm": 0.09515320509672165, "learning_rate": 2.867981676637713e-05, "loss": 10.5219, "step": 9290 }, { "epoch": 0.04644310719368773, "grad_norm": 0.1009177565574646, "learning_rate": 2.867831485143558e-05, "loss": 10.5203, "step": 9300 }, { "epoch": 0.04649304601862718, "grad_norm": 0.09644551575183868, "learning_rate": 2.8676812936494032e-05, "loss": 10.5188, "step": 9310 }, { "epoch": 0.04654298484356663, "grad_norm": 0.09817861765623093, "learning_rate": 2.867531102155248e-05, "loss": 10.5195, "step": 9320 }, { "epoch": 0.04659292366850608, "grad_norm": 0.09904427081346512, "learning_rate": 2.8673809106610932e-05, "loss": 10.5177, "step": 9330 }, { "epoch": 0.04664286249344553, "grad_norm": 0.09583260118961334, "learning_rate": 2.867230719166938e-05, "loss": 10.5209, "step": 9340 }, { "epoch": 0.04669280131838498, "grad_norm": 0.09900553524494171, "learning_rate": 2.8670805276727826e-05, "loss": 10.5185, "step": 9350 }, { "epoch": 0.04674274014332443, "grad_norm": 0.09579353779554367, "learning_rate": 2.866930336178628e-05, "loss": 10.5189, "step": 9360 }, { "epoch": 0.046792678968263876, "grad_norm": 0.09370560199022293, "learning_rate": 2.8667801446844726e-05, "loss": 10.5196, "step": 9370 }, { "epoch": 0.046842617793203326, "grad_norm": 0.09705270081758499, "learning_rate": 2.866629953190318e-05, "loss": 10.5178, "step": 9380 }, { "epoch": 0.046892556618142775, "grad_norm": 0.09159563481807709, "learning_rate": 2.8664797616961627e-05, "loss": 10.5194, "step": 9390 }, { "epoch": 0.046942495443082224, "grad_norm": 0.09416472911834717, "learning_rate": 2.8663295702020073e-05, "loss": 10.5183, "step": 9400 }, { "epoch": 0.046992434268021674, "grad_norm": 0.09269614517688751, "learning_rate": 2.8661793787078527e-05, "loss": 10.5174, "step": 9410 }, { "epoch": 0.04704237309296112, "grad_norm": 0.09696542471647263, "learning_rate": 2.8660291872136974e-05, "loss": 10.5164, "step": 9420 }, { "epoch": 0.04709231191790057, "grad_norm": 0.09807953983545303, "learning_rate": 2.8658789957195427e-05, "loss": 10.5161, "step": 9430 }, { "epoch": 0.04714225074284002, "grad_norm": 0.09456553310155869, "learning_rate": 2.8657288042253874e-05, "loss": 10.5161, "step": 9440 }, { "epoch": 0.04719218956777947, "grad_norm": 0.09571606665849686, "learning_rate": 2.865578612731232e-05, "loss": 10.5163, "step": 9450 }, { "epoch": 0.04724212839271892, "grad_norm": 0.09193666279315948, "learning_rate": 2.8654284212370774e-05, "loss": 10.5135, "step": 9460 }, { "epoch": 0.04729206721765837, "grad_norm": 0.09799723327159882, "learning_rate": 2.865278229742922e-05, "loss": 10.515, "step": 9470 }, { "epoch": 0.04734200604259782, "grad_norm": 0.09496576339006424, "learning_rate": 2.8651280382487675e-05, "loss": 10.5143, "step": 9480 }, { "epoch": 0.04739194486753727, "grad_norm": 0.09969381988048553, "learning_rate": 2.864977846754612e-05, "loss": 10.5162, "step": 9490 }, { "epoch": 0.04744188369247672, "grad_norm": 0.08909635990858078, "learning_rate": 2.8648276552604572e-05, "loss": 10.5178, "step": 9500 }, { "epoch": 0.04749182251741617, "grad_norm": 0.10057427734136581, "learning_rate": 2.8646774637663022e-05, "loss": 10.5147, "step": 9510 }, { "epoch": 0.04754176134235561, "grad_norm": 0.09253738075494766, "learning_rate": 2.864527272272147e-05, "loss": 10.5167, "step": 9520 }, { "epoch": 0.04759170016729506, "grad_norm": 0.09274590760469437, "learning_rate": 2.8643770807779922e-05, "loss": 10.9066, "step": 9530 }, { "epoch": 0.04764163899223451, "grad_norm": 0.09520485997200012, "learning_rate": 2.864226889283837e-05, "loss": 10.5115, "step": 9540 }, { "epoch": 0.04769157781717396, "grad_norm": 0.09519509971141815, "learning_rate": 2.864076697789682e-05, "loss": 10.5245, "step": 9550 }, { "epoch": 0.04774151664211341, "grad_norm": 0.09528204053640366, "learning_rate": 2.863926506295527e-05, "loss": 10.5133, "step": 9560 }, { "epoch": 0.04779145546705286, "grad_norm": 0.09493537992238998, "learning_rate": 2.8637763148013716e-05, "loss": 10.5144, "step": 9570 }, { "epoch": 0.04784139429199231, "grad_norm": 0.09186563640832901, "learning_rate": 2.863626123307217e-05, "loss": 10.5186, "step": 9580 }, { "epoch": 0.04789133311693176, "grad_norm": 0.09734216332435608, "learning_rate": 2.8634759318130617e-05, "loss": 10.5126, "step": 9590 }, { "epoch": 0.04794127194187121, "grad_norm": 0.09098707884550095, "learning_rate": 2.8633257403189067e-05, "loss": 11.7738, "step": 9600 }, { "epoch": 0.047991210766810656, "grad_norm": 0.09297781437635422, "learning_rate": 2.8631755488247517e-05, "loss": 10.6659, "step": 9610 }, { "epoch": 0.048041149591750106, "grad_norm": 0.09503110498189926, "learning_rate": 2.8630253573305964e-05, "loss": 10.5111, "step": 9620 }, { "epoch": 0.048091088416689555, "grad_norm": 0.09521497786045074, "learning_rate": 2.8628751658364417e-05, "loss": 10.5093, "step": 9630 }, { "epoch": 0.048141027241629004, "grad_norm": 0.09482381492853165, "learning_rate": 2.8627249743422864e-05, "loss": 10.5128, "step": 9640 }, { "epoch": 0.048190966066568454, "grad_norm": 0.09641779214143753, "learning_rate": 2.8625747828481314e-05, "loss": 10.51, "step": 9650 }, { "epoch": 0.0482409048915079, "grad_norm": 0.10081770271062851, "learning_rate": 2.8624245913539764e-05, "loss": 10.511, "step": 9660 }, { "epoch": 0.04829084371644735, "grad_norm": 0.09441135823726654, "learning_rate": 2.862274399859821e-05, "loss": 10.5093, "step": 9670 }, { "epoch": 0.0483407825413868, "grad_norm": 0.09851568937301636, "learning_rate": 2.8621242083656665e-05, "loss": 10.5099, "step": 9680 }, { "epoch": 0.04839072136632625, "grad_norm": 0.09381608664989471, "learning_rate": 2.861974016871511e-05, "loss": 10.5099, "step": 9690 }, { "epoch": 0.0484406601912657, "grad_norm": 0.0943373441696167, "learning_rate": 2.8618238253773562e-05, "loss": 10.5086, "step": 9700 }, { "epoch": 0.04849059901620515, "grad_norm": 0.09664776921272278, "learning_rate": 2.8616736338832012e-05, "loss": 10.51, "step": 9710 }, { "epoch": 0.0485405378411446, "grad_norm": 0.09082762151956558, "learning_rate": 2.861523442389046e-05, "loss": 10.509, "step": 9720 }, { "epoch": 0.04859047666608405, "grad_norm": 0.1018294245004654, "learning_rate": 2.8613732508948912e-05, "loss": 10.5095, "step": 9730 }, { "epoch": 0.0486404154910235, "grad_norm": 0.09788943082094193, "learning_rate": 2.861223059400736e-05, "loss": 10.506, "step": 9740 }, { "epoch": 0.04869035431596295, "grad_norm": 0.0945865660905838, "learning_rate": 2.861072867906581e-05, "loss": 10.5045, "step": 9750 }, { "epoch": 0.0487402931409024, "grad_norm": 0.09245441854000092, "learning_rate": 2.860922676412426e-05, "loss": 10.5076, "step": 9760 }, { "epoch": 0.04879023196584184, "grad_norm": 0.09692434966564178, "learning_rate": 2.8607724849182706e-05, "loss": 10.5044, "step": 9770 }, { "epoch": 0.04884017079078129, "grad_norm": 0.10064822435379028, "learning_rate": 2.860622293424116e-05, "loss": 10.5054, "step": 9780 }, { "epoch": 0.04889010961572074, "grad_norm": 0.09322358667850494, "learning_rate": 2.8604721019299607e-05, "loss": 10.5048, "step": 9790 }, { "epoch": 0.04894004844066019, "grad_norm": 0.09551732242107391, "learning_rate": 2.8603219104358057e-05, "loss": 10.5049, "step": 9800 }, { "epoch": 0.04898998726559964, "grad_norm": 0.09352359920740128, "learning_rate": 2.8601717189416507e-05, "loss": 10.5055, "step": 9810 }, { "epoch": 0.04903992609053909, "grad_norm": 0.09425562620162964, "learning_rate": 2.8600215274474957e-05, "loss": 10.5046, "step": 9820 }, { "epoch": 0.04908986491547854, "grad_norm": 0.09174810349941254, "learning_rate": 2.8598713359533407e-05, "loss": 10.5061, "step": 9830 }, { "epoch": 0.04913980374041799, "grad_norm": 0.0945669412612915, "learning_rate": 2.8597211444591854e-05, "loss": 10.5056, "step": 9840 }, { "epoch": 0.049189742565357436, "grad_norm": 0.1002650186419487, "learning_rate": 2.8595709529650304e-05, "loss": 10.5034, "step": 9850 }, { "epoch": 0.049239681390296886, "grad_norm": 0.09847228229045868, "learning_rate": 2.8594207614708754e-05, "loss": 10.503, "step": 9860 }, { "epoch": 0.049289620215236335, "grad_norm": 0.10454359650611877, "learning_rate": 2.8592705699767205e-05, "loss": 10.504, "step": 9870 }, { "epoch": 0.049339559040175784, "grad_norm": 0.09625834971666336, "learning_rate": 2.8591203784825655e-05, "loss": 10.5023, "step": 9880 }, { "epoch": 0.049389497865115234, "grad_norm": 0.09287148714065552, "learning_rate": 2.85897018698841e-05, "loss": 10.5059, "step": 9890 }, { "epoch": 0.04943943669005468, "grad_norm": 0.09204880893230438, "learning_rate": 2.8588199954942552e-05, "loss": 10.5014, "step": 9900 }, { "epoch": 0.04948937551499413, "grad_norm": 0.09391821175813675, "learning_rate": 2.8586698040001002e-05, "loss": 10.5023, "step": 9910 }, { "epoch": 0.04953931433993358, "grad_norm": 0.09617898613214493, "learning_rate": 2.8585196125059452e-05, "loss": 10.5032, "step": 9920 }, { "epoch": 0.04958925316487303, "grad_norm": 0.09573251754045486, "learning_rate": 2.8583694210117902e-05, "loss": 10.5024, "step": 9930 }, { "epoch": 0.04963919198981248, "grad_norm": 0.09664706140756607, "learning_rate": 2.858219229517635e-05, "loss": 10.5018, "step": 9940 }, { "epoch": 0.04968913081475193, "grad_norm": 0.09707572311162949, "learning_rate": 2.85806903802348e-05, "loss": 10.5026, "step": 9950 }, { "epoch": 0.04973906963969138, "grad_norm": 0.09420596808195114, "learning_rate": 2.857918846529325e-05, "loss": 10.5004, "step": 9960 }, { "epoch": 0.04978900846463083, "grad_norm": 0.09333644807338715, "learning_rate": 2.85776865503517e-05, "loss": 10.4996, "step": 9970 }, { "epoch": 0.04983894728957028, "grad_norm": 0.09561195969581604, "learning_rate": 2.857618463541015e-05, "loss": 10.5005, "step": 9980 }, { "epoch": 0.04988888611450973, "grad_norm": 0.09285534173250198, "learning_rate": 2.8574682720468597e-05, "loss": 10.5016, "step": 9990 }, { "epoch": 0.04993882493944918, "grad_norm": 0.09716643393039703, "learning_rate": 2.8573180805527047e-05, "loss": 10.4969, "step": 10000 }, { "epoch": 0.04998876376438863, "grad_norm": 0.09244739264249802, "learning_rate": 2.8571678890585497e-05, "loss": 10.4983, "step": 10010 }, { "epoch": 0.05003870258932807, "grad_norm": 0.099833182990551, "learning_rate": 2.8570176975643947e-05, "loss": 10.5007, "step": 10020 }, { "epoch": 0.05008864141426752, "grad_norm": 0.09650156646966934, "learning_rate": 2.8568675060702397e-05, "loss": 10.4993, "step": 10030 }, { "epoch": 0.05013858023920697, "grad_norm": 0.10166086256504059, "learning_rate": 2.8567173145760844e-05, "loss": 10.4981, "step": 10040 }, { "epoch": 0.05018851906414642, "grad_norm": 0.09234780818223953, "learning_rate": 2.8565671230819294e-05, "loss": 10.4969, "step": 10050 }, { "epoch": 0.05023845788908587, "grad_norm": 0.0958356112241745, "learning_rate": 2.8564169315877744e-05, "loss": 10.4996, "step": 10060 }, { "epoch": 0.05028839671402532, "grad_norm": 0.09565756469964981, "learning_rate": 2.8562667400936195e-05, "loss": 10.4971, "step": 10070 }, { "epoch": 0.05033833553896477, "grad_norm": 0.09427642822265625, "learning_rate": 2.8561165485994645e-05, "loss": 10.4966, "step": 10080 }, { "epoch": 0.050388274363904216, "grad_norm": 0.0930957943201065, "learning_rate": 2.855966357105309e-05, "loss": 10.4987, "step": 10090 }, { "epoch": 0.050438213188843666, "grad_norm": 0.09489066898822784, "learning_rate": 2.8558161656111542e-05, "loss": 10.496, "step": 10100 }, { "epoch": 0.050488152013783115, "grad_norm": 0.09817587584257126, "learning_rate": 2.8556659741169992e-05, "loss": 10.4956, "step": 10110 }, { "epoch": 0.050538090838722564, "grad_norm": 0.09142786264419556, "learning_rate": 2.8555157826228442e-05, "loss": 10.4974, "step": 10120 }, { "epoch": 0.050588029663662014, "grad_norm": 0.09748794138431549, "learning_rate": 2.8553655911286892e-05, "loss": 10.4959, "step": 10130 }, { "epoch": 0.05063796848860146, "grad_norm": 0.09116542339324951, "learning_rate": 2.8552153996345343e-05, "loss": 10.498, "step": 10140 }, { "epoch": 0.05068790731354091, "grad_norm": 0.09313584864139557, "learning_rate": 2.855065208140379e-05, "loss": 10.4932, "step": 10150 }, { "epoch": 0.05073784613848036, "grad_norm": 0.09887810051441193, "learning_rate": 2.854915016646224e-05, "loss": 10.4945, "step": 10160 }, { "epoch": 0.05078778496341981, "grad_norm": 0.09741490334272385, "learning_rate": 2.854764825152069e-05, "loss": 10.4943, "step": 10170 }, { "epoch": 0.05083772378835926, "grad_norm": 0.09374571591615677, "learning_rate": 2.854614633657914e-05, "loss": 10.4943, "step": 10180 }, { "epoch": 0.05088766261329871, "grad_norm": 0.09434989094734192, "learning_rate": 2.854464442163759e-05, "loss": 10.4963, "step": 10190 }, { "epoch": 0.05093760143823816, "grad_norm": 0.09508496522903442, "learning_rate": 2.8543142506696037e-05, "loss": 10.4925, "step": 10200 }, { "epoch": 0.05098754026317761, "grad_norm": 0.09928717464208603, "learning_rate": 2.8541640591754487e-05, "loss": 10.4933, "step": 10210 }, { "epoch": 0.05103747908811706, "grad_norm": 0.09969381988048553, "learning_rate": 2.8540138676812937e-05, "loss": 10.4932, "step": 10220 }, { "epoch": 0.05108741791305651, "grad_norm": 0.0988709032535553, "learning_rate": 2.8538636761871387e-05, "loss": 10.4942, "step": 10230 }, { "epoch": 0.05113735673799596, "grad_norm": 0.09207888692617416, "learning_rate": 2.8537134846929838e-05, "loss": 10.492, "step": 10240 }, { "epoch": 0.05118729556293541, "grad_norm": 0.09073358029127121, "learning_rate": 2.8535632931988284e-05, "loss": 10.494, "step": 10250 }, { "epoch": 0.05123723438787485, "grad_norm": 0.09680426120758057, "learning_rate": 2.8534131017046734e-05, "loss": 10.4916, "step": 10260 }, { "epoch": 0.0512871732128143, "grad_norm": 0.09444583207368851, "learning_rate": 2.8532629102105185e-05, "loss": 10.4934, "step": 10270 }, { "epoch": 0.05133711203775375, "grad_norm": 0.09547161310911179, "learning_rate": 2.8531127187163635e-05, "loss": 10.4909, "step": 10280 }, { "epoch": 0.0513870508626932, "grad_norm": 0.09873054176568985, "learning_rate": 2.8529625272222085e-05, "loss": 10.4924, "step": 10290 }, { "epoch": 0.05143698968763265, "grad_norm": 0.09690874069929123, "learning_rate": 2.8528123357280532e-05, "loss": 10.4925, "step": 10300 }, { "epoch": 0.0514869285125721, "grad_norm": 0.09330197423696518, "learning_rate": 2.8526621442338982e-05, "loss": 10.4923, "step": 10310 }, { "epoch": 0.05153686733751155, "grad_norm": 0.10089316219091415, "learning_rate": 2.8525119527397432e-05, "loss": 10.4924, "step": 10320 }, { "epoch": 0.051586806162450996, "grad_norm": 0.0995856523513794, "learning_rate": 2.8523617612455882e-05, "loss": 10.4916, "step": 10330 }, { "epoch": 0.051636744987390445, "grad_norm": 0.09683484584093094, "learning_rate": 2.8522115697514333e-05, "loss": 10.4886, "step": 10340 }, { "epoch": 0.051686683812329895, "grad_norm": 0.09331785142421722, "learning_rate": 2.852061378257278e-05, "loss": 10.4894, "step": 10350 }, { "epoch": 0.051736622637269344, "grad_norm": 0.09782253205776215, "learning_rate": 2.851911186763123e-05, "loss": 10.4884, "step": 10360 }, { "epoch": 0.051786561462208794, "grad_norm": 0.09803865849971771, "learning_rate": 2.851760995268968e-05, "loss": 10.4905, "step": 10370 }, { "epoch": 0.05183650028714824, "grad_norm": 0.09339244663715363, "learning_rate": 2.851610803774813e-05, "loss": 10.4909, "step": 10380 }, { "epoch": 0.05188643911208769, "grad_norm": 0.09537281095981598, "learning_rate": 2.851460612280658e-05, "loss": 10.4881, "step": 10390 }, { "epoch": 0.05193637793702714, "grad_norm": 0.09507238864898682, "learning_rate": 2.8513104207865027e-05, "loss": 10.4862, "step": 10400 }, { "epoch": 0.05198631676196659, "grad_norm": 0.09395944327116013, "learning_rate": 2.8511602292923477e-05, "loss": 10.486, "step": 10410 }, { "epoch": 0.05203625558690604, "grad_norm": 0.09412617236375809, "learning_rate": 2.8510100377981927e-05, "loss": 10.4888, "step": 10420 }, { "epoch": 0.05208619441184549, "grad_norm": 0.10089904814958572, "learning_rate": 2.8508598463040377e-05, "loss": 10.488, "step": 10430 }, { "epoch": 0.05213613323678494, "grad_norm": 0.09662104398012161, "learning_rate": 2.8507096548098828e-05, "loss": 10.4879, "step": 10440 }, { "epoch": 0.05218607206172439, "grad_norm": 0.09902186691761017, "learning_rate": 2.8505594633157274e-05, "loss": 10.4877, "step": 10450 }, { "epoch": 0.05223601088666384, "grad_norm": 0.09871252626180649, "learning_rate": 2.8504092718215728e-05, "loss": 10.489, "step": 10460 }, { "epoch": 0.05228594971160329, "grad_norm": 0.09363856911659241, "learning_rate": 2.8502590803274175e-05, "loss": 10.4855, "step": 10470 }, { "epoch": 0.05233588853654274, "grad_norm": 0.0946008712053299, "learning_rate": 2.8501088888332625e-05, "loss": 10.4855, "step": 10480 }, { "epoch": 0.05238582736148219, "grad_norm": 0.09601443260908127, "learning_rate": 2.8499586973391075e-05, "loss": 10.4831, "step": 10490 }, { "epoch": 0.05243576618642164, "grad_norm": 0.09569672495126724, "learning_rate": 2.8498085058449522e-05, "loss": 10.4833, "step": 10500 }, { "epoch": 0.05248570501136108, "grad_norm": 0.09212665259838104, "learning_rate": 2.8496583143507975e-05, "loss": 10.4858, "step": 10510 }, { "epoch": 0.05253564383630053, "grad_norm": 0.09606766700744629, "learning_rate": 2.8495081228566422e-05, "loss": 10.4837, "step": 10520 }, { "epoch": 0.05258558266123998, "grad_norm": 0.09896976500749588, "learning_rate": 2.8493579313624872e-05, "loss": 10.4854, "step": 10530 }, { "epoch": 0.05263552148617943, "grad_norm": 0.0996663048863411, "learning_rate": 2.8492077398683323e-05, "loss": 10.4849, "step": 10540 }, { "epoch": 0.05268546031111888, "grad_norm": 0.09384378045797348, "learning_rate": 2.849057548374177e-05, "loss": 10.4849, "step": 10550 }, { "epoch": 0.05273539913605833, "grad_norm": 0.09996800869703293, "learning_rate": 2.8489073568800223e-05, "loss": 10.4818, "step": 10560 }, { "epoch": 0.052785337960997776, "grad_norm": 0.09955079853534698, "learning_rate": 2.848757165385867e-05, "loss": 10.4836, "step": 10570 }, { "epoch": 0.052835276785937225, "grad_norm": 0.0904826894402504, "learning_rate": 2.848606973891712e-05, "loss": 10.483, "step": 10580 }, { "epoch": 0.052885215610876675, "grad_norm": 0.0925852507352829, "learning_rate": 2.848456782397557e-05, "loss": 10.4826, "step": 10590 }, { "epoch": 0.052935154435816124, "grad_norm": 0.09814697504043579, "learning_rate": 2.8483065909034017e-05, "loss": 10.483, "step": 10600 }, { "epoch": 0.052985093260755574, "grad_norm": 0.09643968194723129, "learning_rate": 2.848156399409247e-05, "loss": 10.4833, "step": 10610 }, { "epoch": 0.05303503208569502, "grad_norm": 0.09589682519435883, "learning_rate": 2.8480062079150917e-05, "loss": 10.4814, "step": 10620 }, { "epoch": 0.05308497091063447, "grad_norm": 0.09177594631910324, "learning_rate": 2.8478560164209367e-05, "loss": 10.4801, "step": 10630 }, { "epoch": 0.05313490973557392, "grad_norm": 0.08999846875667572, "learning_rate": 2.8477058249267818e-05, "loss": 10.4834, "step": 10640 }, { "epoch": 0.05318484856051337, "grad_norm": 0.09998583048582077, "learning_rate": 2.8475556334326264e-05, "loss": 10.4802, "step": 10650 }, { "epoch": 0.05323478738545282, "grad_norm": 0.0951545462012291, "learning_rate": 2.8474054419384718e-05, "loss": 10.4797, "step": 10660 }, { "epoch": 0.05328472621039227, "grad_norm": 0.09384284913539886, "learning_rate": 2.8472552504443165e-05, "loss": 10.4819, "step": 10670 }, { "epoch": 0.05333466503533172, "grad_norm": 0.09465983510017395, "learning_rate": 2.8471050589501615e-05, "loss": 10.4797, "step": 10680 }, { "epoch": 0.05338460386027117, "grad_norm": 0.09355910867452621, "learning_rate": 2.8469548674560065e-05, "loss": 10.4793, "step": 10690 }, { "epoch": 0.05343454268521062, "grad_norm": 0.10254157334566116, "learning_rate": 2.8468046759618512e-05, "loss": 10.481, "step": 10700 }, { "epoch": 0.05348448151015007, "grad_norm": 0.09623821824789047, "learning_rate": 2.8466544844676965e-05, "loss": 10.4798, "step": 10710 }, { "epoch": 0.05353442033508952, "grad_norm": 0.0956728383898735, "learning_rate": 2.8465042929735412e-05, "loss": 10.4775, "step": 10720 }, { "epoch": 0.05358435916002897, "grad_norm": 0.1043584942817688, "learning_rate": 2.8463541014793862e-05, "loss": 10.4797, "step": 10730 }, { "epoch": 0.05363429798496842, "grad_norm": 0.09671546518802643, "learning_rate": 2.8462039099852313e-05, "loss": 10.4768, "step": 10740 }, { "epoch": 0.053684236809907866, "grad_norm": 0.0880408063530922, "learning_rate": 2.846053718491076e-05, "loss": 10.4796, "step": 10750 }, { "epoch": 0.05373417563484731, "grad_norm": 0.0982058197259903, "learning_rate": 2.8459035269969213e-05, "loss": 10.4792, "step": 10760 }, { "epoch": 0.05378411445978676, "grad_norm": 0.09881541877985, "learning_rate": 2.845753335502766e-05, "loss": 10.4785, "step": 10770 }, { "epoch": 0.05383405328472621, "grad_norm": 0.10053160041570663, "learning_rate": 2.8456031440086113e-05, "loss": 10.4742, "step": 10780 }, { "epoch": 0.05388399210966566, "grad_norm": 0.0990302637219429, "learning_rate": 2.845452952514456e-05, "loss": 10.4763, "step": 10790 }, { "epoch": 0.05393393093460511, "grad_norm": 0.09606090933084488, "learning_rate": 2.8453027610203007e-05, "loss": 10.478, "step": 10800 }, { "epoch": 0.053983869759544556, "grad_norm": 0.09462147206068039, "learning_rate": 2.845152569526146e-05, "loss": 10.4758, "step": 10810 }, { "epoch": 0.054033808584484005, "grad_norm": 0.09580814093351364, "learning_rate": 2.8450023780319907e-05, "loss": 10.4743, "step": 10820 }, { "epoch": 0.054083747409423455, "grad_norm": 0.09848089516162872, "learning_rate": 2.844852186537836e-05, "loss": 10.4749, "step": 10830 }, { "epoch": 0.054133686234362904, "grad_norm": 0.09434827417135239, "learning_rate": 2.8447019950436808e-05, "loss": 10.4777, "step": 10840 }, { "epoch": 0.054183625059302354, "grad_norm": 0.09295500814914703, "learning_rate": 2.8445518035495254e-05, "loss": 10.4744, "step": 10850 }, { "epoch": 0.0542335638842418, "grad_norm": 0.0959131121635437, "learning_rate": 2.8444016120553708e-05, "loss": 10.4755, "step": 10860 }, { "epoch": 0.05428350270918125, "grad_norm": 0.09555883705615997, "learning_rate": 2.8442514205612155e-05, "loss": 10.4737, "step": 10870 }, { "epoch": 0.0543334415341207, "grad_norm": 0.09721937030553818, "learning_rate": 2.8441012290670608e-05, "loss": 10.4752, "step": 10880 }, { "epoch": 0.05438338035906015, "grad_norm": 0.09354712814092636, "learning_rate": 2.8439510375729055e-05, "loss": 10.4721, "step": 10890 }, { "epoch": 0.0544333191839996, "grad_norm": 0.09647790342569351, "learning_rate": 2.8438008460787502e-05, "loss": 10.4745, "step": 10900 }, { "epoch": 0.05448325800893905, "grad_norm": 0.10081273317337036, "learning_rate": 2.8436506545845955e-05, "loss": 10.4725, "step": 10910 }, { "epoch": 0.0545331968338785, "grad_norm": 0.09475501626729965, "learning_rate": 2.8435004630904402e-05, "loss": 10.4723, "step": 10920 }, { "epoch": 0.05458313565881795, "grad_norm": 0.09256067872047424, "learning_rate": 2.8433502715962856e-05, "loss": 10.4717, "step": 10930 }, { "epoch": 0.0546330744837574, "grad_norm": 0.09541960060596466, "learning_rate": 2.8432000801021303e-05, "loss": 10.4727, "step": 10940 }, { "epoch": 0.05468301330869685, "grad_norm": 0.09717325121164322, "learning_rate": 2.843049888607975e-05, "loss": 10.4725, "step": 10950 }, { "epoch": 0.0547329521336363, "grad_norm": 0.09780695289373398, "learning_rate": 2.8428996971138203e-05, "loss": 10.4726, "step": 10960 }, { "epoch": 0.05478289095857575, "grad_norm": 0.09682448208332062, "learning_rate": 2.842749505619665e-05, "loss": 10.4728, "step": 10970 }, { "epoch": 0.0548328297835152, "grad_norm": 0.0957455188035965, "learning_rate": 2.8425993141255103e-05, "loss": 10.4719, "step": 10980 }, { "epoch": 0.054882768608454646, "grad_norm": 0.0956406518816948, "learning_rate": 2.842449122631355e-05, "loss": 10.4714, "step": 10990 }, { "epoch": 0.054932707433394096, "grad_norm": 0.0934106856584549, "learning_rate": 2.8422989311371997e-05, "loss": 10.4694, "step": 11000 }, { "epoch": 0.05498264625833354, "grad_norm": 0.09818681329488754, "learning_rate": 2.842148739643045e-05, "loss": 10.4721, "step": 11010 }, { "epoch": 0.05503258508327299, "grad_norm": 0.09489887952804565, "learning_rate": 2.8419985481488897e-05, "loss": 10.4708, "step": 11020 }, { "epoch": 0.05508252390821244, "grad_norm": 0.09038610011339188, "learning_rate": 2.841848356654735e-05, "loss": 10.4731, "step": 11030 }, { "epoch": 0.05513246273315189, "grad_norm": 0.0944511890411377, "learning_rate": 2.8416981651605798e-05, "loss": 10.4708, "step": 11040 }, { "epoch": 0.055182401558091336, "grad_norm": 0.09606794267892838, "learning_rate": 2.8415479736664244e-05, "loss": 10.4689, "step": 11050 }, { "epoch": 0.055232340383030785, "grad_norm": 0.09804631024599075, "learning_rate": 2.8413977821722698e-05, "loss": 10.4698, "step": 11060 }, { "epoch": 0.055282279207970235, "grad_norm": 0.10113038122653961, "learning_rate": 2.8412475906781145e-05, "loss": 10.4686, "step": 11070 }, { "epoch": 0.055332218032909684, "grad_norm": 0.10174177587032318, "learning_rate": 2.8410973991839598e-05, "loss": 10.4694, "step": 11080 }, { "epoch": 0.055382156857849134, "grad_norm": 0.09673687815666199, "learning_rate": 2.8409472076898045e-05, "loss": 10.4677, "step": 11090 }, { "epoch": 0.05543209568278858, "grad_norm": 0.09577104449272156, "learning_rate": 2.8407970161956495e-05, "loss": 10.4673, "step": 11100 }, { "epoch": 0.05548203450772803, "grad_norm": 0.09360501170158386, "learning_rate": 2.8406468247014945e-05, "loss": 10.47, "step": 11110 }, { "epoch": 0.05553197333266748, "grad_norm": 0.09364285320043564, "learning_rate": 2.8404966332073392e-05, "loss": 10.4676, "step": 11120 }, { "epoch": 0.05558191215760693, "grad_norm": 0.093747578561306, "learning_rate": 2.8403464417131846e-05, "loss": 10.4669, "step": 11130 }, { "epoch": 0.05563185098254638, "grad_norm": 0.0995292142033577, "learning_rate": 2.8401962502190293e-05, "loss": 10.4677, "step": 11140 }, { "epoch": 0.05568178980748583, "grad_norm": 0.0943877249956131, "learning_rate": 2.8400460587248743e-05, "loss": 10.4691, "step": 11150 }, { "epoch": 0.05573172863242528, "grad_norm": 0.09354741126298904, "learning_rate": 2.8398958672307193e-05, "loss": 10.4657, "step": 11160 }, { "epoch": 0.05578166745736473, "grad_norm": 0.09523334354162216, "learning_rate": 2.839745675736564e-05, "loss": 10.4658, "step": 11170 }, { "epoch": 0.05583160628230418, "grad_norm": 0.09460029006004333, "learning_rate": 2.8395954842424093e-05, "loss": 10.4631, "step": 11180 }, { "epoch": 0.05588154510724363, "grad_norm": 0.09529443830251694, "learning_rate": 2.839445292748254e-05, "loss": 10.4652, "step": 11190 }, { "epoch": 0.05593148393218308, "grad_norm": 0.09970243275165558, "learning_rate": 2.839295101254099e-05, "loss": 10.4644, "step": 11200 }, { "epoch": 0.05598142275712253, "grad_norm": 0.09669343382120132, "learning_rate": 2.839144909759944e-05, "loss": 10.465, "step": 11210 }, { "epoch": 0.05603136158206198, "grad_norm": 0.0948340967297554, "learning_rate": 2.8389947182657887e-05, "loss": 10.4656, "step": 11220 }, { "epoch": 0.056081300407001426, "grad_norm": 0.09210893511772156, "learning_rate": 2.838844526771634e-05, "loss": 10.4647, "step": 11230 }, { "epoch": 0.056131239231940876, "grad_norm": 0.09912335872650146, "learning_rate": 2.8386943352774788e-05, "loss": 10.4635, "step": 11240 }, { "epoch": 0.05618117805688032, "grad_norm": 0.09609910845756531, "learning_rate": 2.8385441437833238e-05, "loss": 10.4647, "step": 11250 }, { "epoch": 0.05623111688181977, "grad_norm": 0.09431801736354828, "learning_rate": 2.8383939522891688e-05, "loss": 10.4616, "step": 11260 }, { "epoch": 0.05628105570675922, "grad_norm": 0.09194690734148026, "learning_rate": 2.8382437607950135e-05, "loss": 10.4663, "step": 11270 }, { "epoch": 0.05633099453169867, "grad_norm": 0.09139011800289154, "learning_rate": 2.8380935693008588e-05, "loss": 10.462, "step": 11280 }, { "epoch": 0.056380933356638116, "grad_norm": 0.09335776418447495, "learning_rate": 2.8379433778067035e-05, "loss": 10.4619, "step": 11290 }, { "epoch": 0.056430872181577565, "grad_norm": 0.0932675376534462, "learning_rate": 2.8377931863125485e-05, "loss": 10.4618, "step": 11300 }, { "epoch": 0.056480811006517015, "grad_norm": 0.09276323765516281, "learning_rate": 2.8376429948183935e-05, "loss": 10.4618, "step": 11310 }, { "epoch": 0.056530749831456464, "grad_norm": 0.09329287707805634, "learning_rate": 2.8374928033242382e-05, "loss": 10.4613, "step": 11320 }, { "epoch": 0.056580688656395914, "grad_norm": 0.09470265358686447, "learning_rate": 2.8373426118300836e-05, "loss": 10.4618, "step": 11330 }, { "epoch": 0.05663062748133536, "grad_norm": 0.09163019806146622, "learning_rate": 2.8371924203359283e-05, "loss": 10.4631, "step": 11340 }, { "epoch": 0.05668056630627481, "grad_norm": 0.09264067560434341, "learning_rate": 2.8370422288417733e-05, "loss": 10.4619, "step": 11350 }, { "epoch": 0.05673050513121426, "grad_norm": 0.09891056269407272, "learning_rate": 2.8368920373476183e-05, "loss": 10.459, "step": 11360 }, { "epoch": 0.05678044395615371, "grad_norm": 0.09387898445129395, "learning_rate": 2.836741845853463e-05, "loss": 10.4621, "step": 11370 }, { "epoch": 0.05683038278109316, "grad_norm": 0.09117914736270905, "learning_rate": 2.8365916543593083e-05, "loss": 10.4607, "step": 11380 }, { "epoch": 0.05688032160603261, "grad_norm": 0.09382776916027069, "learning_rate": 2.836441462865153e-05, "loss": 10.4601, "step": 11390 }, { "epoch": 0.05693026043097206, "grad_norm": 0.09707043319940567, "learning_rate": 2.836291271370998e-05, "loss": 10.4598, "step": 11400 }, { "epoch": 0.05698019925591151, "grad_norm": 0.09478152543306351, "learning_rate": 2.836141079876843e-05, "loss": 10.4594, "step": 11410 }, { "epoch": 0.05703013808085096, "grad_norm": 0.09193360805511475, "learning_rate": 2.835990888382688e-05, "loss": 10.4586, "step": 11420 }, { "epoch": 0.05708007690579041, "grad_norm": 0.09122911840677261, "learning_rate": 2.835840696888533e-05, "loss": 10.4587, "step": 11430 }, { "epoch": 0.05713001573072986, "grad_norm": 0.09300627559423447, "learning_rate": 2.8356905053943778e-05, "loss": 10.4607, "step": 11440 }, { "epoch": 0.05717995455566931, "grad_norm": 0.09430982172489166, "learning_rate": 2.8355403139002228e-05, "loss": 10.4581, "step": 11450 }, { "epoch": 0.05722989338060876, "grad_norm": 0.1006307452917099, "learning_rate": 2.8353901224060678e-05, "loss": 10.4581, "step": 11460 }, { "epoch": 0.057279832205548206, "grad_norm": 0.09385743737220764, "learning_rate": 2.8352399309119128e-05, "loss": 10.4586, "step": 11470 }, { "epoch": 0.057329771030487656, "grad_norm": 0.09824958443641663, "learning_rate": 2.835089739417758e-05, "loss": 10.4587, "step": 11480 }, { "epoch": 0.057379709855427105, "grad_norm": 0.0924108624458313, "learning_rate": 2.8349395479236025e-05, "loss": 10.4568, "step": 11490 }, { "epoch": 0.05742964868036655, "grad_norm": 0.09391859918832779, "learning_rate": 2.834789356429448e-05, "loss": 10.4601, "step": 11500 }, { "epoch": 0.057479587505306, "grad_norm": 0.0961366668343544, "learning_rate": 2.8346391649352925e-05, "loss": 10.4558, "step": 11510 }, { "epoch": 0.05752952633024545, "grad_norm": 0.0933762714266777, "learning_rate": 2.8344889734411376e-05, "loss": 10.4547, "step": 11520 }, { "epoch": 0.057579465155184896, "grad_norm": 0.09340479224920273, "learning_rate": 2.8343387819469826e-05, "loss": 10.4567, "step": 11530 }, { "epoch": 0.057629403980124345, "grad_norm": 0.09362339973449707, "learning_rate": 2.8341885904528273e-05, "loss": 10.4587, "step": 11540 }, { "epoch": 0.057679342805063795, "grad_norm": 0.09768813848495483, "learning_rate": 2.8340383989586726e-05, "loss": 10.4558, "step": 11550 }, { "epoch": 0.057729281630003244, "grad_norm": 0.09130477160215378, "learning_rate": 2.8338882074645173e-05, "loss": 10.4552, "step": 11560 }, { "epoch": 0.057779220454942694, "grad_norm": 0.0935993641614914, "learning_rate": 2.8337380159703623e-05, "loss": 10.4522, "step": 11570 }, { "epoch": 0.05782915927988214, "grad_norm": 0.09301052987575531, "learning_rate": 2.8335878244762073e-05, "loss": 10.4558, "step": 11580 }, { "epoch": 0.05787909810482159, "grad_norm": 0.09924156963825226, "learning_rate": 2.833437632982052e-05, "loss": 10.4555, "step": 11590 }, { "epoch": 0.05792903692976104, "grad_norm": 0.09675630927085876, "learning_rate": 2.8332874414878974e-05, "loss": 10.4523, "step": 11600 }, { "epoch": 0.05797897575470049, "grad_norm": 0.09440577030181885, "learning_rate": 2.833137249993742e-05, "loss": 10.4536, "step": 11610 }, { "epoch": 0.05802891457963994, "grad_norm": 0.09317108243703842, "learning_rate": 2.832987058499587e-05, "loss": 10.4548, "step": 11620 }, { "epoch": 0.05807885340457939, "grad_norm": 0.09359714388847351, "learning_rate": 2.832836867005432e-05, "loss": 10.4534, "step": 11630 }, { "epoch": 0.05812879222951884, "grad_norm": 0.09872293472290039, "learning_rate": 2.8326866755112768e-05, "loss": 10.4551, "step": 11640 }, { "epoch": 0.05817873105445829, "grad_norm": 0.0959540382027626, "learning_rate": 2.832536484017122e-05, "loss": 10.4505, "step": 11650 }, { "epoch": 0.05822866987939774, "grad_norm": 0.09325166791677475, "learning_rate": 2.8323862925229668e-05, "loss": 10.4514, "step": 11660 }, { "epoch": 0.05827860870433719, "grad_norm": 0.0949905589222908, "learning_rate": 2.8322361010288118e-05, "loss": 10.4537, "step": 11670 }, { "epoch": 0.05832854752927664, "grad_norm": 0.09572743624448776, "learning_rate": 2.832085909534657e-05, "loss": 10.4515, "step": 11680 }, { "epoch": 0.05837848635421609, "grad_norm": 0.09633267670869827, "learning_rate": 2.8319357180405015e-05, "loss": 10.4507, "step": 11690 }, { "epoch": 0.05842842517915554, "grad_norm": 0.09619545936584473, "learning_rate": 2.831785526546347e-05, "loss": 10.4505, "step": 11700 }, { "epoch": 0.058478364004094986, "grad_norm": 0.0950017124414444, "learning_rate": 2.8316353350521915e-05, "loss": 10.4486, "step": 11710 }, { "epoch": 0.058528302829034436, "grad_norm": 0.09573563188314438, "learning_rate": 2.8314851435580366e-05, "loss": 10.45, "step": 11720 }, { "epoch": 0.058578241653973885, "grad_norm": 0.09022394567728043, "learning_rate": 2.8313349520638816e-05, "loss": 10.4487, "step": 11730 }, { "epoch": 0.058628180478913335, "grad_norm": 0.09762822836637497, "learning_rate": 2.8311847605697266e-05, "loss": 10.451, "step": 11740 }, { "epoch": 0.05867811930385278, "grad_norm": 0.10274951159954071, "learning_rate": 2.8310345690755716e-05, "loss": 10.4502, "step": 11750 }, { "epoch": 0.05872805812879223, "grad_norm": 0.09342820197343826, "learning_rate": 2.8308843775814163e-05, "loss": 10.4512, "step": 11760 }, { "epoch": 0.058777996953731676, "grad_norm": 0.09088432788848877, "learning_rate": 2.8307341860872613e-05, "loss": 10.4489, "step": 11770 }, { "epoch": 0.058827935778671125, "grad_norm": 0.09308407455682755, "learning_rate": 2.8305839945931063e-05, "loss": 10.4484, "step": 11780 }, { "epoch": 0.058877874603610575, "grad_norm": 0.09679020196199417, "learning_rate": 2.8304338030989514e-05, "loss": 10.4507, "step": 11790 }, { "epoch": 0.058927813428550024, "grad_norm": 0.09807083010673523, "learning_rate": 2.8302836116047964e-05, "loss": 10.4473, "step": 11800 }, { "epoch": 0.058977752253489474, "grad_norm": 0.09622442722320557, "learning_rate": 2.830133420110641e-05, "loss": 10.4472, "step": 11810 }, { "epoch": 0.05902769107842892, "grad_norm": 0.09451454877853394, "learning_rate": 2.829983228616486e-05, "loss": 10.4465, "step": 11820 }, { "epoch": 0.05907762990336837, "grad_norm": 0.10234472155570984, "learning_rate": 2.829833037122331e-05, "loss": 10.4481, "step": 11830 }, { "epoch": 0.05912756872830782, "grad_norm": 0.09240438789129257, "learning_rate": 2.829682845628176e-05, "loss": 10.4469, "step": 11840 }, { "epoch": 0.05917750755324727, "grad_norm": 0.09405849874019623, "learning_rate": 2.829532654134021e-05, "loss": 10.4481, "step": 11850 }, { "epoch": 0.05922744637818672, "grad_norm": 0.08946629613637924, "learning_rate": 2.8293824626398658e-05, "loss": 10.4498, "step": 11860 }, { "epoch": 0.05927738520312617, "grad_norm": 0.09469353407621384, "learning_rate": 2.8292322711457108e-05, "loss": 10.4447, "step": 11870 }, { "epoch": 0.05932732402806562, "grad_norm": 0.09458359330892563, "learning_rate": 2.829082079651556e-05, "loss": 10.4443, "step": 11880 }, { "epoch": 0.05937726285300507, "grad_norm": 0.08880256861448288, "learning_rate": 2.828931888157401e-05, "loss": 10.4459, "step": 11890 }, { "epoch": 0.05942720167794452, "grad_norm": 0.09645318239927292, "learning_rate": 2.828781696663246e-05, "loss": 10.4466, "step": 11900 }, { "epoch": 0.05947714050288397, "grad_norm": 0.0943111777305603, "learning_rate": 2.8286315051690905e-05, "loss": 10.4462, "step": 11910 }, { "epoch": 0.05952707932782342, "grad_norm": 0.09864623099565506, "learning_rate": 2.8284813136749356e-05, "loss": 10.4449, "step": 11920 }, { "epoch": 0.05957701815276287, "grad_norm": 0.09035992622375488, "learning_rate": 2.8283311221807806e-05, "loss": 10.4435, "step": 11930 }, { "epoch": 0.05962695697770232, "grad_norm": 0.09477786719799042, "learning_rate": 2.8281809306866256e-05, "loss": 10.4472, "step": 11940 }, { "epoch": 0.059676895802641766, "grad_norm": 0.09425050765275955, "learning_rate": 2.8280307391924706e-05, "loss": 10.4472, "step": 11950 }, { "epoch": 0.059726834627581216, "grad_norm": 0.0916745588183403, "learning_rate": 2.8278805476983153e-05, "loss": 10.4452, "step": 11960 }, { "epoch": 0.059776773452520665, "grad_norm": 0.09402048587799072, "learning_rate": 2.8277303562041603e-05, "loss": 10.4401, "step": 11970 }, { "epoch": 0.059826712277460115, "grad_norm": 0.09692014008760452, "learning_rate": 2.8275801647100053e-05, "loss": 10.444, "step": 11980 }, { "epoch": 0.05987665110239956, "grad_norm": 0.09546181559562683, "learning_rate": 2.8274299732158504e-05, "loss": 10.4423, "step": 11990 }, { "epoch": 0.059926589927339007, "grad_norm": 0.09214867651462555, "learning_rate": 2.8272797817216954e-05, "loss": 10.4401, "step": 12000 }, { "epoch": 0.059976528752278456, "grad_norm": 0.09537333995103836, "learning_rate": 2.82712959022754e-05, "loss": 10.4422, "step": 12010 }, { "epoch": 0.060026467577217905, "grad_norm": 0.0944821685552597, "learning_rate": 2.826979398733385e-05, "loss": 10.4448, "step": 12020 }, { "epoch": 0.060076406402157355, "grad_norm": 0.08920750021934509, "learning_rate": 2.82682920723923e-05, "loss": 10.4432, "step": 12030 }, { "epoch": 0.060126345227096804, "grad_norm": 0.09650328755378723, "learning_rate": 2.826679015745075e-05, "loss": 10.4419, "step": 12040 }, { "epoch": 0.060176284052036254, "grad_norm": 0.09876326471567154, "learning_rate": 2.82652882425092e-05, "loss": 10.439, "step": 12050 }, { "epoch": 0.0602262228769757, "grad_norm": 0.09975486248731613, "learning_rate": 2.826378632756765e-05, "loss": 10.439, "step": 12060 }, { "epoch": 0.06027616170191515, "grad_norm": 0.09765785932540894, "learning_rate": 2.8262284412626098e-05, "loss": 10.4414, "step": 12070 }, { "epoch": 0.0603261005268546, "grad_norm": 0.09784495830535889, "learning_rate": 2.826078249768455e-05, "loss": 10.4388, "step": 12080 }, { "epoch": 0.06037603935179405, "grad_norm": 0.09981703758239746, "learning_rate": 2.8259280582743e-05, "loss": 10.4412, "step": 12090 }, { "epoch": 0.0604259781767335, "grad_norm": 0.09435340017080307, "learning_rate": 2.825777866780145e-05, "loss": 10.439, "step": 12100 }, { "epoch": 0.06047591700167295, "grad_norm": 0.10208732634782791, "learning_rate": 2.82562767528599e-05, "loss": 10.4376, "step": 12110 }, { "epoch": 0.0605258558266124, "grad_norm": 0.09435919672250748, "learning_rate": 2.8254774837918346e-05, "loss": 10.44, "step": 12120 }, { "epoch": 0.06057579465155185, "grad_norm": 0.09108801931142807, "learning_rate": 2.8253272922976796e-05, "loss": 10.4375, "step": 12130 }, { "epoch": 0.0606257334764913, "grad_norm": 0.0965215414762497, "learning_rate": 2.8251771008035246e-05, "loss": 10.439, "step": 12140 }, { "epoch": 0.06067567230143075, "grad_norm": 0.09519945085048676, "learning_rate": 2.8250269093093696e-05, "loss": 10.4363, "step": 12150 }, { "epoch": 0.0607256111263702, "grad_norm": 0.09803465753793716, "learning_rate": 2.8248767178152146e-05, "loss": 10.4383, "step": 12160 }, { "epoch": 0.06077554995130965, "grad_norm": 0.09530342370271683, "learning_rate": 2.8247265263210593e-05, "loss": 10.438, "step": 12170 }, { "epoch": 0.0608254887762491, "grad_norm": 0.09839750826358795, "learning_rate": 2.8245763348269043e-05, "loss": 10.4355, "step": 12180 }, { "epoch": 0.060875427601188546, "grad_norm": 0.09567271173000336, "learning_rate": 2.8244261433327494e-05, "loss": 10.4368, "step": 12190 }, { "epoch": 0.060925366426127996, "grad_norm": 0.09974398463964462, "learning_rate": 2.8242759518385944e-05, "loss": 10.4357, "step": 12200 }, { "epoch": 0.060975305251067445, "grad_norm": 0.09306304156780243, "learning_rate": 2.8241257603444394e-05, "loss": 10.4359, "step": 12210 }, { "epoch": 0.061025244076006895, "grad_norm": 0.09435778856277466, "learning_rate": 2.823975568850284e-05, "loss": 10.4371, "step": 12220 }, { "epoch": 0.061075182900946344, "grad_norm": 0.10123757272958755, "learning_rate": 2.823825377356129e-05, "loss": 10.4379, "step": 12230 }, { "epoch": 0.061125121725885787, "grad_norm": 0.09351859986782074, "learning_rate": 2.823675185861974e-05, "loss": 10.434, "step": 12240 }, { "epoch": 0.061175060550825236, "grad_norm": 0.09679336100816727, "learning_rate": 2.823524994367819e-05, "loss": 10.4362, "step": 12250 }, { "epoch": 0.061224999375764685, "grad_norm": 0.09072909504175186, "learning_rate": 2.823374802873664e-05, "loss": 10.4377, "step": 12260 }, { "epoch": 0.061274938200704135, "grad_norm": 0.08954602479934692, "learning_rate": 2.8232246113795088e-05, "loss": 10.4366, "step": 12270 }, { "epoch": 0.061324877025643584, "grad_norm": 0.0955163910984993, "learning_rate": 2.823074419885354e-05, "loss": 10.4351, "step": 12280 }, { "epoch": 0.061374815850583034, "grad_norm": 0.09503049403429031, "learning_rate": 2.822924228391199e-05, "loss": 10.4351, "step": 12290 }, { "epoch": 0.06142475467552248, "grad_norm": 0.10008996725082397, "learning_rate": 2.822774036897044e-05, "loss": 10.4331, "step": 12300 }, { "epoch": 0.06147469350046193, "grad_norm": 0.1014408990740776, "learning_rate": 2.822623845402889e-05, "loss": 10.4347, "step": 12310 }, { "epoch": 0.06152463232540138, "grad_norm": 0.09109607338905334, "learning_rate": 2.8224736539087336e-05, "loss": 10.4333, "step": 12320 }, { "epoch": 0.06157457115034083, "grad_norm": 0.09781739115715027, "learning_rate": 2.8223234624145786e-05, "loss": 10.4332, "step": 12330 }, { "epoch": 0.06162450997528028, "grad_norm": 0.09548245370388031, "learning_rate": 2.8221732709204236e-05, "loss": 10.4314, "step": 12340 }, { "epoch": 0.06167444880021973, "grad_norm": 0.09480303525924683, "learning_rate": 2.8220230794262686e-05, "loss": 10.43, "step": 12350 }, { "epoch": 0.06172438762515918, "grad_norm": 0.09702970832586288, "learning_rate": 2.8218728879321136e-05, "loss": 10.433, "step": 12360 }, { "epoch": 0.06177432645009863, "grad_norm": 0.09529626369476318, "learning_rate": 2.8217226964379583e-05, "loss": 10.4315, "step": 12370 }, { "epoch": 0.06182426527503808, "grad_norm": 0.09766676276922226, "learning_rate": 2.8215725049438033e-05, "loss": 10.4315, "step": 12380 }, { "epoch": 0.06187420409997753, "grad_norm": 0.09520022571086884, "learning_rate": 2.8214223134496484e-05, "loss": 10.4313, "step": 12390 }, { "epoch": 0.06192414292491698, "grad_norm": 0.09235453605651855, "learning_rate": 2.8212721219554934e-05, "loss": 10.4308, "step": 12400 }, { "epoch": 0.06197408174985643, "grad_norm": 0.09697452187538147, "learning_rate": 2.8211219304613384e-05, "loss": 10.4296, "step": 12410 }, { "epoch": 0.06202402057479588, "grad_norm": 0.09309589117765427, "learning_rate": 2.820971738967183e-05, "loss": 10.4306, "step": 12420 }, { "epoch": 0.062073959399735326, "grad_norm": 0.09192889928817749, "learning_rate": 2.8208215474730284e-05, "loss": 10.4305, "step": 12430 }, { "epoch": 0.062123898224674776, "grad_norm": 0.09932176768779755, "learning_rate": 2.820671355978873e-05, "loss": 10.4302, "step": 12440 }, { "epoch": 0.062173837049614225, "grad_norm": 0.09811878204345703, "learning_rate": 2.820521164484718e-05, "loss": 10.429, "step": 12450 }, { "epoch": 0.062223775874553675, "grad_norm": 0.09283413738012314, "learning_rate": 2.820370972990563e-05, "loss": 10.4304, "step": 12460 }, { "epoch": 0.062273714699493124, "grad_norm": 0.09722374379634857, "learning_rate": 2.8202207814964078e-05, "loss": 10.429, "step": 12470 }, { "epoch": 0.06232365352443257, "grad_norm": 0.09965924918651581, "learning_rate": 2.8200705900022532e-05, "loss": 10.4291, "step": 12480 }, { "epoch": 0.062373592349372016, "grad_norm": 0.09636944532394409, "learning_rate": 2.819920398508098e-05, "loss": 10.4304, "step": 12490 }, { "epoch": 0.062423531174311465, "grad_norm": 0.09442856162786484, "learning_rate": 2.819770207013943e-05, "loss": 10.4282, "step": 12500 }, { "epoch": 0.062473469999250915, "grad_norm": 0.09579756110906601, "learning_rate": 2.819620015519788e-05, "loss": 10.4286, "step": 12510 }, { "epoch": 0.06252340882419037, "grad_norm": 0.09269638359546661, "learning_rate": 2.8194698240256326e-05, "loss": 10.4284, "step": 12520 }, { "epoch": 0.06257334764912982, "grad_norm": 0.0913536548614502, "learning_rate": 2.819319632531478e-05, "loss": 10.4262, "step": 12530 }, { "epoch": 0.06262328647406927, "grad_norm": 0.09674961864948273, "learning_rate": 2.8191694410373226e-05, "loss": 10.4301, "step": 12540 }, { "epoch": 0.06267322529900872, "grad_norm": 0.09084776788949966, "learning_rate": 2.8190192495431676e-05, "loss": 10.4284, "step": 12550 }, { "epoch": 0.06272316412394817, "grad_norm": 0.09603569656610489, "learning_rate": 2.8188690580490126e-05, "loss": 10.4262, "step": 12560 }, { "epoch": 0.06277310294888762, "grad_norm": 0.100166916847229, "learning_rate": 2.8187188665548573e-05, "loss": 10.4253, "step": 12570 }, { "epoch": 0.06282304177382707, "grad_norm": 0.09294571727514267, "learning_rate": 2.8185686750607027e-05, "loss": 10.4256, "step": 12580 }, { "epoch": 0.06287298059876652, "grad_norm": 0.09238424152135849, "learning_rate": 2.8184184835665474e-05, "loss": 10.427, "step": 12590 }, { "epoch": 0.06292291942370597, "grad_norm": 0.09537918120622635, "learning_rate": 2.8182682920723924e-05, "loss": 10.425, "step": 12600 }, { "epoch": 0.0629728582486454, "grad_norm": 0.09167756885290146, "learning_rate": 2.8181181005782374e-05, "loss": 10.4268, "step": 12610 }, { "epoch": 0.06302279707358485, "grad_norm": 0.09255441278219223, "learning_rate": 2.817967909084082e-05, "loss": 10.4283, "step": 12620 }, { "epoch": 0.0630727358985243, "grad_norm": 0.09267347306013107, "learning_rate": 2.8178177175899274e-05, "loss": 10.4236, "step": 12630 }, { "epoch": 0.06312267472346375, "grad_norm": 0.09058328717947006, "learning_rate": 2.817667526095772e-05, "loss": 10.4257, "step": 12640 }, { "epoch": 0.0631726135484032, "grad_norm": 0.09093445539474487, "learning_rate": 2.817517334601617e-05, "loss": 10.4247, "step": 12650 }, { "epoch": 0.06322255237334265, "grad_norm": 0.09584364295005798, "learning_rate": 2.817367143107462e-05, "loss": 10.4212, "step": 12660 }, { "epoch": 0.0632724911982821, "grad_norm": 0.09648899734020233, "learning_rate": 2.8172169516133068e-05, "loss": 10.4236, "step": 12670 }, { "epoch": 0.06332243002322155, "grad_norm": 0.10392139106988907, "learning_rate": 2.8170667601191522e-05, "loss": 10.4231, "step": 12680 }, { "epoch": 0.063372368848161, "grad_norm": 0.09030050039291382, "learning_rate": 2.816916568624997e-05, "loss": 10.424, "step": 12690 }, { "epoch": 0.06342230767310045, "grad_norm": 0.0974743589758873, "learning_rate": 2.816766377130842e-05, "loss": 10.4226, "step": 12700 }, { "epoch": 0.0634722464980399, "grad_norm": 0.09758103638887405, "learning_rate": 2.816616185636687e-05, "loss": 10.4236, "step": 12710 }, { "epoch": 0.06352218532297935, "grad_norm": 0.09443262219429016, "learning_rate": 2.8164659941425316e-05, "loss": 10.4204, "step": 12720 }, { "epoch": 0.0635721241479188, "grad_norm": 0.09165245294570923, "learning_rate": 2.816315802648377e-05, "loss": 10.4208, "step": 12730 }, { "epoch": 0.06362206297285825, "grad_norm": 0.09566860646009445, "learning_rate": 2.8161656111542216e-05, "loss": 10.4215, "step": 12740 }, { "epoch": 0.0636720017977977, "grad_norm": 0.0996147021651268, "learning_rate": 2.816015419660067e-05, "loss": 10.423, "step": 12750 }, { "epoch": 0.06372194062273714, "grad_norm": 0.09040533006191254, "learning_rate": 2.8158652281659116e-05, "loss": 10.422, "step": 12760 }, { "epoch": 0.0637718794476766, "grad_norm": 0.09722060710191727, "learning_rate": 2.8157150366717563e-05, "loss": 10.4204, "step": 12770 }, { "epoch": 0.06382181827261604, "grad_norm": 0.09629692882299423, "learning_rate": 2.8155648451776017e-05, "loss": 10.4208, "step": 12780 }, { "epoch": 0.06387175709755549, "grad_norm": 0.08845196664333344, "learning_rate": 2.8154146536834464e-05, "loss": 10.4187, "step": 12790 }, { "epoch": 0.06392169592249494, "grad_norm": 0.0945558026432991, "learning_rate": 2.8152644621892917e-05, "loss": 10.4237, "step": 12800 }, { "epoch": 0.06397163474743439, "grad_norm": 0.09783702343702316, "learning_rate": 2.8151142706951364e-05, "loss": 10.4185, "step": 12810 }, { "epoch": 0.06402157357237384, "grad_norm": 0.09721019119024277, "learning_rate": 2.814964079200981e-05, "loss": 10.4202, "step": 12820 }, { "epoch": 0.06407151239731329, "grad_norm": 0.09728706628084183, "learning_rate": 2.8148138877068264e-05, "loss": 10.4211, "step": 12830 }, { "epoch": 0.06412145122225274, "grad_norm": 0.0896829217672348, "learning_rate": 2.814663696212671e-05, "loss": 10.4196, "step": 12840 }, { "epoch": 0.06417139004719219, "grad_norm": 0.09445061534643173, "learning_rate": 2.8145135047185165e-05, "loss": 10.4193, "step": 12850 }, { "epoch": 0.06422132887213164, "grad_norm": 0.0961589440703392, "learning_rate": 2.814363313224361e-05, "loss": 10.4175, "step": 12860 }, { "epoch": 0.06427126769707109, "grad_norm": 0.09850755333900452, "learning_rate": 2.8142131217302058e-05, "loss": 10.4181, "step": 12870 }, { "epoch": 0.06432120652201054, "grad_norm": 0.093666210770607, "learning_rate": 2.8140629302360512e-05, "loss": 10.419, "step": 12880 }, { "epoch": 0.06437114534694999, "grad_norm": 0.09768545627593994, "learning_rate": 2.813912738741896e-05, "loss": 10.417, "step": 12890 }, { "epoch": 0.06442108417188944, "grad_norm": 0.0899498239159584, "learning_rate": 2.8137625472477412e-05, "loss": 10.4175, "step": 12900 }, { "epoch": 0.06447102299682889, "grad_norm": 0.09297134727239609, "learning_rate": 2.813612355753586e-05, "loss": 10.418, "step": 12910 }, { "epoch": 0.06452096182176834, "grad_norm": 0.09266901016235352, "learning_rate": 2.8134621642594306e-05, "loss": 10.4167, "step": 12920 }, { "epoch": 0.06457090064670779, "grad_norm": 0.09560717642307281, "learning_rate": 2.813311972765276e-05, "loss": 10.4158, "step": 12930 }, { "epoch": 0.06462083947164723, "grad_norm": 0.09199609607458115, "learning_rate": 2.8131617812711206e-05, "loss": 10.4151, "step": 12940 }, { "epoch": 0.06467077829658668, "grad_norm": 0.08880805224180222, "learning_rate": 2.813011589776966e-05, "loss": 10.4152, "step": 12950 }, { "epoch": 0.06472071712152613, "grad_norm": 0.09574782103300095, "learning_rate": 2.8128613982828106e-05, "loss": 10.4178, "step": 12960 }, { "epoch": 0.06477065594646558, "grad_norm": 0.09296387434005737, "learning_rate": 2.8127112067886553e-05, "loss": 10.415, "step": 12970 }, { "epoch": 0.06482059477140503, "grad_norm": 0.09417938441038132, "learning_rate": 2.8125610152945007e-05, "loss": 10.4148, "step": 12980 }, { "epoch": 0.06487053359634448, "grad_norm": 0.09101897478103638, "learning_rate": 2.8124108238003454e-05, "loss": 10.4149, "step": 12990 }, { "epoch": 0.06492047242128393, "grad_norm": 0.09890080988407135, "learning_rate": 2.8122606323061907e-05, "loss": 10.4157, "step": 13000 }, { "epoch": 0.06497041124622338, "grad_norm": 0.09360602498054504, "learning_rate": 2.8121104408120354e-05, "loss": 10.4141, "step": 13010 }, { "epoch": 0.06502035007116283, "grad_norm": 0.10021226108074188, "learning_rate": 2.81196024931788e-05, "loss": 10.4117, "step": 13020 }, { "epoch": 0.06507028889610228, "grad_norm": 0.09726858139038086, "learning_rate": 2.8118100578237254e-05, "loss": 10.4117, "step": 13030 }, { "epoch": 0.06512022772104173, "grad_norm": 0.09342258423566818, "learning_rate": 2.81165986632957e-05, "loss": 10.4155, "step": 13040 }, { "epoch": 0.06517016654598118, "grad_norm": 0.09800725430250168, "learning_rate": 2.8115096748354155e-05, "loss": 10.4146, "step": 13050 }, { "epoch": 0.06522010537092063, "grad_norm": 0.09531977027654648, "learning_rate": 2.81135948334126e-05, "loss": 10.4122, "step": 13060 }, { "epoch": 0.06527004419586008, "grad_norm": 0.09499124437570572, "learning_rate": 2.811209291847105e-05, "loss": 10.4126, "step": 13070 }, { "epoch": 0.06531998302079953, "grad_norm": 0.09590071439743042, "learning_rate": 2.8110591003529502e-05, "loss": 10.4127, "step": 13080 }, { "epoch": 0.06536992184573898, "grad_norm": 0.09835195541381836, "learning_rate": 2.810908908858795e-05, "loss": 10.4109, "step": 13090 }, { "epoch": 0.06541986067067843, "grad_norm": 0.09182047098875046, "learning_rate": 2.8107587173646402e-05, "loss": 10.4132, "step": 13100 }, { "epoch": 0.06546979949561786, "grad_norm": 0.09572348743677139, "learning_rate": 2.810608525870485e-05, "loss": 10.4111, "step": 13110 }, { "epoch": 0.06551973832055731, "grad_norm": 0.09192756563425064, "learning_rate": 2.81045833437633e-05, "loss": 10.4094, "step": 13120 }, { "epoch": 0.06556967714549676, "grad_norm": 0.09450101852416992, "learning_rate": 2.810308142882175e-05, "loss": 10.4124, "step": 13130 }, { "epoch": 0.06561961597043621, "grad_norm": 0.09333983063697815, "learning_rate": 2.8101579513880196e-05, "loss": 10.4101, "step": 13140 }, { "epoch": 0.06566955479537566, "grad_norm": 0.09769093245267868, "learning_rate": 2.810007759893865e-05, "loss": 10.4109, "step": 13150 }, { "epoch": 0.06571949362031511, "grad_norm": 0.09517354518175125, "learning_rate": 2.8098575683997096e-05, "loss": 10.4101, "step": 13160 }, { "epoch": 0.06576943244525456, "grad_norm": 0.09823792427778244, "learning_rate": 2.8097073769055547e-05, "loss": 10.4095, "step": 13170 }, { "epoch": 0.06581937127019401, "grad_norm": 0.09689707309007645, "learning_rate": 2.8095571854113997e-05, "loss": 10.4104, "step": 13180 }, { "epoch": 0.06586931009513346, "grad_norm": 0.09225734323263168, "learning_rate": 2.8094069939172444e-05, "loss": 10.4067, "step": 13190 }, { "epoch": 0.0659192489200729, "grad_norm": 0.10383229702711105, "learning_rate": 2.8092568024230897e-05, "loss": 10.41, "step": 13200 }, { "epoch": 0.06596918774501236, "grad_norm": 0.09270164370536804, "learning_rate": 2.8091066109289344e-05, "loss": 10.4071, "step": 13210 }, { "epoch": 0.0660191265699518, "grad_norm": 0.09225078672170639, "learning_rate": 2.8089564194347794e-05, "loss": 10.4071, "step": 13220 }, { "epoch": 0.06606906539489125, "grad_norm": 0.09302686154842377, "learning_rate": 2.8088062279406244e-05, "loss": 10.4091, "step": 13230 }, { "epoch": 0.0661190042198307, "grad_norm": 0.09300468862056732, "learning_rate": 2.808656036446469e-05, "loss": 10.4079, "step": 13240 }, { "epoch": 0.06616894304477015, "grad_norm": 0.10051462054252625, "learning_rate": 2.8085058449523145e-05, "loss": 10.4058, "step": 13250 }, { "epoch": 0.0662188818697096, "grad_norm": 0.10076721012592316, "learning_rate": 2.808355653458159e-05, "loss": 10.4052, "step": 13260 }, { "epoch": 0.06626882069464905, "grad_norm": 0.09321360290050507, "learning_rate": 2.808205461964004e-05, "loss": 10.4091, "step": 13270 }, { "epoch": 0.0663187595195885, "grad_norm": 0.09066988527774811, "learning_rate": 2.8080552704698492e-05, "loss": 10.4054, "step": 13280 }, { "epoch": 0.06636869834452795, "grad_norm": 0.09279420971870422, "learning_rate": 2.807905078975694e-05, "loss": 10.4051, "step": 13290 }, { "epoch": 0.0664186371694674, "grad_norm": 0.09202752262353897, "learning_rate": 2.8077548874815392e-05, "loss": 10.4091, "step": 13300 }, { "epoch": 0.06646857599440685, "grad_norm": 0.09188825637102127, "learning_rate": 2.807604695987384e-05, "loss": 10.4076, "step": 13310 }, { "epoch": 0.0665185148193463, "grad_norm": 0.0955401360988617, "learning_rate": 2.807454504493229e-05, "loss": 10.406, "step": 13320 }, { "epoch": 0.06656845364428575, "grad_norm": 0.09799564629793167, "learning_rate": 2.807304312999074e-05, "loss": 10.4049, "step": 13330 }, { "epoch": 0.0666183924692252, "grad_norm": 0.09366542100906372, "learning_rate": 2.8071541215049186e-05, "loss": 10.4073, "step": 13340 }, { "epoch": 0.06666833129416465, "grad_norm": 0.08726493269205093, "learning_rate": 2.807003930010764e-05, "loss": 10.4078, "step": 13350 }, { "epoch": 0.0667182701191041, "grad_norm": 0.09408682584762573, "learning_rate": 2.8068537385166086e-05, "loss": 10.4018, "step": 13360 }, { "epoch": 0.06676820894404355, "grad_norm": 0.1020817831158638, "learning_rate": 2.8067035470224537e-05, "loss": 10.4036, "step": 13370 }, { "epoch": 0.066818147768983, "grad_norm": 0.0937168076634407, "learning_rate": 2.8065533555282987e-05, "loss": 10.405, "step": 13380 }, { "epoch": 0.06686808659392245, "grad_norm": 0.08735665678977966, "learning_rate": 2.8064031640341437e-05, "loss": 10.4038, "step": 13390 }, { "epoch": 0.0669180254188619, "grad_norm": 0.09232959151268005, "learning_rate": 2.8062529725399887e-05, "loss": 10.4007, "step": 13400 }, { "epoch": 0.06696796424380135, "grad_norm": 0.09150397777557373, "learning_rate": 2.8061027810458334e-05, "loss": 10.4059, "step": 13410 }, { "epoch": 0.0670179030687408, "grad_norm": 0.09197428822517395, "learning_rate": 2.8059525895516784e-05, "loss": 10.4031, "step": 13420 }, { "epoch": 0.06706784189368024, "grad_norm": 0.09242592751979828, "learning_rate": 2.8058023980575234e-05, "loss": 10.402, "step": 13430 }, { "epoch": 0.0671177807186197, "grad_norm": 0.09760529547929764, "learning_rate": 2.8056522065633685e-05, "loss": 10.402, "step": 13440 }, { "epoch": 0.06716771954355914, "grad_norm": 0.09522455185651779, "learning_rate": 2.8055020150692135e-05, "loss": 10.4012, "step": 13450 }, { "epoch": 0.06721765836849859, "grad_norm": 0.093946672976017, "learning_rate": 2.805351823575058e-05, "loss": 10.4044, "step": 13460 }, { "epoch": 0.06726759719343804, "grad_norm": 0.09539136290550232, "learning_rate": 2.805201632080903e-05, "loss": 10.4038, "step": 13470 }, { "epoch": 0.06731753601837749, "grad_norm": 0.10141439735889435, "learning_rate": 2.8050514405867482e-05, "loss": 10.4001, "step": 13480 }, { "epoch": 0.06736747484331694, "grad_norm": 0.094613216817379, "learning_rate": 2.8049012490925932e-05, "loss": 10.4008, "step": 13490 }, { "epoch": 0.06741741366825639, "grad_norm": 0.09187793731689453, "learning_rate": 2.8047510575984382e-05, "loss": 10.4017, "step": 13500 }, { "epoch": 0.06746735249319584, "grad_norm": 0.10149643570184708, "learning_rate": 2.804600866104283e-05, "loss": 10.4028, "step": 13510 }, { "epoch": 0.06751729131813529, "grad_norm": 0.09373277425765991, "learning_rate": 2.804450674610128e-05, "loss": 10.3996, "step": 13520 }, { "epoch": 0.06756723014307474, "grad_norm": 0.09465789794921875, "learning_rate": 2.804300483115973e-05, "loss": 10.3994, "step": 13530 }, { "epoch": 0.06761716896801419, "grad_norm": 0.08999823033809662, "learning_rate": 2.804150291621818e-05, "loss": 10.4001, "step": 13540 }, { "epoch": 0.06766710779295364, "grad_norm": 0.09075751155614853, "learning_rate": 2.804000100127663e-05, "loss": 10.4001, "step": 13550 }, { "epoch": 0.06771704661789309, "grad_norm": 0.09290343523025513, "learning_rate": 2.8038499086335076e-05, "loss": 10.3973, "step": 13560 }, { "epoch": 0.06776698544283254, "grad_norm": 0.09069476276636124, "learning_rate": 2.8036997171393527e-05, "loss": 10.3977, "step": 13570 }, { "epoch": 0.06781692426777199, "grad_norm": 0.09564889967441559, "learning_rate": 2.8035495256451977e-05, "loss": 10.4, "step": 13580 }, { "epoch": 0.06786686309271144, "grad_norm": 0.09241501986980438, "learning_rate": 2.8033993341510427e-05, "loss": 10.3973, "step": 13590 }, { "epoch": 0.06791680191765087, "grad_norm": 0.09421627223491669, "learning_rate": 2.8032491426568877e-05, "loss": 10.3985, "step": 13600 }, { "epoch": 0.06796674074259032, "grad_norm": 0.0919337123632431, "learning_rate": 2.8030989511627324e-05, "loss": 10.3968, "step": 13610 }, { "epoch": 0.06801667956752977, "grad_norm": 0.09475470334291458, "learning_rate": 2.8029487596685774e-05, "loss": 10.3991, "step": 13620 }, { "epoch": 0.06806661839246922, "grad_norm": 0.09623359888792038, "learning_rate": 2.8027985681744224e-05, "loss": 10.3965, "step": 13630 }, { "epoch": 0.06811655721740867, "grad_norm": 0.09497568756341934, "learning_rate": 2.8026483766802675e-05, "loss": 10.3988, "step": 13640 }, { "epoch": 0.06816649604234812, "grad_norm": 0.09501062333583832, "learning_rate": 2.8024981851861125e-05, "loss": 10.3959, "step": 13650 }, { "epoch": 0.06821643486728757, "grad_norm": 0.09278145432472229, "learning_rate": 2.802347993691957e-05, "loss": 10.3948, "step": 13660 }, { "epoch": 0.06826637369222702, "grad_norm": 0.09373094141483307, "learning_rate": 2.802197802197802e-05, "loss": 10.395, "step": 13670 }, { "epoch": 0.06831631251716647, "grad_norm": 0.09346432983875275, "learning_rate": 2.8020476107036472e-05, "loss": 10.396, "step": 13680 }, { "epoch": 0.06836625134210592, "grad_norm": 0.09329260885715485, "learning_rate": 2.8018974192094922e-05, "loss": 10.3962, "step": 13690 }, { "epoch": 0.06841619016704537, "grad_norm": 0.09555350244045258, "learning_rate": 2.8017472277153372e-05, "loss": 10.3954, "step": 13700 }, { "epoch": 0.06846612899198481, "grad_norm": 0.09405256807804108, "learning_rate": 2.8015970362211822e-05, "loss": 10.3936, "step": 13710 }, { "epoch": 0.06851606781692426, "grad_norm": 0.09874436259269714, "learning_rate": 2.801446844727027e-05, "loss": 10.3941, "step": 13720 }, { "epoch": 0.06856600664186371, "grad_norm": 0.0999235138297081, "learning_rate": 2.801296653232872e-05, "loss": 10.3911, "step": 13730 }, { "epoch": 0.06861594546680316, "grad_norm": 0.09541259706020355, "learning_rate": 2.801146461738717e-05, "loss": 10.3937, "step": 13740 }, { "epoch": 0.06866588429174261, "grad_norm": 0.09852302819490433, "learning_rate": 2.800996270244562e-05, "loss": 10.3953, "step": 13750 }, { "epoch": 0.06871582311668206, "grad_norm": 0.09277693927288055, "learning_rate": 2.800846078750407e-05, "loss": 10.3929, "step": 13760 }, { "epoch": 0.06876576194162151, "grad_norm": 0.09327711910009384, "learning_rate": 2.8006958872562517e-05, "loss": 10.3907, "step": 13770 }, { "epoch": 0.06881570076656096, "grad_norm": 0.08925262838602066, "learning_rate": 2.8005456957620967e-05, "loss": 10.3949, "step": 13780 }, { "epoch": 0.06886563959150041, "grad_norm": 0.0973198264837265, "learning_rate": 2.8003955042679417e-05, "loss": 10.3892, "step": 13790 }, { "epoch": 0.06891557841643986, "grad_norm": 0.09705457836389542, "learning_rate": 2.8002453127737867e-05, "loss": 10.3911, "step": 13800 }, { "epoch": 0.06896551724137931, "grad_norm": 0.09261533617973328, "learning_rate": 2.8000951212796317e-05, "loss": 10.3915, "step": 13810 }, { "epoch": 0.06901545606631876, "grad_norm": 0.09760327637195587, "learning_rate": 2.7999449297854764e-05, "loss": 10.3938, "step": 13820 }, { "epoch": 0.06906539489125821, "grad_norm": 0.09349595010280609, "learning_rate": 2.7997947382913214e-05, "loss": 10.3904, "step": 13830 }, { "epoch": 0.06911533371619766, "grad_norm": 0.0936850905418396, "learning_rate": 2.7996445467971665e-05, "loss": 10.3909, "step": 13840 }, { "epoch": 0.06916527254113711, "grad_norm": 0.09198077023029327, "learning_rate": 2.7994943553030115e-05, "loss": 10.3907, "step": 13850 }, { "epoch": 0.06921521136607656, "grad_norm": 0.09293072670698166, "learning_rate": 2.7993441638088565e-05, "loss": 10.3906, "step": 13860 }, { "epoch": 0.069265150191016, "grad_norm": 0.10718231648206711, "learning_rate": 2.7991939723147012e-05, "loss": 10.388, "step": 13870 }, { "epoch": 0.06931508901595546, "grad_norm": 0.09268306940793991, "learning_rate": 2.7990437808205462e-05, "loss": 10.3907, "step": 13880 }, { "epoch": 0.0693650278408949, "grad_norm": 0.09039410948753357, "learning_rate": 2.7988935893263912e-05, "loss": 10.3901, "step": 13890 }, { "epoch": 0.06941496666583435, "grad_norm": 0.09920943528413773, "learning_rate": 2.7987433978322362e-05, "loss": 10.3878, "step": 13900 }, { "epoch": 0.0694649054907738, "grad_norm": 0.09512588381767273, "learning_rate": 2.7985932063380812e-05, "loss": 10.3883, "step": 13910 }, { "epoch": 0.06951484431571325, "grad_norm": 0.0949581190943718, "learning_rate": 2.798443014843926e-05, "loss": 10.388, "step": 13920 }, { "epoch": 0.0695647831406527, "grad_norm": 0.09540549665689468, "learning_rate": 2.798292823349771e-05, "loss": 10.3886, "step": 13930 }, { "epoch": 0.06961472196559215, "grad_norm": 0.10153383016586304, "learning_rate": 2.798142631855616e-05, "loss": 10.3875, "step": 13940 }, { "epoch": 0.0696646607905316, "grad_norm": 0.09448625147342682, "learning_rate": 2.797992440361461e-05, "loss": 10.3908, "step": 13950 }, { "epoch": 0.06971459961547105, "grad_norm": 0.09156591445207596, "learning_rate": 2.797842248867306e-05, "loss": 10.3874, "step": 13960 }, { "epoch": 0.0697645384404105, "grad_norm": 0.10002894699573517, "learning_rate": 2.7976920573731507e-05, "loss": 10.3878, "step": 13970 }, { "epoch": 0.06981447726534995, "grad_norm": 0.1005663126707077, "learning_rate": 2.7975418658789957e-05, "loss": 10.3849, "step": 13980 }, { "epoch": 0.0698644160902894, "grad_norm": 0.09588824957609177, "learning_rate": 2.7973916743848407e-05, "loss": 10.3876, "step": 13990 }, { "epoch": 0.06991435491522885, "grad_norm": 0.10445947200059891, "learning_rate": 2.7972414828906857e-05, "loss": 10.3845, "step": 14000 }, { "epoch": 0.0699642937401683, "grad_norm": 0.09666018187999725, "learning_rate": 2.7970912913965307e-05, "loss": 10.3877, "step": 14010 }, { "epoch": 0.07001423256510775, "grad_norm": 0.09730076789855957, "learning_rate": 2.7969410999023754e-05, "loss": 10.3862, "step": 14020 }, { "epoch": 0.0700641713900472, "grad_norm": 0.09373754262924194, "learning_rate": 2.7967909084082208e-05, "loss": 10.3866, "step": 14030 }, { "epoch": 0.07011411021498665, "grad_norm": 0.09593051671981812, "learning_rate": 2.7966407169140655e-05, "loss": 10.3842, "step": 14040 }, { "epoch": 0.0701640490399261, "grad_norm": 0.09878380596637726, "learning_rate": 2.7964905254199105e-05, "loss": 10.3858, "step": 14050 }, { "epoch": 0.07021398786486555, "grad_norm": 0.09511502087116241, "learning_rate": 2.7963403339257555e-05, "loss": 10.3866, "step": 14060 }, { "epoch": 0.070263926689805, "grad_norm": 0.09328655153512955, "learning_rate": 2.7961901424316002e-05, "loss": 10.3844, "step": 14070 }, { "epoch": 0.07031386551474444, "grad_norm": 0.09099029004573822, "learning_rate": 2.7960399509374455e-05, "loss": 10.3844, "step": 14080 }, { "epoch": 0.0703638043396839, "grad_norm": 0.09044034779071808, "learning_rate": 2.7958897594432902e-05, "loss": 10.383, "step": 14090 }, { "epoch": 0.07041374316462333, "grad_norm": 0.09568076580762863, "learning_rate": 2.7957395679491352e-05, "loss": 10.3804, "step": 14100 }, { "epoch": 0.07046368198956278, "grad_norm": 0.09366782754659653, "learning_rate": 2.7955893764549802e-05, "loss": 10.3835, "step": 14110 }, { "epoch": 0.07051362081450223, "grad_norm": 0.0898858904838562, "learning_rate": 2.795439184960825e-05, "loss": 10.3813, "step": 14120 }, { "epoch": 0.07056355963944168, "grad_norm": 0.09581518173217773, "learning_rate": 2.7952889934666703e-05, "loss": 10.3861, "step": 14130 }, { "epoch": 0.07061349846438113, "grad_norm": 0.09709804505109787, "learning_rate": 2.795138801972515e-05, "loss": 10.3827, "step": 14140 }, { "epoch": 0.07066343728932058, "grad_norm": 0.09556079655885696, "learning_rate": 2.79498861047836e-05, "loss": 10.3806, "step": 14150 }, { "epoch": 0.07071337611426003, "grad_norm": 0.09610017389059067, "learning_rate": 2.794838418984205e-05, "loss": 10.3825, "step": 14160 }, { "epoch": 0.07076331493919948, "grad_norm": 0.094112828373909, "learning_rate": 2.7946882274900497e-05, "loss": 10.3794, "step": 14170 }, { "epoch": 0.07081325376413893, "grad_norm": 0.09225358068943024, "learning_rate": 2.794538035995895e-05, "loss": 10.3818, "step": 14180 }, { "epoch": 0.07086319258907837, "grad_norm": 0.09522094577550888, "learning_rate": 2.7943878445017397e-05, "loss": 10.3811, "step": 14190 }, { "epoch": 0.07091313141401782, "grad_norm": 0.09772941470146179, "learning_rate": 2.7942376530075847e-05, "loss": 10.3835, "step": 14200 }, { "epoch": 0.07096307023895727, "grad_norm": 0.09984889626502991, "learning_rate": 2.7940874615134297e-05, "loss": 10.3794, "step": 14210 }, { "epoch": 0.07101300906389672, "grad_norm": 0.09417369961738586, "learning_rate": 2.7939372700192744e-05, "loss": 10.3824, "step": 14220 }, { "epoch": 0.07106294788883617, "grad_norm": 0.09816978126764297, "learning_rate": 2.7937870785251198e-05, "loss": 10.3796, "step": 14230 }, { "epoch": 0.07111288671377562, "grad_norm": 0.10052361339330673, "learning_rate": 2.7936368870309645e-05, "loss": 10.3792, "step": 14240 }, { "epoch": 0.07116282553871507, "grad_norm": 0.09660011529922485, "learning_rate": 2.7934866955368095e-05, "loss": 10.38, "step": 14250 }, { "epoch": 0.07121276436365452, "grad_norm": 0.09375877678394318, "learning_rate": 2.7933365040426545e-05, "loss": 10.3788, "step": 14260 }, { "epoch": 0.07126270318859397, "grad_norm": 0.09189482033252716, "learning_rate": 2.7931863125484992e-05, "loss": 10.3798, "step": 14270 }, { "epoch": 0.07131264201353342, "grad_norm": 0.09980373829603195, "learning_rate": 2.7930361210543445e-05, "loss": 10.381, "step": 14280 }, { "epoch": 0.07136258083847287, "grad_norm": 0.08954178541898727, "learning_rate": 2.7928859295601892e-05, "loss": 10.3781, "step": 14290 }, { "epoch": 0.07141251966341232, "grad_norm": 0.08987992256879807, "learning_rate": 2.7927357380660342e-05, "loss": 10.3801, "step": 14300 }, { "epoch": 0.07146245848835177, "grad_norm": 0.0954456776380539, "learning_rate": 2.7925855465718792e-05, "loss": 10.3775, "step": 14310 }, { "epoch": 0.07151239731329122, "grad_norm": 0.09449037164449692, "learning_rate": 2.792435355077724e-05, "loss": 10.3813, "step": 14320 }, { "epoch": 0.07156233613823067, "grad_norm": 0.09394067525863647, "learning_rate": 2.7922851635835693e-05, "loss": 10.3778, "step": 14330 }, { "epoch": 0.07161227496317012, "grad_norm": 0.0954408049583435, "learning_rate": 2.792134972089414e-05, "loss": 10.3772, "step": 14340 }, { "epoch": 0.07166221378810957, "grad_norm": 0.09376630187034607, "learning_rate": 2.7919847805952593e-05, "loss": 10.3763, "step": 14350 }, { "epoch": 0.07171215261304902, "grad_norm": 0.0952349603176117, "learning_rate": 2.791834589101104e-05, "loss": 10.3765, "step": 14360 }, { "epoch": 0.07176209143798847, "grad_norm": 0.10054927319288254, "learning_rate": 2.7916843976069487e-05, "loss": 10.3729, "step": 14370 }, { "epoch": 0.07181203026292791, "grad_norm": 0.09942678362131119, "learning_rate": 2.791534206112794e-05, "loss": 10.3755, "step": 14380 }, { "epoch": 0.07186196908786736, "grad_norm": 0.09826812148094177, "learning_rate": 2.7913840146186387e-05, "loss": 10.3753, "step": 14390 }, { "epoch": 0.07191190791280681, "grad_norm": 0.09203541278839111, "learning_rate": 2.791233823124484e-05, "loss": 10.3753, "step": 14400 }, { "epoch": 0.07196184673774626, "grad_norm": 0.09987717121839523, "learning_rate": 2.7910836316303287e-05, "loss": 10.3732, "step": 14410 }, { "epoch": 0.07201178556268571, "grad_norm": 0.09702242910861969, "learning_rate": 2.7909334401361734e-05, "loss": 10.3748, "step": 14420 }, { "epoch": 0.07206172438762516, "grad_norm": 0.09924949705600739, "learning_rate": 2.7907832486420188e-05, "loss": 10.3758, "step": 14430 }, { "epoch": 0.07211166321256461, "grad_norm": 0.10308583825826645, "learning_rate": 2.7906330571478635e-05, "loss": 10.3698, "step": 14440 }, { "epoch": 0.07216160203750406, "grad_norm": 0.09504105895757675, "learning_rate": 2.7904828656537088e-05, "loss": 10.3751, "step": 14450 }, { "epoch": 0.07221154086244351, "grad_norm": 0.0910039022564888, "learning_rate": 2.7903326741595535e-05, "loss": 10.3762, "step": 14460 }, { "epoch": 0.07226147968738296, "grad_norm": 0.09338394552469254, "learning_rate": 2.7901824826653982e-05, "loss": 10.3741, "step": 14470 }, { "epoch": 0.07231141851232241, "grad_norm": 0.095545195043087, "learning_rate": 2.7900322911712435e-05, "loss": 10.3728, "step": 14480 }, { "epoch": 0.07236135733726186, "grad_norm": 0.0895906612277031, "learning_rate": 2.7898820996770882e-05, "loss": 10.3754, "step": 14490 }, { "epoch": 0.07241129616220131, "grad_norm": 0.09767705202102661, "learning_rate": 2.7897319081829336e-05, "loss": 10.3734, "step": 14500 }, { "epoch": 0.07246123498714076, "grad_norm": 0.09386033564805984, "learning_rate": 2.7895817166887782e-05, "loss": 10.3728, "step": 14510 }, { "epoch": 0.0725111738120802, "grad_norm": 0.09241237491369247, "learning_rate": 2.789431525194623e-05, "loss": 10.3727, "step": 14520 }, { "epoch": 0.07256111263701966, "grad_norm": 0.09265079349279404, "learning_rate": 2.7892813337004683e-05, "loss": 10.3714, "step": 14530 }, { "epoch": 0.0726110514619591, "grad_norm": 0.09971288591623306, "learning_rate": 2.789131142206313e-05, "loss": 10.3708, "step": 14540 }, { "epoch": 0.07266099028689856, "grad_norm": 0.09628092497587204, "learning_rate": 2.7889809507121583e-05, "loss": 10.3714, "step": 14550 }, { "epoch": 0.072710929111838, "grad_norm": 0.09946174919605255, "learning_rate": 2.788830759218003e-05, "loss": 10.3706, "step": 14560 }, { "epoch": 0.07276086793677745, "grad_norm": 0.09489379823207855, "learning_rate": 2.7886805677238477e-05, "loss": 10.3715, "step": 14570 }, { "epoch": 0.0728108067617169, "grad_norm": 0.09554824233055115, "learning_rate": 2.788530376229693e-05, "loss": 10.3715, "step": 14580 }, { "epoch": 0.07286074558665634, "grad_norm": 0.09551879018545151, "learning_rate": 2.7883801847355377e-05, "loss": 10.3715, "step": 14590 }, { "epoch": 0.07291068441159579, "grad_norm": 0.10136498510837555, "learning_rate": 2.788229993241383e-05, "loss": 10.371, "step": 14600 }, { "epoch": 0.07296062323653524, "grad_norm": 0.09273940324783325, "learning_rate": 2.7880798017472277e-05, "loss": 10.3697, "step": 14610 }, { "epoch": 0.07301056206147469, "grad_norm": 0.09797700494527817, "learning_rate": 2.7879296102530724e-05, "loss": 10.369, "step": 14620 }, { "epoch": 0.07306050088641414, "grad_norm": 0.0974142774939537, "learning_rate": 2.7877794187589178e-05, "loss": 10.3689, "step": 14630 }, { "epoch": 0.07311043971135359, "grad_norm": 0.10973493754863739, "learning_rate": 2.7876292272647625e-05, "loss": 10.3659, "step": 14640 }, { "epoch": 0.07316037853629304, "grad_norm": 0.0951044112443924, "learning_rate": 2.7874790357706078e-05, "loss": 10.3671, "step": 14650 }, { "epoch": 0.07321031736123249, "grad_norm": 0.09662732481956482, "learning_rate": 2.7873288442764525e-05, "loss": 10.369, "step": 14660 }, { "epoch": 0.07326025618617193, "grad_norm": 0.09085254371166229, "learning_rate": 2.7871786527822975e-05, "loss": 10.3666, "step": 14670 }, { "epoch": 0.07331019501111138, "grad_norm": 0.09421254694461823, "learning_rate": 2.7870284612881425e-05, "loss": 10.3648, "step": 14680 }, { "epoch": 0.07336013383605083, "grad_norm": 0.09277543425559998, "learning_rate": 2.7868782697939872e-05, "loss": 10.3655, "step": 14690 }, { "epoch": 0.07341007266099028, "grad_norm": 0.09704422950744629, "learning_rate": 2.7867280782998326e-05, "loss": 10.3657, "step": 14700 }, { "epoch": 0.07346001148592973, "grad_norm": 0.09107200056314468, "learning_rate": 2.7865778868056772e-05, "loss": 10.3688, "step": 14710 }, { "epoch": 0.07350995031086918, "grad_norm": 0.08991669118404388, "learning_rate": 2.7864276953115223e-05, "loss": 10.3676, "step": 14720 }, { "epoch": 0.07355988913580863, "grad_norm": 0.09315147995948792, "learning_rate": 2.7862775038173673e-05, "loss": 10.3692, "step": 14730 }, { "epoch": 0.07360982796074808, "grad_norm": 0.09433738887310028, "learning_rate": 2.786127312323212e-05, "loss": 10.364, "step": 14740 }, { "epoch": 0.07365976678568753, "grad_norm": 0.0911378338932991, "learning_rate": 2.7859771208290573e-05, "loss": 10.3655, "step": 14750 }, { "epoch": 0.07370970561062698, "grad_norm": 0.09321308135986328, "learning_rate": 2.785826929334902e-05, "loss": 10.3665, "step": 14760 }, { "epoch": 0.07375964443556643, "grad_norm": 0.09273282438516617, "learning_rate": 2.785676737840747e-05, "loss": 10.3633, "step": 14770 }, { "epoch": 0.07380958326050588, "grad_norm": 0.0901220440864563, "learning_rate": 2.785526546346592e-05, "loss": 10.3613, "step": 14780 }, { "epoch": 0.07385952208544533, "grad_norm": 0.09423750638961792, "learning_rate": 2.7853763548524367e-05, "loss": 10.3643, "step": 14790 }, { "epoch": 0.07390946091038478, "grad_norm": 0.09342154115438461, "learning_rate": 2.785226163358282e-05, "loss": 10.366, "step": 14800 }, { "epoch": 0.07395939973532423, "grad_norm": 0.09549295902252197, "learning_rate": 2.7850759718641267e-05, "loss": 10.3637, "step": 14810 }, { "epoch": 0.07400933856026368, "grad_norm": 0.09613852202892303, "learning_rate": 2.7849257803699718e-05, "loss": 10.3668, "step": 14820 }, { "epoch": 0.07405927738520313, "grad_norm": 0.09182946383953094, "learning_rate": 2.7847755888758168e-05, "loss": 10.3647, "step": 14830 }, { "epoch": 0.07410921621014258, "grad_norm": 0.08932546526193619, "learning_rate": 2.7846253973816615e-05, "loss": 10.3626, "step": 14840 }, { "epoch": 0.07415915503508202, "grad_norm": 0.09887928515672684, "learning_rate": 2.7844752058875068e-05, "loss": 10.3609, "step": 14850 }, { "epoch": 0.07420909386002147, "grad_norm": 0.09238041192293167, "learning_rate": 2.7843250143933515e-05, "loss": 10.3625, "step": 14860 }, { "epoch": 0.07425903268496092, "grad_norm": 0.08984861522912979, "learning_rate": 2.7841748228991965e-05, "loss": 10.3626, "step": 14870 }, { "epoch": 0.07430897150990037, "grad_norm": 0.09524226188659668, "learning_rate": 2.7840246314050415e-05, "loss": 10.3622, "step": 14880 }, { "epoch": 0.07435891033483982, "grad_norm": 0.09836632758378983, "learning_rate": 2.7838744399108862e-05, "loss": 10.3668, "step": 14890 }, { "epoch": 0.07440884915977927, "grad_norm": 0.08989029377698898, "learning_rate": 2.7837242484167316e-05, "loss": 10.364, "step": 14900 }, { "epoch": 0.07445878798471872, "grad_norm": 0.09499987214803696, "learning_rate": 2.7835740569225762e-05, "loss": 10.3642, "step": 14910 }, { "epoch": 0.07450872680965817, "grad_norm": 0.09698102623224258, "learning_rate": 2.7834238654284213e-05, "loss": 10.3599, "step": 14920 }, { "epoch": 0.07455866563459762, "grad_norm": 0.09677815437316895, "learning_rate": 2.7832736739342663e-05, "loss": 10.3597, "step": 14930 }, { "epoch": 0.07460860445953707, "grad_norm": 0.09893867373466492, "learning_rate": 2.783123482440111e-05, "loss": 10.358, "step": 14940 }, { "epoch": 0.07465854328447652, "grad_norm": 0.09698786586523056, "learning_rate": 2.7829732909459563e-05, "loss": 10.3576, "step": 14950 }, { "epoch": 0.07470848210941597, "grad_norm": 0.08816642314195633, "learning_rate": 2.782823099451801e-05, "loss": 10.3598, "step": 14960 }, { "epoch": 0.07475842093435542, "grad_norm": 0.09455441683530807, "learning_rate": 2.782672907957646e-05, "loss": 10.3586, "step": 14970 }, { "epoch": 0.07480835975929487, "grad_norm": 0.09937630593776703, "learning_rate": 2.782522716463491e-05, "loss": 10.3571, "step": 14980 }, { "epoch": 0.07485829858423432, "grad_norm": 0.09834357351064682, "learning_rate": 2.782372524969336e-05, "loss": 10.357, "step": 14990 }, { "epoch": 0.07490823740917377, "grad_norm": 0.09351138025522232, "learning_rate": 2.782222333475181e-05, "loss": 10.3599, "step": 15000 }, { "epoch": 0.07495817623411322, "grad_norm": 0.09158463031053543, "learning_rate": 2.7820721419810257e-05, "loss": 10.3608, "step": 15010 }, { "epoch": 0.07500811505905267, "grad_norm": 0.09933115541934967, "learning_rate": 2.7819219504868708e-05, "loss": 10.3587, "step": 15020 }, { "epoch": 0.07505805388399212, "grad_norm": 0.09445559233427048, "learning_rate": 2.7817717589927158e-05, "loss": 10.356, "step": 15030 }, { "epoch": 0.07510799270893156, "grad_norm": 0.0958174541592598, "learning_rate": 2.7816215674985608e-05, "loss": 10.3601, "step": 15040 }, { "epoch": 0.07515793153387101, "grad_norm": 0.09285058826208115, "learning_rate": 2.7814713760044058e-05, "loss": 10.3572, "step": 15050 }, { "epoch": 0.07520787035881046, "grad_norm": 0.09183011949062347, "learning_rate": 2.7813211845102505e-05, "loss": 10.3567, "step": 15060 }, { "epoch": 0.07525780918374991, "grad_norm": 0.09525202959775925, "learning_rate": 2.7811709930160955e-05, "loss": 10.3597, "step": 15070 }, { "epoch": 0.07530774800868936, "grad_norm": 0.09245378524065018, "learning_rate": 2.7810208015219405e-05, "loss": 10.3559, "step": 15080 }, { "epoch": 0.0753576868336288, "grad_norm": 0.09502734988927841, "learning_rate": 2.7808706100277856e-05, "loss": 10.3569, "step": 15090 }, { "epoch": 0.07540762565856825, "grad_norm": 0.09782931953668594, "learning_rate": 2.7807204185336306e-05, "loss": 10.3561, "step": 15100 }, { "epoch": 0.0754575644835077, "grad_norm": 0.09502676874399185, "learning_rate": 2.7805702270394752e-05, "loss": 10.3553, "step": 15110 }, { "epoch": 0.07550750330844715, "grad_norm": 0.09578611701726913, "learning_rate": 2.7804200355453203e-05, "loss": 10.3571, "step": 15120 }, { "epoch": 0.0755574421333866, "grad_norm": 0.0919770896434784, "learning_rate": 2.7802698440511653e-05, "loss": 10.3541, "step": 15130 }, { "epoch": 0.07560738095832605, "grad_norm": 0.09363967925310135, "learning_rate": 2.7801196525570103e-05, "loss": 10.3542, "step": 15140 }, { "epoch": 0.0756573197832655, "grad_norm": 0.09395650029182434, "learning_rate": 2.7799694610628553e-05, "loss": 10.3552, "step": 15150 }, { "epoch": 0.07570725860820494, "grad_norm": 0.09580358117818832, "learning_rate": 2.7798192695687e-05, "loss": 10.3551, "step": 15160 }, { "epoch": 0.0757571974331444, "grad_norm": 0.09569013118743896, "learning_rate": 2.779669078074545e-05, "loss": 10.3505, "step": 15170 }, { "epoch": 0.07580713625808384, "grad_norm": 0.09753712266683578, "learning_rate": 2.77951888658039e-05, "loss": 10.3539, "step": 15180 }, { "epoch": 0.07585707508302329, "grad_norm": 0.09192202240228653, "learning_rate": 2.779368695086235e-05, "loss": 10.354, "step": 15190 }, { "epoch": 0.07590701390796274, "grad_norm": 0.09564004093408585, "learning_rate": 2.77921850359208e-05, "loss": 10.3533, "step": 15200 }, { "epoch": 0.07595695273290219, "grad_norm": 0.08988484740257263, "learning_rate": 2.7790683120979248e-05, "loss": 10.3559, "step": 15210 }, { "epoch": 0.07600689155784164, "grad_norm": 0.09135802835226059, "learning_rate": 2.7789181206037698e-05, "loss": 10.355, "step": 15220 }, { "epoch": 0.07605683038278109, "grad_norm": 0.10438718646764755, "learning_rate": 2.7787679291096148e-05, "loss": 10.3533, "step": 15230 }, { "epoch": 0.07610676920772054, "grad_norm": 0.0949469655752182, "learning_rate": 2.7786177376154598e-05, "loss": 10.3526, "step": 15240 }, { "epoch": 0.07615670803265999, "grad_norm": 0.09750612825155258, "learning_rate": 2.7784675461213048e-05, "loss": 10.3543, "step": 15250 }, { "epoch": 0.07620664685759944, "grad_norm": 0.09662304818630219, "learning_rate": 2.7783173546271495e-05, "loss": 10.3492, "step": 15260 }, { "epoch": 0.07625658568253889, "grad_norm": 0.09173522889614105, "learning_rate": 2.7781671631329945e-05, "loss": 10.3505, "step": 15270 }, { "epoch": 0.07630652450747834, "grad_norm": 0.09761576354503632, "learning_rate": 2.7780169716388395e-05, "loss": 10.35, "step": 15280 }, { "epoch": 0.07635646333241779, "grad_norm": 0.091497503221035, "learning_rate": 2.7778667801446846e-05, "loss": 10.353, "step": 15290 }, { "epoch": 0.07640640215735724, "grad_norm": 0.0867045447230339, "learning_rate": 2.7777165886505296e-05, "loss": 10.3486, "step": 15300 }, { "epoch": 0.07645634098229669, "grad_norm": 0.09160704165697098, "learning_rate": 2.7775663971563743e-05, "loss": 10.3531, "step": 15310 }, { "epoch": 0.07650627980723614, "grad_norm": 0.09530753642320633, "learning_rate": 2.7774162056622193e-05, "loss": 10.3502, "step": 15320 }, { "epoch": 0.07655621863217558, "grad_norm": 0.09408596903085709, "learning_rate": 2.7772660141680643e-05, "loss": 10.3514, "step": 15330 }, { "epoch": 0.07660615745711503, "grad_norm": 0.10033749043941498, "learning_rate": 2.7771158226739093e-05, "loss": 10.3467, "step": 15340 }, { "epoch": 0.07665609628205448, "grad_norm": 0.08937489241361618, "learning_rate": 2.7769656311797543e-05, "loss": 10.3505, "step": 15350 }, { "epoch": 0.07670603510699393, "grad_norm": 0.09332039952278137, "learning_rate": 2.7768154396855993e-05, "loss": 10.349, "step": 15360 }, { "epoch": 0.07675597393193338, "grad_norm": 0.10080984979867935, "learning_rate": 2.776665248191444e-05, "loss": 10.3472, "step": 15370 }, { "epoch": 0.07680591275687283, "grad_norm": 0.09363698959350586, "learning_rate": 2.776515056697289e-05, "loss": 10.3491, "step": 15380 }, { "epoch": 0.07685585158181228, "grad_norm": 0.09421728551387787, "learning_rate": 2.776364865203134e-05, "loss": 10.3457, "step": 15390 }, { "epoch": 0.07690579040675173, "grad_norm": 0.09391890466213226, "learning_rate": 2.776214673708979e-05, "loss": 10.3481, "step": 15400 }, { "epoch": 0.07695572923169118, "grad_norm": 0.09497596323490143, "learning_rate": 2.776064482214824e-05, "loss": 10.3486, "step": 15410 }, { "epoch": 0.07700566805663063, "grad_norm": 0.09952494502067566, "learning_rate": 2.7759142907206688e-05, "loss": 10.3471, "step": 15420 }, { "epoch": 0.07705560688157008, "grad_norm": 0.09322735667228699, "learning_rate": 2.7757640992265138e-05, "loss": 10.346, "step": 15430 }, { "epoch": 0.07710554570650953, "grad_norm": 0.09606785327196121, "learning_rate": 2.7756139077323588e-05, "loss": 10.3519, "step": 15440 }, { "epoch": 0.07715548453144898, "grad_norm": 0.09380047023296356, "learning_rate": 2.7754637162382038e-05, "loss": 10.3466, "step": 15450 }, { "epoch": 0.07720542335638843, "grad_norm": 0.09241063892841339, "learning_rate": 2.775313524744049e-05, "loss": 10.3461, "step": 15460 }, { "epoch": 0.07725536218132788, "grad_norm": 0.09774025529623032, "learning_rate": 2.7751633332498935e-05, "loss": 10.3434, "step": 15470 }, { "epoch": 0.07730530100626733, "grad_norm": 0.09376423805952072, "learning_rate": 2.7750131417557385e-05, "loss": 10.3487, "step": 15480 }, { "epoch": 0.07735523983120678, "grad_norm": 0.08905985951423645, "learning_rate": 2.7748629502615836e-05, "loss": 10.3459, "step": 15490 }, { "epoch": 0.07740517865614623, "grad_norm": 0.09236487746238708, "learning_rate": 2.7747127587674286e-05, "loss": 10.3436, "step": 15500 }, { "epoch": 0.07745511748108568, "grad_norm": 0.0898410752415657, "learning_rate": 2.7745625672732736e-05, "loss": 10.3419, "step": 15510 }, { "epoch": 0.07750505630602512, "grad_norm": 0.09169359505176544, "learning_rate": 2.7744123757791183e-05, "loss": 10.3459, "step": 15520 }, { "epoch": 0.07755499513096457, "grad_norm": 0.09277211874723434, "learning_rate": 2.7742621842849633e-05, "loss": 10.3462, "step": 15530 }, { "epoch": 0.07760493395590402, "grad_norm": 0.09616000950336456, "learning_rate": 2.7741119927908083e-05, "loss": 10.3427, "step": 15540 }, { "epoch": 0.07765487278084347, "grad_norm": 0.09537960588932037, "learning_rate": 2.7739618012966533e-05, "loss": 10.3464, "step": 15550 }, { "epoch": 0.07770481160578292, "grad_norm": 0.09606116265058517, "learning_rate": 2.7738116098024983e-05, "loss": 10.348, "step": 15560 }, { "epoch": 0.07775475043072237, "grad_norm": 0.10531776398420334, "learning_rate": 2.773661418308343e-05, "loss": 10.344, "step": 15570 }, { "epoch": 0.07780468925566181, "grad_norm": 0.09302783012390137, "learning_rate": 2.773511226814188e-05, "loss": 10.3427, "step": 15580 }, { "epoch": 0.07785462808060126, "grad_norm": 0.09965275973081589, "learning_rate": 2.773361035320033e-05, "loss": 10.3477, "step": 15590 }, { "epoch": 0.0779045669055407, "grad_norm": 0.0914713442325592, "learning_rate": 2.773210843825878e-05, "loss": 10.343, "step": 15600 }, { "epoch": 0.07795450573048016, "grad_norm": 0.09361792355775833, "learning_rate": 2.773060652331723e-05, "loss": 10.3476, "step": 15610 }, { "epoch": 0.0780044445554196, "grad_norm": 0.09312635660171509, "learning_rate": 2.7729104608375678e-05, "loss": 10.3434, "step": 15620 }, { "epoch": 0.07805438338035905, "grad_norm": 0.09331481158733368, "learning_rate": 2.7727602693434128e-05, "loss": 10.3433, "step": 15630 }, { "epoch": 0.0781043222052985, "grad_norm": 0.0952400341629982, "learning_rate": 2.7726100778492578e-05, "loss": 10.343, "step": 15640 }, { "epoch": 0.07815426103023795, "grad_norm": 0.09251481294631958, "learning_rate": 2.7724598863551028e-05, "loss": 10.3406, "step": 15650 }, { "epoch": 0.0782041998551774, "grad_norm": 0.09364163130521774, "learning_rate": 2.772309694860948e-05, "loss": 10.3429, "step": 15660 }, { "epoch": 0.07825413868011685, "grad_norm": 0.09640027582645416, "learning_rate": 2.772159503366793e-05, "loss": 10.3399, "step": 15670 }, { "epoch": 0.0783040775050563, "grad_norm": 0.09361618012189865, "learning_rate": 2.772009311872638e-05, "loss": 10.3391, "step": 15680 }, { "epoch": 0.07835401632999575, "grad_norm": 0.09318644553422928, "learning_rate": 2.7718591203784826e-05, "loss": 10.3398, "step": 15690 }, { "epoch": 0.0784039551549352, "grad_norm": 0.09669560939073563, "learning_rate": 2.7717089288843276e-05, "loss": 10.3404, "step": 15700 }, { "epoch": 0.07845389397987465, "grad_norm": 0.09504924714565277, "learning_rate": 2.7715587373901726e-05, "loss": 10.3368, "step": 15710 }, { "epoch": 0.0785038328048141, "grad_norm": 0.09268130362033844, "learning_rate": 2.7714085458960176e-05, "loss": 10.3395, "step": 15720 }, { "epoch": 0.07855377162975355, "grad_norm": 0.09593498706817627, "learning_rate": 2.7712583544018626e-05, "loss": 10.3378, "step": 15730 }, { "epoch": 0.078603710454693, "grad_norm": 0.09858926385641098, "learning_rate": 2.7711081629077073e-05, "loss": 10.3395, "step": 15740 }, { "epoch": 0.07865364927963245, "grad_norm": 0.09996471554040909, "learning_rate": 2.7709579714135523e-05, "loss": 10.3401, "step": 15750 }, { "epoch": 0.0787035881045719, "grad_norm": 0.09353151172399521, "learning_rate": 2.7708077799193973e-05, "loss": 10.3392, "step": 15760 }, { "epoch": 0.07875352692951135, "grad_norm": 0.0970112755894661, "learning_rate": 2.7706575884252424e-05, "loss": 10.337, "step": 15770 }, { "epoch": 0.0788034657544508, "grad_norm": 0.09143032133579254, "learning_rate": 2.7705073969310874e-05, "loss": 10.3367, "step": 15780 }, { "epoch": 0.07885340457939025, "grad_norm": 0.09837393462657928, "learning_rate": 2.770357205436932e-05, "loss": 10.3368, "step": 15790 }, { "epoch": 0.0789033434043297, "grad_norm": 0.0949447974562645, "learning_rate": 2.770207013942777e-05, "loss": 10.3381, "step": 15800 }, { "epoch": 0.07895328222926914, "grad_norm": 0.09616595506668091, "learning_rate": 2.770056822448622e-05, "loss": 10.3356, "step": 15810 }, { "epoch": 0.0790032210542086, "grad_norm": 0.0987720862030983, "learning_rate": 2.769906630954467e-05, "loss": 10.3361, "step": 15820 }, { "epoch": 0.07905315987914804, "grad_norm": 0.09198544174432755, "learning_rate": 2.769756439460312e-05, "loss": 10.3322, "step": 15830 }, { "epoch": 0.0791030987040875, "grad_norm": 0.09431766718626022, "learning_rate": 2.7696062479661568e-05, "loss": 10.3391, "step": 15840 }, { "epoch": 0.07915303752902694, "grad_norm": 0.08981557935476303, "learning_rate": 2.7694560564720018e-05, "loss": 10.3416, "step": 15850 }, { "epoch": 0.07920297635396639, "grad_norm": 0.09577486664056778, "learning_rate": 2.769305864977847e-05, "loss": 10.3368, "step": 15860 }, { "epoch": 0.07925291517890584, "grad_norm": 0.09910629689693451, "learning_rate": 2.769155673483692e-05, "loss": 10.3372, "step": 15870 }, { "epoch": 0.07930285400384529, "grad_norm": 0.0959116742014885, "learning_rate": 2.769005481989537e-05, "loss": 10.332, "step": 15880 }, { "epoch": 0.07935279282878474, "grad_norm": 0.09615825861692429, "learning_rate": 2.7688552904953816e-05, "loss": 10.3353, "step": 15890 }, { "epoch": 0.07940273165372419, "grad_norm": 0.09139242023229599, "learning_rate": 2.7687050990012266e-05, "loss": 10.3368, "step": 15900 }, { "epoch": 0.07945267047866364, "grad_norm": 0.09158849716186523, "learning_rate": 2.7685549075070716e-05, "loss": 10.3365, "step": 15910 }, { "epoch": 0.07950260930360309, "grad_norm": 0.09999959915876389, "learning_rate": 2.7684047160129166e-05, "loss": 10.3314, "step": 15920 }, { "epoch": 0.07955254812854254, "grad_norm": 0.09477412700653076, "learning_rate": 2.7682545245187616e-05, "loss": 10.3342, "step": 15930 }, { "epoch": 0.07960248695348199, "grad_norm": 0.09604287892580032, "learning_rate": 2.7681043330246063e-05, "loss": 10.3322, "step": 15940 }, { "epoch": 0.07965242577842144, "grad_norm": 0.09351278841495514, "learning_rate": 2.7679541415304513e-05, "loss": 10.3344, "step": 15950 }, { "epoch": 0.07970236460336089, "grad_norm": 0.09625541418790817, "learning_rate": 2.7678039500362963e-05, "loss": 10.3328, "step": 15960 }, { "epoch": 0.07975230342830034, "grad_norm": 0.09256501495838165, "learning_rate": 2.7676537585421414e-05, "loss": 10.3316, "step": 15970 }, { "epoch": 0.07980224225323979, "grad_norm": 0.09308816492557526, "learning_rate": 2.7675035670479864e-05, "loss": 10.3311, "step": 15980 }, { "epoch": 0.07985218107817924, "grad_norm": 0.09923449158668518, "learning_rate": 2.767353375553831e-05, "loss": 10.3336, "step": 15990 }, { "epoch": 0.07990211990311868, "grad_norm": 0.09855549037456512, "learning_rate": 2.7672031840596764e-05, "loss": 10.3271, "step": 16000 }, { "epoch": 0.07995205872805813, "grad_norm": 0.09425193816423416, "learning_rate": 2.767052992565521e-05, "loss": 10.3321, "step": 16010 }, { "epoch": 0.08000199755299758, "grad_norm": 0.09234164655208588, "learning_rate": 2.766902801071366e-05, "loss": 10.334, "step": 16020 }, { "epoch": 0.08005193637793703, "grad_norm": 0.09213145077228546, "learning_rate": 2.766752609577211e-05, "loss": 10.3343, "step": 16030 }, { "epoch": 0.08010187520287648, "grad_norm": 0.09849076718091965, "learning_rate": 2.7666024180830558e-05, "loss": 10.3308, "step": 16040 }, { "epoch": 0.08015181402781593, "grad_norm": 0.09698436409235, "learning_rate": 2.766452226588901e-05, "loss": 10.3301, "step": 16050 }, { "epoch": 0.08020175285275538, "grad_norm": 0.10218020528554916, "learning_rate": 2.766302035094746e-05, "loss": 10.3282, "step": 16060 }, { "epoch": 0.08025169167769482, "grad_norm": 0.0932408943772316, "learning_rate": 2.766151843600591e-05, "loss": 10.3309, "step": 16070 }, { "epoch": 0.08030163050263427, "grad_norm": 0.09290464967489243, "learning_rate": 2.766001652106436e-05, "loss": 10.3298, "step": 16080 }, { "epoch": 0.08035156932757372, "grad_norm": 0.09502555429935455, "learning_rate": 2.7658514606122806e-05, "loss": 10.3309, "step": 16090 }, { "epoch": 0.08040150815251317, "grad_norm": 0.09941313415765762, "learning_rate": 2.765701269118126e-05, "loss": 10.3306, "step": 16100 }, { "epoch": 0.08045144697745261, "grad_norm": 0.09274566918611526, "learning_rate": 2.7655510776239706e-05, "loss": 10.3321, "step": 16110 }, { "epoch": 0.08050138580239206, "grad_norm": 0.094278983771801, "learning_rate": 2.7654008861298156e-05, "loss": 10.3285, "step": 16120 }, { "epoch": 0.08055132462733151, "grad_norm": 0.09539202600717545, "learning_rate": 2.7652506946356606e-05, "loss": 10.3332, "step": 16130 }, { "epoch": 0.08060126345227096, "grad_norm": 0.09607014805078506, "learning_rate": 2.7651005031415053e-05, "loss": 10.3289, "step": 16140 }, { "epoch": 0.08065120227721041, "grad_norm": 0.09501274675130844, "learning_rate": 2.7649503116473507e-05, "loss": 10.3272, "step": 16150 }, { "epoch": 0.08070114110214986, "grad_norm": 0.10182977467775345, "learning_rate": 2.7648001201531953e-05, "loss": 10.3256, "step": 16160 }, { "epoch": 0.08075107992708931, "grad_norm": 0.10245219618082047, "learning_rate": 2.7646499286590404e-05, "loss": 10.3296, "step": 16170 }, { "epoch": 0.08080101875202876, "grad_norm": 0.09179327636957169, "learning_rate": 2.7644997371648854e-05, "loss": 10.3269, "step": 16180 }, { "epoch": 0.08085095757696821, "grad_norm": 0.08983848243951797, "learning_rate": 2.76434954567073e-05, "loss": 10.3286, "step": 16190 }, { "epoch": 0.08090089640190766, "grad_norm": 0.09037093073129654, "learning_rate": 2.7641993541765754e-05, "loss": 10.3275, "step": 16200 }, { "epoch": 0.08095083522684711, "grad_norm": 0.09235038608312607, "learning_rate": 2.76404916268242e-05, "loss": 10.3267, "step": 16210 }, { "epoch": 0.08100077405178656, "grad_norm": 0.09570086747407913, "learning_rate": 2.763898971188265e-05, "loss": 10.3259, "step": 16220 }, { "epoch": 0.08105071287672601, "grad_norm": 0.10826196521520615, "learning_rate": 2.76374877969411e-05, "loss": 10.3268, "step": 16230 }, { "epoch": 0.08110065170166546, "grad_norm": 0.09768252819776535, "learning_rate": 2.7635985881999548e-05, "loss": 10.3236, "step": 16240 }, { "epoch": 0.08115059052660491, "grad_norm": 0.09747245907783508, "learning_rate": 2.7634483967058e-05, "loss": 10.3286, "step": 16250 }, { "epoch": 0.08120052935154436, "grad_norm": 0.0909043624997139, "learning_rate": 2.763298205211645e-05, "loss": 10.3277, "step": 16260 }, { "epoch": 0.0812504681764838, "grad_norm": 0.09629534184932709, "learning_rate": 2.76314801371749e-05, "loss": 10.3245, "step": 16270 }, { "epoch": 0.08130040700142326, "grad_norm": 0.08753444999456406, "learning_rate": 2.762997822223335e-05, "loss": 10.3248, "step": 16280 }, { "epoch": 0.0813503458263627, "grad_norm": 0.09728357940912247, "learning_rate": 2.7628476307291796e-05, "loss": 10.3241, "step": 16290 }, { "epoch": 0.08140028465130215, "grad_norm": 0.09314234554767609, "learning_rate": 2.762697439235025e-05, "loss": 10.3236, "step": 16300 }, { "epoch": 0.0814502234762416, "grad_norm": 0.10189732909202576, "learning_rate": 2.7625472477408696e-05, "loss": 10.323, "step": 16310 }, { "epoch": 0.08150016230118105, "grad_norm": 0.09725593775510788, "learning_rate": 2.762397056246715e-05, "loss": 10.3217, "step": 16320 }, { "epoch": 0.0815501011261205, "grad_norm": 0.09180109947919846, "learning_rate": 2.7622468647525596e-05, "loss": 10.3236, "step": 16330 }, { "epoch": 0.08160003995105995, "grad_norm": 0.09535349905490875, "learning_rate": 2.7620966732584043e-05, "loss": 10.3231, "step": 16340 }, { "epoch": 0.0816499787759994, "grad_norm": 0.09873170405626297, "learning_rate": 2.7619464817642497e-05, "loss": 10.3229, "step": 16350 }, { "epoch": 0.08169991760093885, "grad_norm": 0.09569983929395676, "learning_rate": 2.7617962902700943e-05, "loss": 10.3197, "step": 16360 }, { "epoch": 0.0817498564258783, "grad_norm": 0.0878266841173172, "learning_rate": 2.7616460987759397e-05, "loss": 10.3213, "step": 16370 }, { "epoch": 0.08179979525081775, "grad_norm": 0.09356684237718582, "learning_rate": 2.7614959072817844e-05, "loss": 10.3215, "step": 16380 }, { "epoch": 0.0818497340757572, "grad_norm": 0.09417729079723358, "learning_rate": 2.761345715787629e-05, "loss": 10.3196, "step": 16390 }, { "epoch": 0.08189967290069665, "grad_norm": 0.09582322090864182, "learning_rate": 2.7611955242934744e-05, "loss": 10.3188, "step": 16400 }, { "epoch": 0.0819496117256361, "grad_norm": 0.09269662201404572, "learning_rate": 2.761045332799319e-05, "loss": 10.3197, "step": 16410 }, { "epoch": 0.08199955055057555, "grad_norm": 0.09405113756656647, "learning_rate": 2.7608951413051645e-05, "loss": 10.3219, "step": 16420 }, { "epoch": 0.082049489375515, "grad_norm": 0.09437499940395355, "learning_rate": 2.760744949811009e-05, "loss": 10.3209, "step": 16430 }, { "epoch": 0.08209942820045445, "grad_norm": 0.09764499962329865, "learning_rate": 2.7605947583168538e-05, "loss": 10.3186, "step": 16440 }, { "epoch": 0.0821493670253939, "grad_norm": 0.09104657918214798, "learning_rate": 2.7604445668226992e-05, "loss": 10.3198, "step": 16450 }, { "epoch": 0.08219930585033335, "grad_norm": 0.09402047097682953, "learning_rate": 2.760294375328544e-05, "loss": 10.3226, "step": 16460 }, { "epoch": 0.0822492446752728, "grad_norm": 0.09690111875534058, "learning_rate": 2.7601441838343892e-05, "loss": 10.3231, "step": 16470 }, { "epoch": 0.08229918350021224, "grad_norm": 0.09647475183010101, "learning_rate": 2.759993992340234e-05, "loss": 10.318, "step": 16480 }, { "epoch": 0.0823491223251517, "grad_norm": 0.09171656519174576, "learning_rate": 2.7598438008460786e-05, "loss": 10.3201, "step": 16490 }, { "epoch": 0.08239906115009114, "grad_norm": 0.09819825738668442, "learning_rate": 2.759693609351924e-05, "loss": 10.3202, "step": 16500 }, { "epoch": 0.08244899997503059, "grad_norm": 0.09369684010744095, "learning_rate": 2.7595434178577686e-05, "loss": 10.32, "step": 16510 }, { "epoch": 0.08249893879997004, "grad_norm": 0.0983380377292633, "learning_rate": 2.759393226363614e-05, "loss": 10.3198, "step": 16520 }, { "epoch": 0.08254887762490949, "grad_norm": 0.0950574278831482, "learning_rate": 2.7592430348694586e-05, "loss": 10.3151, "step": 16530 }, { "epoch": 0.08259881644984894, "grad_norm": 0.09450704604387283, "learning_rate": 2.7590928433753033e-05, "loss": 10.3157, "step": 16540 }, { "epoch": 0.08264875527478839, "grad_norm": 0.09773395955562592, "learning_rate": 2.7589426518811487e-05, "loss": 10.3158, "step": 16550 }, { "epoch": 0.08269869409972784, "grad_norm": 0.09340257197618484, "learning_rate": 2.7587924603869933e-05, "loss": 10.318, "step": 16560 }, { "epoch": 0.08274863292466728, "grad_norm": 0.09730247408151627, "learning_rate": 2.7586422688928387e-05, "loss": 10.3164, "step": 16570 }, { "epoch": 0.08279857174960673, "grad_norm": 0.09569399803876877, "learning_rate": 2.7584920773986834e-05, "loss": 10.3131, "step": 16580 }, { "epoch": 0.08284851057454617, "grad_norm": 0.09237007796764374, "learning_rate": 2.758341885904528e-05, "loss": 10.3144, "step": 16590 }, { "epoch": 0.08289844939948562, "grad_norm": 0.09663589298725128, "learning_rate": 2.7581916944103734e-05, "loss": 10.3138, "step": 16600 }, { "epoch": 0.08294838822442507, "grad_norm": 0.09514245390892029, "learning_rate": 2.758041502916218e-05, "loss": 10.3155, "step": 16610 }, { "epoch": 0.08299832704936452, "grad_norm": 0.09728768467903137, "learning_rate": 2.7578913114220635e-05, "loss": 10.3166, "step": 16620 }, { "epoch": 0.08304826587430397, "grad_norm": 0.10066792368888855, "learning_rate": 2.757741119927908e-05, "loss": 10.3149, "step": 16630 }, { "epoch": 0.08309820469924342, "grad_norm": 0.09562237560749054, "learning_rate": 2.757590928433753e-05, "loss": 10.3161, "step": 16640 }, { "epoch": 0.08314814352418287, "grad_norm": 0.09799202531576157, "learning_rate": 2.7574407369395982e-05, "loss": 10.3133, "step": 16650 }, { "epoch": 0.08319808234912232, "grad_norm": 0.09569067507982254, "learning_rate": 2.757290545445443e-05, "loss": 10.314, "step": 16660 }, { "epoch": 0.08324802117406177, "grad_norm": 0.09926281124353409, "learning_rate": 2.7571403539512882e-05, "loss": 10.3098, "step": 16670 }, { "epoch": 0.08329795999900122, "grad_norm": 0.09658607095479965, "learning_rate": 2.756990162457133e-05, "loss": 10.3153, "step": 16680 }, { "epoch": 0.08334789882394067, "grad_norm": 0.09640868008136749, "learning_rate": 2.756839970962978e-05, "loss": 10.3109, "step": 16690 }, { "epoch": 0.08339783764888012, "grad_norm": 0.0960702896118164, "learning_rate": 2.756689779468823e-05, "loss": 10.3135, "step": 16700 }, { "epoch": 0.08344777647381957, "grad_norm": 0.10049708187580109, "learning_rate": 2.7565395879746676e-05, "loss": 10.3111, "step": 16710 }, { "epoch": 0.08349771529875902, "grad_norm": 0.1001792773604393, "learning_rate": 2.756389396480513e-05, "loss": 10.3115, "step": 16720 }, { "epoch": 0.08354765412369847, "grad_norm": 0.09567177295684814, "learning_rate": 2.7562392049863576e-05, "loss": 10.3118, "step": 16730 }, { "epoch": 0.08359759294863792, "grad_norm": 0.09801898151636124, "learning_rate": 2.7560890134922027e-05, "loss": 43.1471, "step": 16740 }, { "epoch": 0.08364753177357737, "grad_norm": 0.09383946657180786, "learning_rate": 2.7559388219980477e-05, "loss": 14.7706, "step": 16750 }, { "epoch": 0.08369747059851682, "grad_norm": 0.10004527121782303, "learning_rate": 2.7557886305038924e-05, "loss": 10.3106, "step": 16760 }, { "epoch": 0.08374740942345626, "grad_norm": 0.10205840319395065, "learning_rate": 2.7556384390097377e-05, "loss": 10.7248, "step": 16770 }, { "epoch": 0.08379734824839571, "grad_norm": 0.10261974483728409, "learning_rate": 2.7554882475155824e-05, "loss": 10.3106, "step": 16780 }, { "epoch": 0.08384728707333516, "grad_norm": 0.09215449541807175, "learning_rate": 2.7553380560214274e-05, "loss": 10.3098, "step": 16790 }, { "epoch": 0.08389722589827461, "grad_norm": 0.09583956003189087, "learning_rate": 2.7551878645272724e-05, "loss": 10.3098, "step": 16800 }, { "epoch": 0.08394716472321406, "grad_norm": 0.09968181699514389, "learning_rate": 2.755037673033117e-05, "loss": 10.3098, "step": 16810 }, { "epoch": 0.08399710354815351, "grad_norm": 0.09686349332332611, "learning_rate": 2.7548874815389625e-05, "loss": 10.3073, "step": 16820 }, { "epoch": 0.08404704237309296, "grad_norm": 0.09120567888021469, "learning_rate": 2.754737290044807e-05, "loss": 10.3103, "step": 16830 }, { "epoch": 0.08409698119803241, "grad_norm": 0.0937332808971405, "learning_rate": 2.754587098550652e-05, "loss": 10.3085, "step": 16840 }, { "epoch": 0.08414692002297186, "grad_norm": 0.091478131711483, "learning_rate": 2.7544369070564972e-05, "loss": 10.3071, "step": 16850 }, { "epoch": 0.08419685884791131, "grad_norm": 0.09270336478948593, "learning_rate": 2.754286715562342e-05, "loss": 10.3107, "step": 16860 }, { "epoch": 0.08424679767285076, "grad_norm": 0.09209531545639038, "learning_rate": 2.7541365240681872e-05, "loss": 10.3128, "step": 16870 }, { "epoch": 0.08429673649779021, "grad_norm": 0.09706257283687592, "learning_rate": 2.753986332574032e-05, "loss": 10.3076, "step": 16880 }, { "epoch": 0.08434667532272966, "grad_norm": 0.09445540606975555, "learning_rate": 2.753836141079877e-05, "loss": 10.3094, "step": 16890 }, { "epoch": 0.08439661414766911, "grad_norm": 0.09534412622451782, "learning_rate": 2.753685949585722e-05, "loss": 10.311, "step": 16900 }, { "epoch": 0.08444655297260856, "grad_norm": 0.09859678149223328, "learning_rate": 2.7535357580915666e-05, "loss": 10.3073, "step": 16910 }, { "epoch": 0.084496491797548, "grad_norm": 0.09540943056344986, "learning_rate": 2.753385566597412e-05, "loss": 10.3114, "step": 16920 }, { "epoch": 0.08454643062248746, "grad_norm": 0.09608754515647888, "learning_rate": 2.7532353751032566e-05, "loss": 10.3052, "step": 16930 }, { "epoch": 0.0845963694474269, "grad_norm": 0.09397275745868683, "learning_rate": 2.7530851836091017e-05, "loss": 10.3078, "step": 16940 }, { "epoch": 0.08464630827236636, "grad_norm": 0.09713399410247803, "learning_rate": 2.7529349921149467e-05, "loss": 10.3083, "step": 16950 }, { "epoch": 0.0846962470973058, "grad_norm": 0.09662938863039017, "learning_rate": 2.7527848006207917e-05, "loss": 10.304, "step": 16960 }, { "epoch": 0.08474618592224525, "grad_norm": 0.09605155885219574, "learning_rate": 2.7526346091266367e-05, "loss": 10.3048, "step": 16970 }, { "epoch": 0.0847961247471847, "grad_norm": 0.09501200169324875, "learning_rate": 2.7524844176324814e-05, "loss": 10.3018, "step": 16980 }, { "epoch": 0.08484606357212415, "grad_norm": 0.0933113843202591, "learning_rate": 2.7523342261383264e-05, "loss": 10.3059, "step": 16990 }, { "epoch": 0.0848960023970636, "grad_norm": 0.09310649335384369, "learning_rate": 2.7521840346441714e-05, "loss": 10.3055, "step": 17000 }, { "epoch": 0.08494594122200305, "grad_norm": 0.09754558652639389, "learning_rate": 2.7520338431500164e-05, "loss": 10.3086, "step": 17010 }, { "epoch": 0.0849958800469425, "grad_norm": 0.08995413780212402, "learning_rate": 2.7518836516558615e-05, "loss": 10.3065, "step": 17020 }, { "epoch": 0.08504581887188195, "grad_norm": 0.09209508448839188, "learning_rate": 2.751733460161706e-05, "loss": 10.3058, "step": 17030 }, { "epoch": 0.0850957576968214, "grad_norm": 0.0900413915514946, "learning_rate": 2.751583268667551e-05, "loss": 10.3033, "step": 17040 }, { "epoch": 0.08514569652176085, "grad_norm": 0.10113733261823654, "learning_rate": 2.7514330771733962e-05, "loss": 10.3027, "step": 17050 }, { "epoch": 0.08519563534670029, "grad_norm": 0.10684780031442642, "learning_rate": 2.7512828856792412e-05, "loss": 10.3006, "step": 17060 }, { "epoch": 0.08524557417163973, "grad_norm": 0.09294813871383667, "learning_rate": 2.7511326941850862e-05, "loss": 10.3065, "step": 17070 }, { "epoch": 0.08529551299657918, "grad_norm": 0.09303073585033417, "learning_rate": 2.750982502690931e-05, "loss": 10.3031, "step": 17080 }, { "epoch": 0.08534545182151863, "grad_norm": 0.09494984894990921, "learning_rate": 2.750832311196776e-05, "loss": 10.3067, "step": 17090 }, { "epoch": 0.08539539064645808, "grad_norm": 0.09534880518913269, "learning_rate": 2.750682119702621e-05, "loss": 10.302, "step": 17100 }, { "epoch": 0.08544532947139753, "grad_norm": 0.09925422072410583, "learning_rate": 2.750531928208466e-05, "loss": 10.3026, "step": 17110 }, { "epoch": 0.08549526829633698, "grad_norm": 0.09157177805900574, "learning_rate": 2.750381736714311e-05, "loss": 10.3023, "step": 17120 }, { "epoch": 0.08554520712127643, "grad_norm": 0.09446504712104797, "learning_rate": 2.7502315452201556e-05, "loss": 10.3023, "step": 17130 }, { "epoch": 0.08559514594621588, "grad_norm": 0.08893059939146042, "learning_rate": 2.7500813537260007e-05, "loss": 10.3017, "step": 17140 }, { "epoch": 0.08564508477115533, "grad_norm": 0.09873572736978531, "learning_rate": 2.7499311622318457e-05, "loss": 10.3041, "step": 17150 }, { "epoch": 0.08569502359609478, "grad_norm": 0.10007481276988983, "learning_rate": 2.7497809707376907e-05, "loss": 10.2991, "step": 17160 }, { "epoch": 0.08574496242103423, "grad_norm": 0.0909745916724205, "learning_rate": 2.7496307792435357e-05, "loss": 12.4831, "step": 17170 }, { "epoch": 0.08579490124597368, "grad_norm": 0.09608662128448486, "learning_rate": 2.7494805877493804e-05, "loss": 10.3047, "step": 17180 }, { "epoch": 0.08584484007091313, "grad_norm": 0.0924767255783081, "learning_rate": 2.7493303962552254e-05, "loss": 10.2995, "step": 17190 }, { "epoch": 0.08589477889585258, "grad_norm": 0.09744284301996231, "learning_rate": 2.7491802047610704e-05, "loss": 10.3003, "step": 17200 }, { "epoch": 0.08594471772079203, "grad_norm": 0.09493273496627808, "learning_rate": 2.7490300132669154e-05, "loss": 10.3025, "step": 17210 }, { "epoch": 0.08599465654573148, "grad_norm": 0.10000015050172806, "learning_rate": 2.7488798217727605e-05, "loss": 10.3011, "step": 17220 }, { "epoch": 0.08604459537067093, "grad_norm": 0.09295312315225601, "learning_rate": 2.748729630278605e-05, "loss": 10.3004, "step": 17230 }, { "epoch": 0.08609453419561038, "grad_norm": 0.10081910341978073, "learning_rate": 2.74857943878445e-05, "loss": 10.2935, "step": 17240 }, { "epoch": 0.08614447302054982, "grad_norm": 0.09449261426925659, "learning_rate": 2.7484292472902952e-05, "loss": 10.2981, "step": 17250 }, { "epoch": 0.08619441184548927, "grad_norm": 0.09793805330991745, "learning_rate": 2.7482790557961402e-05, "loss": 10.2947, "step": 17260 }, { "epoch": 0.08624435067042872, "grad_norm": 0.10022538155317307, "learning_rate": 2.7481288643019852e-05, "loss": 10.2971, "step": 17270 }, { "epoch": 0.08629428949536817, "grad_norm": 0.0923459604382515, "learning_rate": 2.7479786728078302e-05, "loss": 10.2982, "step": 17280 }, { "epoch": 0.08634422832030762, "grad_norm": 0.09417003393173218, "learning_rate": 2.747828481313675e-05, "loss": 10.2989, "step": 17290 }, { "epoch": 0.08639416714524707, "grad_norm": 0.09343262761831284, "learning_rate": 2.74767828981952e-05, "loss": 10.2968, "step": 17300 }, { "epoch": 0.08644410597018652, "grad_norm": 0.09520164877176285, "learning_rate": 2.747528098325365e-05, "loss": 10.2959, "step": 17310 }, { "epoch": 0.08649404479512597, "grad_norm": 0.09455534815788269, "learning_rate": 2.74737790683121e-05, "loss": 10.2957, "step": 17320 }, { "epoch": 0.08654398362006542, "grad_norm": 0.09446435421705246, "learning_rate": 2.747227715337055e-05, "loss": 10.2937, "step": 17330 }, { "epoch": 0.08659392244500487, "grad_norm": 0.09533445537090302, "learning_rate": 2.7470775238428997e-05, "loss": 10.2936, "step": 17340 }, { "epoch": 0.08664386126994432, "grad_norm": 0.09248590469360352, "learning_rate": 2.7469273323487447e-05, "loss": 10.2965, "step": 17350 }, { "epoch": 0.08669380009488377, "grad_norm": 0.0972449779510498, "learning_rate": 2.7467771408545897e-05, "loss": 10.2934, "step": 17360 }, { "epoch": 0.08674373891982322, "grad_norm": 0.08786053210496902, "learning_rate": 2.7466269493604347e-05, "loss": 10.2956, "step": 17370 }, { "epoch": 0.08679367774476267, "grad_norm": 0.10034439712762833, "learning_rate": 2.7464767578662797e-05, "loss": 10.2939, "step": 17380 }, { "epoch": 0.08684361656970212, "grad_norm": 0.10132072865962982, "learning_rate": 2.7463265663721244e-05, "loss": 10.2934, "step": 17390 }, { "epoch": 0.08689355539464157, "grad_norm": 0.093051977455616, "learning_rate": 2.7461763748779694e-05, "loss": 10.2921, "step": 17400 }, { "epoch": 0.08694349421958102, "grad_norm": 0.09505248069763184, "learning_rate": 2.7460261833838144e-05, "loss": 10.2927, "step": 17410 }, { "epoch": 0.08699343304452047, "grad_norm": 0.09846062958240509, "learning_rate": 2.7458759918896595e-05, "loss": 10.2926, "step": 17420 }, { "epoch": 0.08704337186945992, "grad_norm": 0.10331861674785614, "learning_rate": 2.7457258003955045e-05, "loss": 10.2922, "step": 17430 }, { "epoch": 0.08709331069439936, "grad_norm": 0.0902588963508606, "learning_rate": 2.745575608901349e-05, "loss": 10.2916, "step": 17440 }, { "epoch": 0.08714324951933881, "grad_norm": 0.09760275483131409, "learning_rate": 2.7454254174071942e-05, "loss": 10.294, "step": 17450 }, { "epoch": 0.08719318834427826, "grad_norm": 0.09371180832386017, "learning_rate": 2.7452752259130392e-05, "loss": 10.2919, "step": 17460 }, { "epoch": 0.08724312716921771, "grad_norm": 0.09257213771343231, "learning_rate": 2.7451250344188842e-05, "loss": 10.2953, "step": 17470 }, { "epoch": 0.08729306599415716, "grad_norm": 0.09264030307531357, "learning_rate": 2.7449748429247292e-05, "loss": 10.2946, "step": 17480 }, { "epoch": 0.08734300481909661, "grad_norm": 0.09427089989185333, "learning_rate": 2.744824651430574e-05, "loss": 10.2918, "step": 17490 }, { "epoch": 0.08739294364403606, "grad_norm": 0.09720293432474136, "learning_rate": 2.744674459936419e-05, "loss": 10.2937, "step": 17500 }, { "epoch": 0.08744288246897551, "grad_norm": 0.09387167543172836, "learning_rate": 2.744524268442264e-05, "loss": 10.2891, "step": 17510 }, { "epoch": 0.08749282129391496, "grad_norm": 0.09527067095041275, "learning_rate": 2.744374076948109e-05, "loss": 10.2929, "step": 17520 }, { "epoch": 0.08754276011885441, "grad_norm": 0.09935829043388367, "learning_rate": 2.744223885453954e-05, "loss": 10.2936, "step": 17530 }, { "epoch": 0.08759269894379386, "grad_norm": 0.0906045064330101, "learning_rate": 2.7440736939597987e-05, "loss": 10.2928, "step": 17540 }, { "epoch": 0.08764263776873331, "grad_norm": 0.09333600848913193, "learning_rate": 2.7439235024656437e-05, "loss": 10.2918, "step": 17550 }, { "epoch": 0.08769257659367274, "grad_norm": 0.09050074964761734, "learning_rate": 2.7437733109714887e-05, "loss": 10.2893, "step": 17560 }, { "epoch": 0.0877425154186122, "grad_norm": 0.0935349315404892, "learning_rate": 2.7436231194773337e-05, "loss": 10.2924, "step": 17570 }, { "epoch": 0.08779245424355164, "grad_norm": 0.09864573180675507, "learning_rate": 2.7434729279831787e-05, "loss": 10.2915, "step": 17580 }, { "epoch": 0.08784239306849109, "grad_norm": 0.09615197032690048, "learning_rate": 2.7433227364890234e-05, "loss": 10.2898, "step": 17590 }, { "epoch": 0.08789233189343054, "grad_norm": 0.09623583406209946, "learning_rate": 2.7431725449948688e-05, "loss": 10.2904, "step": 17600 }, { "epoch": 0.08794227071836999, "grad_norm": 0.0986003652215004, "learning_rate": 2.7430223535007134e-05, "loss": 10.2876, "step": 17610 }, { "epoch": 0.08799220954330944, "grad_norm": 0.09806670993566513, "learning_rate": 2.7428721620065585e-05, "loss": 10.2888, "step": 17620 }, { "epoch": 0.08804214836824889, "grad_norm": 0.09443091601133347, "learning_rate": 2.7427219705124035e-05, "loss": 10.2883, "step": 17630 }, { "epoch": 0.08809208719318834, "grad_norm": 0.0967443585395813, "learning_rate": 2.742571779018248e-05, "loss": 10.2869, "step": 17640 }, { "epoch": 0.08814202601812779, "grad_norm": 0.09863567352294922, "learning_rate": 2.7424215875240935e-05, "loss": 10.2899, "step": 17650 }, { "epoch": 0.08819196484306724, "grad_norm": 0.09256859123706818, "learning_rate": 2.7422713960299382e-05, "loss": 10.2843, "step": 17660 }, { "epoch": 0.08824190366800669, "grad_norm": 0.09759136289358139, "learning_rate": 2.7421212045357832e-05, "loss": 10.2869, "step": 17670 }, { "epoch": 0.08829184249294614, "grad_norm": 0.09880513697862625, "learning_rate": 2.7419710130416282e-05, "loss": 10.2891, "step": 17680 }, { "epoch": 0.08834178131788559, "grad_norm": 0.09688255190849304, "learning_rate": 2.741820821547473e-05, "loss": 10.2857, "step": 17690 }, { "epoch": 0.08839172014282504, "grad_norm": 0.0934658944606781, "learning_rate": 2.7416706300533183e-05, "loss": 10.2866, "step": 17700 }, { "epoch": 0.08844165896776449, "grad_norm": 0.09990676492452621, "learning_rate": 2.741520438559163e-05, "loss": 10.2842, "step": 17710 }, { "epoch": 0.08849159779270394, "grad_norm": 0.09611719101667404, "learning_rate": 2.741370247065008e-05, "loss": 10.2883, "step": 17720 }, { "epoch": 0.08854153661764338, "grad_norm": 0.09569796919822693, "learning_rate": 2.741220055570853e-05, "loss": 10.2871, "step": 17730 }, { "epoch": 0.08859147544258283, "grad_norm": 0.09596303105354309, "learning_rate": 2.7410698640766977e-05, "loss": 10.2823, "step": 17740 }, { "epoch": 0.08864141426752228, "grad_norm": 0.0938568115234375, "learning_rate": 2.740919672582543e-05, "loss": 10.2833, "step": 17750 }, { "epoch": 0.08869135309246173, "grad_norm": 0.09523828327655792, "learning_rate": 2.7407694810883877e-05, "loss": 10.2872, "step": 17760 }, { "epoch": 0.08874129191740118, "grad_norm": 0.09327805787324905, "learning_rate": 2.7406192895942327e-05, "loss": 10.2872, "step": 17770 }, { "epoch": 0.08879123074234063, "grad_norm": 0.09197764843702316, "learning_rate": 2.7404690981000777e-05, "loss": 10.2836, "step": 17780 }, { "epoch": 0.08884116956728008, "grad_norm": 0.08873019367456436, "learning_rate": 2.7403189066059224e-05, "loss": 10.2816, "step": 17790 }, { "epoch": 0.08889110839221953, "grad_norm": 0.0956963300704956, "learning_rate": 2.7401687151117678e-05, "loss": 10.2801, "step": 17800 }, { "epoch": 0.08894104721715898, "grad_norm": 0.09462413936853409, "learning_rate": 2.7400185236176124e-05, "loss": 10.2806, "step": 17810 }, { "epoch": 0.08899098604209843, "grad_norm": 0.09391899406909943, "learning_rate": 2.7398683321234575e-05, "loss": 10.2877, "step": 17820 }, { "epoch": 0.08904092486703788, "grad_norm": 0.09864463657140732, "learning_rate": 2.7397181406293025e-05, "loss": 10.2851, "step": 17830 }, { "epoch": 0.08909086369197733, "grad_norm": 0.09195532649755478, "learning_rate": 2.739567949135147e-05, "loss": 10.2851, "step": 17840 }, { "epoch": 0.08914080251691678, "grad_norm": 0.09261298924684525, "learning_rate": 2.7394177576409925e-05, "loss": 10.2853, "step": 17850 }, { "epoch": 0.08919074134185623, "grad_norm": 0.09226420521736145, "learning_rate": 2.7392675661468372e-05, "loss": 10.2833, "step": 17860 }, { "epoch": 0.08924068016679568, "grad_norm": 0.09687785059213638, "learning_rate": 2.7391173746526822e-05, "loss": 10.2819, "step": 17870 }, { "epoch": 0.08929061899173513, "grad_norm": 0.09745992720127106, "learning_rate": 2.7389671831585272e-05, "loss": 10.285, "step": 17880 }, { "epoch": 0.08934055781667458, "grad_norm": 0.10478182882070541, "learning_rate": 2.738816991664372e-05, "loss": 10.2759, "step": 17890 }, { "epoch": 0.08939049664161403, "grad_norm": 0.0948200598359108, "learning_rate": 2.7386668001702173e-05, "loss": 10.2803, "step": 17900 }, { "epoch": 0.08944043546655348, "grad_norm": 0.09617318212985992, "learning_rate": 2.738516608676062e-05, "loss": 10.2789, "step": 17910 }, { "epoch": 0.08949037429149292, "grad_norm": 0.09210513532161713, "learning_rate": 2.7383664171819073e-05, "loss": 10.2788, "step": 17920 }, { "epoch": 0.08954031311643237, "grad_norm": 0.09981706738471985, "learning_rate": 2.738216225687752e-05, "loss": 10.2767, "step": 17930 }, { "epoch": 0.08959025194137182, "grad_norm": 0.1067042201757431, "learning_rate": 2.7380660341935967e-05, "loss": 10.2798, "step": 17940 }, { "epoch": 0.08964019076631127, "grad_norm": 0.0890965685248375, "learning_rate": 2.737915842699442e-05, "loss": 10.2807, "step": 17950 }, { "epoch": 0.08969012959125072, "grad_norm": 0.09566470980644226, "learning_rate": 2.7377656512052867e-05, "loss": 10.2792, "step": 17960 }, { "epoch": 0.08974006841619017, "grad_norm": 0.09969697147607803, "learning_rate": 2.737615459711132e-05, "loss": 10.2778, "step": 17970 }, { "epoch": 0.08979000724112962, "grad_norm": 0.09558671712875366, "learning_rate": 2.7374652682169767e-05, "loss": 10.2777, "step": 17980 }, { "epoch": 0.08983994606606907, "grad_norm": 0.09214569628238678, "learning_rate": 2.7373150767228214e-05, "loss": 10.2794, "step": 17990 }, { "epoch": 0.08988988489100852, "grad_norm": 0.09558131545782089, "learning_rate": 2.7371648852286668e-05, "loss": 10.279, "step": 18000 }, { "epoch": 0.08993982371594797, "grad_norm": 0.09858100861310959, "learning_rate": 2.7370146937345114e-05, "loss": 10.2783, "step": 18010 }, { "epoch": 0.08998976254088742, "grad_norm": 0.09311861544847488, "learning_rate": 2.7368645022403568e-05, "loss": 10.28, "step": 18020 }, { "epoch": 0.09003970136582687, "grad_norm": 0.09338545799255371, "learning_rate": 2.7367143107462015e-05, "loss": 10.2755, "step": 18030 }, { "epoch": 0.09008964019076632, "grad_norm": 0.09420605003833771, "learning_rate": 2.736564119252046e-05, "loss": 10.2778, "step": 18040 }, { "epoch": 0.09013957901570575, "grad_norm": 0.09855175018310547, "learning_rate": 2.7364139277578915e-05, "loss": 10.2763, "step": 18050 }, { "epoch": 0.0901895178406452, "grad_norm": 0.09705475717782974, "learning_rate": 2.7362637362637362e-05, "loss": 10.2781, "step": 18060 }, { "epoch": 0.09023945666558465, "grad_norm": 0.09603998064994812, "learning_rate": 2.7361135447695816e-05, "loss": 10.277, "step": 18070 }, { "epoch": 0.0902893954905241, "grad_norm": 0.09418918937444687, "learning_rate": 2.7359633532754262e-05, "loss": 10.274, "step": 18080 }, { "epoch": 0.09033933431546355, "grad_norm": 0.09442311525344849, "learning_rate": 2.735813161781271e-05, "loss": 10.2778, "step": 18090 }, { "epoch": 0.090389273140403, "grad_norm": 0.0943770557641983, "learning_rate": 2.7356629702871163e-05, "loss": 10.273, "step": 18100 }, { "epoch": 0.09043921196534245, "grad_norm": 0.09986016154289246, "learning_rate": 2.735512778792961e-05, "loss": 10.2782, "step": 18110 }, { "epoch": 0.0904891507902819, "grad_norm": 0.09035969525575638, "learning_rate": 2.7353625872988063e-05, "loss": 10.2772, "step": 18120 }, { "epoch": 0.09053908961522135, "grad_norm": 0.09936581552028656, "learning_rate": 2.735212395804651e-05, "loss": 10.2784, "step": 18130 }, { "epoch": 0.0905890284401608, "grad_norm": 0.09473800659179688, "learning_rate": 2.7350622043104957e-05, "loss": 10.2698, "step": 18140 }, { "epoch": 0.09063896726510025, "grad_norm": 0.09298662096261978, "learning_rate": 2.734912012816341e-05, "loss": 10.2763, "step": 18150 }, { "epoch": 0.0906889060900397, "grad_norm": 0.09476146847009659, "learning_rate": 2.7347618213221857e-05, "loss": 10.2771, "step": 18160 }, { "epoch": 0.09073884491497915, "grad_norm": 0.09148777276277542, "learning_rate": 2.734611629828031e-05, "loss": 10.2752, "step": 18170 }, { "epoch": 0.0907887837399186, "grad_norm": 0.09469624608755112, "learning_rate": 2.7344614383338757e-05, "loss": 10.27, "step": 18180 }, { "epoch": 0.09083872256485805, "grad_norm": 0.09769047796726227, "learning_rate": 2.7343112468397204e-05, "loss": 10.2733, "step": 18190 }, { "epoch": 0.0908886613897975, "grad_norm": 0.09332720935344696, "learning_rate": 2.7341610553455658e-05, "loss": 10.2736, "step": 18200 }, { "epoch": 0.09093860021473694, "grad_norm": 0.09613288938999176, "learning_rate": 2.7340108638514105e-05, "loss": 10.2693, "step": 18210 }, { "epoch": 0.0909885390396764, "grad_norm": 0.09733350574970245, "learning_rate": 2.7338606723572558e-05, "loss": 10.2727, "step": 18220 }, { "epoch": 0.09103847786461584, "grad_norm": 0.09478358179330826, "learning_rate": 2.7337104808631005e-05, "loss": 10.2741, "step": 18230 }, { "epoch": 0.0910884166895553, "grad_norm": 0.0969361811876297, "learning_rate": 2.733560289368945e-05, "loss": 10.2716, "step": 18240 }, { "epoch": 0.09113835551449474, "grad_norm": 0.09295036643743515, "learning_rate": 2.7334100978747905e-05, "loss": 10.2748, "step": 18250 }, { "epoch": 0.09118829433943419, "grad_norm": 0.09057419002056122, "learning_rate": 2.7332599063806352e-05, "loss": 10.2733, "step": 18260 }, { "epoch": 0.09123823316437364, "grad_norm": 0.09219224005937576, "learning_rate": 2.7331097148864806e-05, "loss": 10.2736, "step": 18270 }, { "epoch": 0.09128817198931309, "grad_norm": 0.09382915496826172, "learning_rate": 2.7329595233923252e-05, "loss": 10.2732, "step": 18280 }, { "epoch": 0.09133811081425254, "grad_norm": 0.09011480212211609, "learning_rate": 2.7328093318981703e-05, "loss": 10.2728, "step": 18290 }, { "epoch": 0.09138804963919199, "grad_norm": 0.09689683467149734, "learning_rate": 2.7326591404040153e-05, "loss": 10.2674, "step": 18300 }, { "epoch": 0.09143798846413144, "grad_norm": 0.09588810056447983, "learning_rate": 2.73250894890986e-05, "loss": 10.2684, "step": 18310 }, { "epoch": 0.09148792728907089, "grad_norm": 0.09385774284601212, "learning_rate": 2.7323587574157053e-05, "loss": 10.2706, "step": 18320 }, { "epoch": 0.09153786611401034, "grad_norm": 0.09056130051612854, "learning_rate": 2.73220856592155e-05, "loss": 10.27, "step": 18330 }, { "epoch": 0.09158780493894979, "grad_norm": 0.09509393572807312, "learning_rate": 2.732058374427395e-05, "loss": 10.2709, "step": 18340 }, { "epoch": 0.09163774376388924, "grad_norm": 0.09376955777406693, "learning_rate": 2.73190818293324e-05, "loss": 10.2682, "step": 18350 }, { "epoch": 0.09168768258882869, "grad_norm": 0.10004284977912903, "learning_rate": 2.7317579914390847e-05, "loss": 10.2671, "step": 18360 }, { "epoch": 0.09173762141376814, "grad_norm": 0.09222781658172607, "learning_rate": 2.73160779994493e-05, "loss": 10.2699, "step": 18370 }, { "epoch": 0.09178756023870759, "grad_norm": 0.09528329968452454, "learning_rate": 2.7314576084507747e-05, "loss": 10.2711, "step": 18380 }, { "epoch": 0.09183749906364704, "grad_norm": 0.09617341309785843, "learning_rate": 2.7313074169566198e-05, "loss": 10.2668, "step": 18390 }, { "epoch": 0.09188743788858648, "grad_norm": 0.09537350386381149, "learning_rate": 2.7311572254624648e-05, "loss": 10.2692, "step": 18400 }, { "epoch": 0.09193737671352593, "grad_norm": 0.09477705508470535, "learning_rate": 2.7310070339683095e-05, "loss": 10.2695, "step": 18410 }, { "epoch": 0.09198731553846538, "grad_norm": 0.0950855240225792, "learning_rate": 2.7308568424741548e-05, "loss": 10.2652, "step": 18420 }, { "epoch": 0.09203725436340483, "grad_norm": 0.09190298616886139, "learning_rate": 2.7307066509799995e-05, "loss": 10.2666, "step": 18430 }, { "epoch": 0.09208719318834428, "grad_norm": 0.09800129383802414, "learning_rate": 2.7305564594858445e-05, "loss": 10.2699, "step": 18440 }, { "epoch": 0.09213713201328373, "grad_norm": 0.09911572933197021, "learning_rate": 2.7304062679916895e-05, "loss": 10.2665, "step": 18450 }, { "epoch": 0.09218707083822318, "grad_norm": 0.09081263840198517, "learning_rate": 2.7302560764975342e-05, "loss": 10.2648, "step": 18460 }, { "epoch": 0.09223700966316263, "grad_norm": 0.09170430898666382, "learning_rate": 2.7301058850033796e-05, "loss": 10.2636, "step": 18470 }, { "epoch": 0.09228694848810208, "grad_norm": 0.09938862174749374, "learning_rate": 2.7299556935092242e-05, "loss": 10.2678, "step": 18480 }, { "epoch": 0.09233688731304153, "grad_norm": 0.09364289045333862, "learning_rate": 2.7298055020150693e-05, "loss": 10.2656, "step": 18490 }, { "epoch": 0.09238682613798098, "grad_norm": 0.0970052108168602, "learning_rate": 2.7296553105209143e-05, "loss": 10.2641, "step": 18500 }, { "epoch": 0.09243676496292043, "grad_norm": 0.09379066526889801, "learning_rate": 2.729505119026759e-05, "loss": 10.2681, "step": 18510 }, { "epoch": 0.09248670378785988, "grad_norm": 0.0943591296672821, "learning_rate": 2.7293549275326043e-05, "loss": 10.2699, "step": 18520 }, { "epoch": 0.09253664261279933, "grad_norm": 0.09379053860902786, "learning_rate": 2.729204736038449e-05, "loss": 10.2647, "step": 18530 }, { "epoch": 0.09258658143773878, "grad_norm": 0.10091900825500488, "learning_rate": 2.729054544544294e-05, "loss": 10.2604, "step": 18540 }, { "epoch": 0.09263652026267821, "grad_norm": 0.09774335473775864, "learning_rate": 2.728904353050139e-05, "loss": 10.263, "step": 18550 }, { "epoch": 0.09268645908761766, "grad_norm": 0.0980936661362648, "learning_rate": 2.7287541615559837e-05, "loss": 10.2615, "step": 18560 }, { "epoch": 0.09273639791255711, "grad_norm": 0.09376488626003265, "learning_rate": 2.728603970061829e-05, "loss": 10.2634, "step": 18570 }, { "epoch": 0.09278633673749656, "grad_norm": 0.10236544907093048, "learning_rate": 2.7284537785676737e-05, "loss": 10.2677, "step": 18580 }, { "epoch": 0.09283627556243601, "grad_norm": 0.09201527386903763, "learning_rate": 2.7283035870735188e-05, "loss": 10.2606, "step": 18590 }, { "epoch": 0.09288621438737546, "grad_norm": 0.09707942605018616, "learning_rate": 2.7281533955793638e-05, "loss": 10.2669, "step": 18600 }, { "epoch": 0.09293615321231491, "grad_norm": 0.09611304849386215, "learning_rate": 2.7280032040852088e-05, "loss": 10.2644, "step": 18610 }, { "epoch": 0.09298609203725436, "grad_norm": 0.09938688576221466, "learning_rate": 2.7278530125910538e-05, "loss": 10.262, "step": 18620 }, { "epoch": 0.09303603086219381, "grad_norm": 0.10115701705217361, "learning_rate": 2.7277028210968985e-05, "loss": 10.26, "step": 18630 }, { "epoch": 0.09308596968713326, "grad_norm": 0.0930614024400711, "learning_rate": 2.7275526296027435e-05, "loss": 10.2623, "step": 18640 }, { "epoch": 0.09313590851207271, "grad_norm": 0.09945568442344666, "learning_rate": 2.7274024381085885e-05, "loss": 10.2623, "step": 18650 }, { "epoch": 0.09318584733701216, "grad_norm": 0.09213350713253021, "learning_rate": 2.7272522466144335e-05, "loss": 10.2614, "step": 18660 }, { "epoch": 0.0932357861619516, "grad_norm": 0.09411916881799698, "learning_rate": 2.7271020551202786e-05, "loss": 10.2594, "step": 18670 }, { "epoch": 0.09328572498689106, "grad_norm": 0.09325142949819565, "learning_rate": 2.7269518636261232e-05, "loss": 10.2569, "step": 18680 }, { "epoch": 0.0933356638118305, "grad_norm": 0.09338999539613724, "learning_rate": 2.7268016721319683e-05, "loss": 10.2605, "step": 18690 }, { "epoch": 0.09338560263676995, "grad_norm": 0.09095557034015656, "learning_rate": 2.7266514806378133e-05, "loss": 10.2629, "step": 18700 }, { "epoch": 0.0934355414617094, "grad_norm": 0.09274104982614517, "learning_rate": 2.7265012891436583e-05, "loss": 10.2582, "step": 18710 }, { "epoch": 0.09348548028664885, "grad_norm": 0.09546738117933273, "learning_rate": 2.7263510976495033e-05, "loss": 10.2595, "step": 18720 }, { "epoch": 0.0935354191115883, "grad_norm": 0.0984828770160675, "learning_rate": 2.726200906155348e-05, "loss": 10.2569, "step": 18730 }, { "epoch": 0.09358535793652775, "grad_norm": 0.09252218902111053, "learning_rate": 2.726050714661193e-05, "loss": 10.2585, "step": 18740 }, { "epoch": 0.0936352967614672, "grad_norm": 0.09975884854793549, "learning_rate": 2.725900523167038e-05, "loss": 10.2581, "step": 18750 }, { "epoch": 0.09368523558640665, "grad_norm": 0.08888278901576996, "learning_rate": 2.725750331672883e-05, "loss": 10.257, "step": 18760 }, { "epoch": 0.0937351744113461, "grad_norm": 0.09609197080135345, "learning_rate": 2.725600140178728e-05, "loss": 10.2589, "step": 18770 }, { "epoch": 0.09378511323628555, "grad_norm": 0.09457485377788544, "learning_rate": 2.7254499486845727e-05, "loss": 10.2558, "step": 18780 }, { "epoch": 0.093835052061225, "grad_norm": 0.09543312340974808, "learning_rate": 2.7252997571904178e-05, "loss": 10.2583, "step": 18790 }, { "epoch": 0.09388499088616445, "grad_norm": 0.08900411427021027, "learning_rate": 2.7251495656962628e-05, "loss": 10.2572, "step": 18800 }, { "epoch": 0.0939349297111039, "grad_norm": 0.09842579811811447, "learning_rate": 2.7249993742021078e-05, "loss": 10.2532, "step": 18810 }, { "epoch": 0.09398486853604335, "grad_norm": 0.0965215340256691, "learning_rate": 2.7248491827079528e-05, "loss": 10.2548, "step": 18820 }, { "epoch": 0.0940348073609828, "grad_norm": 0.09778942912817001, "learning_rate": 2.7246989912137975e-05, "loss": 10.2543, "step": 18830 }, { "epoch": 0.09408474618592225, "grad_norm": 0.09559378027915955, "learning_rate": 2.7245487997196425e-05, "loss": 10.2552, "step": 18840 }, { "epoch": 0.0941346850108617, "grad_norm": 0.09030424058437347, "learning_rate": 2.7243986082254875e-05, "loss": 10.2562, "step": 18850 }, { "epoch": 0.09418462383580115, "grad_norm": 0.09515605866909027, "learning_rate": 2.7242484167313325e-05, "loss": 10.2521, "step": 18860 }, { "epoch": 0.0942345626607406, "grad_norm": 0.09284040331840515, "learning_rate": 2.7240982252371776e-05, "loss": 10.2552, "step": 18870 }, { "epoch": 0.09428450148568004, "grad_norm": 0.09655296802520752, "learning_rate": 2.7239480337430222e-05, "loss": 10.2526, "step": 18880 }, { "epoch": 0.0943344403106195, "grad_norm": 0.09709782898426056, "learning_rate": 2.7237978422488673e-05, "loss": 10.2554, "step": 18890 }, { "epoch": 0.09438437913555894, "grad_norm": 0.09283418953418732, "learning_rate": 2.7236476507547123e-05, "loss": 10.2538, "step": 18900 }, { "epoch": 0.09443431796049839, "grad_norm": 0.09396416693925858, "learning_rate": 2.7234974592605573e-05, "loss": 10.2539, "step": 18910 }, { "epoch": 0.09448425678543784, "grad_norm": 0.1044858917593956, "learning_rate": 2.7233472677664023e-05, "loss": 10.2573, "step": 18920 }, { "epoch": 0.09453419561037729, "grad_norm": 0.09172073006629944, "learning_rate": 2.7231970762722473e-05, "loss": 10.2548, "step": 18930 }, { "epoch": 0.09458413443531674, "grad_norm": 0.09052781015634537, "learning_rate": 2.723046884778092e-05, "loss": 10.2535, "step": 18940 }, { "epoch": 0.09463407326025619, "grad_norm": 0.09258178621530533, "learning_rate": 2.722896693283937e-05, "loss": 10.2505, "step": 18950 }, { "epoch": 0.09468401208519564, "grad_norm": 0.09820058196783066, "learning_rate": 2.722746501789782e-05, "loss": 10.2543, "step": 18960 }, { "epoch": 0.09473395091013509, "grad_norm": 0.09973189234733582, "learning_rate": 2.722596310295627e-05, "loss": 10.2533, "step": 18970 }, { "epoch": 0.09478388973507454, "grad_norm": 0.09172434359788895, "learning_rate": 2.722446118801472e-05, "loss": 10.2502, "step": 18980 }, { "epoch": 0.09483382856001399, "grad_norm": 0.0971081554889679, "learning_rate": 2.7222959273073168e-05, "loss": 10.2485, "step": 18990 }, { "epoch": 0.09488376738495344, "grad_norm": 0.09259314090013504, "learning_rate": 2.7221457358131618e-05, "loss": 10.2516, "step": 19000 }, { "epoch": 0.09493370620989289, "grad_norm": 0.10186239331960678, "learning_rate": 2.7219955443190068e-05, "loss": 10.2502, "step": 19010 }, { "epoch": 0.09498364503483234, "grad_norm": 0.09782648831605911, "learning_rate": 2.7218453528248518e-05, "loss": 10.254, "step": 19020 }, { "epoch": 0.09503358385977179, "grad_norm": 0.09522926807403564, "learning_rate": 2.7216951613306968e-05, "loss": 10.2542, "step": 19030 }, { "epoch": 0.09508352268471122, "grad_norm": 0.09580672532320023, "learning_rate": 2.7215449698365415e-05, "loss": 10.2509, "step": 19040 }, { "epoch": 0.09513346150965067, "grad_norm": 0.10172213613986969, "learning_rate": 2.7213947783423865e-05, "loss": 10.2519, "step": 19050 }, { "epoch": 0.09518340033459012, "grad_norm": 0.09953866899013519, "learning_rate": 2.7212445868482315e-05, "loss": 10.2514, "step": 19060 }, { "epoch": 0.09523333915952957, "grad_norm": 0.09473235160112381, "learning_rate": 2.7210943953540766e-05, "loss": 10.2493, "step": 19070 }, { "epoch": 0.09528327798446902, "grad_norm": 0.09612436592578888, "learning_rate": 2.7209442038599216e-05, "loss": 10.2495, "step": 19080 }, { "epoch": 0.09533321680940847, "grad_norm": 0.09395024180412292, "learning_rate": 2.7207940123657663e-05, "loss": 10.2494, "step": 19090 }, { "epoch": 0.09538315563434792, "grad_norm": 0.0924721285700798, "learning_rate": 2.7206438208716113e-05, "loss": 10.2493, "step": 19100 }, { "epoch": 0.09543309445928737, "grad_norm": 0.09010235965251923, "learning_rate": 2.7204936293774563e-05, "loss": 10.2508, "step": 19110 }, { "epoch": 0.09548303328422682, "grad_norm": 0.09913980215787888, "learning_rate": 2.7203434378833013e-05, "loss": 10.2485, "step": 19120 }, { "epoch": 0.09553297210916627, "grad_norm": 0.09284950792789459, "learning_rate": 2.7201932463891463e-05, "loss": 10.2497, "step": 19130 }, { "epoch": 0.09558291093410572, "grad_norm": 0.09394965320825577, "learning_rate": 2.720043054894991e-05, "loss": 10.2504, "step": 19140 }, { "epoch": 0.09563284975904517, "grad_norm": 0.09422177821397781, "learning_rate": 2.719892863400836e-05, "loss": 10.2443, "step": 19150 }, { "epoch": 0.09568278858398462, "grad_norm": 0.09094090759754181, "learning_rate": 2.719742671906681e-05, "loss": 10.2472, "step": 19160 }, { "epoch": 0.09573272740892406, "grad_norm": 0.09635195881128311, "learning_rate": 2.719592480412526e-05, "loss": 10.2485, "step": 19170 }, { "epoch": 0.09578266623386351, "grad_norm": 0.09993939101696014, "learning_rate": 2.719442288918371e-05, "loss": 10.2454, "step": 19180 }, { "epoch": 0.09583260505880296, "grad_norm": 0.09371164441108704, "learning_rate": 2.7192920974242158e-05, "loss": 10.2504, "step": 19190 }, { "epoch": 0.09588254388374241, "grad_norm": 0.09679362177848816, "learning_rate": 2.7191419059300608e-05, "loss": 10.2465, "step": 19200 }, { "epoch": 0.09593248270868186, "grad_norm": 0.08799337595701218, "learning_rate": 2.7189917144359058e-05, "loss": 10.2454, "step": 19210 }, { "epoch": 0.09598242153362131, "grad_norm": 0.09332183748483658, "learning_rate": 2.7188415229417508e-05, "loss": 10.2492, "step": 19220 }, { "epoch": 0.09603236035856076, "grad_norm": 0.09035744518041611, "learning_rate": 2.718691331447596e-05, "loss": 10.2473, "step": 19230 }, { "epoch": 0.09608229918350021, "grad_norm": 0.09255316108465195, "learning_rate": 2.7185411399534405e-05, "loss": 10.2487, "step": 19240 }, { "epoch": 0.09613223800843966, "grad_norm": 0.0899682343006134, "learning_rate": 2.718390948459286e-05, "loss": 10.2411, "step": 19250 }, { "epoch": 0.09618217683337911, "grad_norm": 0.09768253564834595, "learning_rate": 2.7182407569651305e-05, "loss": 10.2458, "step": 19260 }, { "epoch": 0.09623211565831856, "grad_norm": 0.09567224979400635, "learning_rate": 2.7180905654709756e-05, "loss": 10.2468, "step": 19270 }, { "epoch": 0.09628205448325801, "grad_norm": 0.09296666830778122, "learning_rate": 2.7179403739768206e-05, "loss": 10.2441, "step": 19280 }, { "epoch": 0.09633199330819746, "grad_norm": 0.09625297039747238, "learning_rate": 2.7177901824826653e-05, "loss": 10.2453, "step": 19290 }, { "epoch": 0.09638193213313691, "grad_norm": 0.09271930158138275, "learning_rate": 2.7176399909885106e-05, "loss": 10.2413, "step": 19300 }, { "epoch": 0.09643187095807636, "grad_norm": 0.09876267611980438, "learning_rate": 2.7174897994943553e-05, "loss": 10.2448, "step": 19310 }, { "epoch": 0.0964818097830158, "grad_norm": 0.09215805679559708, "learning_rate": 2.7173396080002003e-05, "loss": 10.2435, "step": 19320 }, { "epoch": 0.09653174860795526, "grad_norm": 0.09160521626472473, "learning_rate": 2.7171894165060453e-05, "loss": 10.2456, "step": 19330 }, { "epoch": 0.0965816874328947, "grad_norm": 0.09767121821641922, "learning_rate": 2.71703922501189e-05, "loss": 10.2404, "step": 19340 }, { "epoch": 0.09663162625783416, "grad_norm": 0.09696942567825317, "learning_rate": 2.7168890335177354e-05, "loss": 10.2442, "step": 19350 }, { "epoch": 0.0966815650827736, "grad_norm": 0.09589537233114243, "learning_rate": 2.71673884202358e-05, "loss": 10.2434, "step": 19360 }, { "epoch": 0.09673150390771305, "grad_norm": 0.09620683640241623, "learning_rate": 2.716588650529425e-05, "loss": 10.2424, "step": 19370 }, { "epoch": 0.0967814427326525, "grad_norm": 0.08876433968544006, "learning_rate": 2.71643845903527e-05, "loss": 10.2431, "step": 19380 }, { "epoch": 0.09683138155759195, "grad_norm": 0.094940684735775, "learning_rate": 2.7162882675411148e-05, "loss": 10.2415, "step": 19390 }, { "epoch": 0.0968813203825314, "grad_norm": 0.09419713914394379, "learning_rate": 2.71613807604696e-05, "loss": 10.2437, "step": 19400 }, { "epoch": 0.09693125920747085, "grad_norm": 0.09718811511993408, "learning_rate": 2.7159878845528048e-05, "loss": 10.2393, "step": 19410 }, { "epoch": 0.0969811980324103, "grad_norm": 0.09436991810798645, "learning_rate": 2.7158376930586498e-05, "loss": 10.2408, "step": 19420 }, { "epoch": 0.09703113685734975, "grad_norm": 0.09804237633943558, "learning_rate": 2.715687501564495e-05, "loss": 10.24, "step": 19430 }, { "epoch": 0.0970810756822892, "grad_norm": 0.09041360020637512, "learning_rate": 2.7155373100703395e-05, "loss": 10.2429, "step": 19440 }, { "epoch": 0.09713101450722865, "grad_norm": 0.09413240849971771, "learning_rate": 2.715387118576185e-05, "loss": 10.2405, "step": 19450 }, { "epoch": 0.0971809533321681, "grad_norm": 0.09756959229707718, "learning_rate": 2.7152369270820295e-05, "loss": 10.4918, "step": 19460 }, { "epoch": 0.09723089215710755, "grad_norm": 0.08898182958364487, "learning_rate": 2.7150867355878746e-05, "loss": 10.2411, "step": 19470 }, { "epoch": 0.097280830982047, "grad_norm": 0.09335238486528397, "learning_rate": 2.7149365440937196e-05, "loss": 10.2348, "step": 19480 }, { "epoch": 0.09733076980698645, "grad_norm": 0.09306208789348602, "learning_rate": 2.7147863525995643e-05, "loss": 10.2427, "step": 19490 }, { "epoch": 0.0973807086319259, "grad_norm": 0.09693284332752228, "learning_rate": 2.7146361611054096e-05, "loss": 10.2377, "step": 19500 }, { "epoch": 0.09743064745686535, "grad_norm": 0.10119107365608215, "learning_rate": 2.7144859696112543e-05, "loss": 10.2366, "step": 19510 }, { "epoch": 0.0974805862818048, "grad_norm": 0.10089292377233505, "learning_rate": 2.7143357781170993e-05, "loss": 10.2385, "step": 19520 }, { "epoch": 0.09753052510674425, "grad_norm": 0.09562947601079941, "learning_rate": 2.7141855866229443e-05, "loss": 10.2378, "step": 19530 }, { "epoch": 0.09758046393168368, "grad_norm": 0.09517926722764969, "learning_rate": 2.714035395128789e-05, "loss": 10.237, "step": 19540 }, { "epoch": 0.09763040275662313, "grad_norm": 0.0980682447552681, "learning_rate": 2.7138852036346344e-05, "loss": 10.2385, "step": 19550 }, { "epoch": 0.09768034158156258, "grad_norm": 0.09545796364545822, "learning_rate": 2.713735012140479e-05, "loss": 10.2375, "step": 19560 }, { "epoch": 0.09773028040650203, "grad_norm": 0.09550634771585464, "learning_rate": 2.7135848206463244e-05, "loss": 10.2384, "step": 19570 }, { "epoch": 0.09778021923144148, "grad_norm": 0.09427410364151001, "learning_rate": 2.713434629152169e-05, "loss": 10.2344, "step": 19580 }, { "epoch": 0.09783015805638093, "grad_norm": 0.09462705999612808, "learning_rate": 2.7132844376580138e-05, "loss": 10.2342, "step": 19590 }, { "epoch": 0.09788009688132038, "grad_norm": 0.1018369272351265, "learning_rate": 2.713134246163859e-05, "loss": 10.2329, "step": 19600 }, { "epoch": 0.09793003570625983, "grad_norm": 0.09698480367660522, "learning_rate": 2.7129840546697038e-05, "loss": 10.2358, "step": 19610 }, { "epoch": 0.09797997453119928, "grad_norm": 0.09769893437623978, "learning_rate": 2.712833863175549e-05, "loss": 10.2382, "step": 19620 }, { "epoch": 0.09802991335613873, "grad_norm": 0.09582178294658661, "learning_rate": 2.712683671681394e-05, "loss": 10.2342, "step": 19630 }, { "epoch": 0.09807985218107818, "grad_norm": 0.09619859606027603, "learning_rate": 2.7125334801872385e-05, "loss": 10.2356, "step": 19640 }, { "epoch": 0.09812979100601762, "grad_norm": 0.09638599306344986, "learning_rate": 2.712383288693084e-05, "loss": 10.2338, "step": 19650 }, { "epoch": 0.09817972983095707, "grad_norm": 0.10089550167322159, "learning_rate": 2.7122330971989285e-05, "loss": 10.2301, "step": 19660 }, { "epoch": 0.09822966865589652, "grad_norm": 0.09721601009368896, "learning_rate": 2.712082905704774e-05, "loss": 10.2364, "step": 19670 }, { "epoch": 0.09827960748083597, "grad_norm": 0.09357859939336777, "learning_rate": 2.7119327142106186e-05, "loss": 10.236, "step": 19680 }, { "epoch": 0.09832954630577542, "grad_norm": 0.09338711202144623, "learning_rate": 2.7117825227164633e-05, "loss": 10.2298, "step": 19690 }, { "epoch": 0.09837948513071487, "grad_norm": 0.09414970874786377, "learning_rate": 2.7116323312223086e-05, "loss": 10.2383, "step": 19700 }, { "epoch": 0.09842942395565432, "grad_norm": 0.10141325742006302, "learning_rate": 2.7114821397281533e-05, "loss": 10.2353, "step": 19710 }, { "epoch": 0.09847936278059377, "grad_norm": 0.09696213901042938, "learning_rate": 2.7113319482339987e-05, "loss": 10.2326, "step": 19720 }, { "epoch": 0.09852930160553322, "grad_norm": 0.09888730943202972, "learning_rate": 2.7111817567398433e-05, "loss": 10.2354, "step": 19730 }, { "epoch": 0.09857924043047267, "grad_norm": 0.089784175157547, "learning_rate": 2.711031565245688e-05, "loss": 10.2333, "step": 19740 }, { "epoch": 0.09862917925541212, "grad_norm": 0.09449610859155655, "learning_rate": 2.7108813737515334e-05, "loss": 10.233, "step": 19750 }, { "epoch": 0.09867911808035157, "grad_norm": 0.09268537908792496, "learning_rate": 2.710731182257378e-05, "loss": 10.2273, "step": 19760 }, { "epoch": 0.09872905690529102, "grad_norm": 0.09909249097108841, "learning_rate": 2.7105809907632234e-05, "loss": 10.2293, "step": 19770 }, { "epoch": 0.09877899573023047, "grad_norm": 0.10036230832338333, "learning_rate": 2.710430799269068e-05, "loss": 10.2342, "step": 19780 }, { "epoch": 0.09882893455516992, "grad_norm": 0.10188131779432297, "learning_rate": 2.7102806077749128e-05, "loss": 10.2328, "step": 19790 }, { "epoch": 0.09887887338010937, "grad_norm": 0.09562569111585617, "learning_rate": 2.710130416280758e-05, "loss": 10.2278, "step": 19800 }, { "epoch": 0.09892881220504882, "grad_norm": 0.09543158859014511, "learning_rate": 2.7099802247866028e-05, "loss": 10.2257, "step": 19810 }, { "epoch": 0.09897875102998827, "grad_norm": 0.09362302720546722, "learning_rate": 2.709830033292448e-05, "loss": 10.2302, "step": 19820 }, { "epoch": 0.09902868985492772, "grad_norm": 0.09583684056997299, "learning_rate": 2.709679841798293e-05, "loss": 10.2322, "step": 19830 }, { "epoch": 0.09907862867986716, "grad_norm": 0.09406079351902008, "learning_rate": 2.7095296503041375e-05, "loss": 10.2291, "step": 19840 }, { "epoch": 0.09912856750480661, "grad_norm": 0.09514211863279343, "learning_rate": 2.709379458809983e-05, "loss": 10.2303, "step": 19850 }, { "epoch": 0.09917850632974606, "grad_norm": 0.09583709388971329, "learning_rate": 2.7092292673158276e-05, "loss": 10.2299, "step": 19860 }, { "epoch": 0.09922844515468551, "grad_norm": 0.09366525709629059, "learning_rate": 2.709079075821673e-05, "loss": 10.2287, "step": 19870 }, { "epoch": 0.09927838397962496, "grad_norm": 0.09371297061443329, "learning_rate": 2.7089288843275176e-05, "loss": 10.2276, "step": 19880 }, { "epoch": 0.09932832280456441, "grad_norm": 0.09627794474363327, "learning_rate": 2.7087786928333626e-05, "loss": 10.2293, "step": 19890 }, { "epoch": 0.09937826162950386, "grad_norm": 0.09279222786426544, "learning_rate": 2.7086285013392076e-05, "loss": 10.2283, "step": 19900 }, { "epoch": 0.09942820045444331, "grad_norm": 0.0960431769490242, "learning_rate": 2.7084783098450523e-05, "loss": 10.2295, "step": 19910 }, { "epoch": 0.09947813927938276, "grad_norm": 0.0974978655576706, "learning_rate": 2.7083281183508977e-05, "loss": 10.2283, "step": 19920 }, { "epoch": 0.09952807810432221, "grad_norm": 0.09452254325151443, "learning_rate": 2.7081779268567423e-05, "loss": 10.2259, "step": 19930 }, { "epoch": 0.09957801692926166, "grad_norm": 0.09121208637952805, "learning_rate": 2.7080277353625874e-05, "loss": 10.2277, "step": 19940 }, { "epoch": 0.09962795575420111, "grad_norm": 0.09757377207279205, "learning_rate": 2.7078775438684324e-05, "loss": 10.2261, "step": 19950 }, { "epoch": 0.09967789457914056, "grad_norm": 0.09297657012939453, "learning_rate": 2.707727352374277e-05, "loss": 10.2242, "step": 19960 }, { "epoch": 0.09972783340408001, "grad_norm": 0.09815360605716705, "learning_rate": 2.7075771608801224e-05, "loss": 10.2245, "step": 19970 }, { "epoch": 0.09977777222901946, "grad_norm": 0.09438446164131165, "learning_rate": 2.707426969385967e-05, "loss": 10.229, "step": 19980 }, { "epoch": 0.0998277110539589, "grad_norm": 0.09827311336994171, "learning_rate": 2.707276777891812e-05, "loss": 10.2242, "step": 19990 }, { "epoch": 0.09987764987889836, "grad_norm": 0.09929145127534866, "learning_rate": 2.707126586397657e-05, "loss": 10.2251, "step": 20000 }, { "epoch": 0.0999275887038378, "grad_norm": 0.09331025183200836, "learning_rate": 2.7069763949035018e-05, "loss": 10.2282, "step": 20010 }, { "epoch": 0.09997752752877725, "grad_norm": 0.09668101370334625, "learning_rate": 2.706826203409347e-05, "loss": 10.2285, "step": 20020 }, { "epoch": 0.10002746635371669, "grad_norm": 0.09241931140422821, "learning_rate": 2.706676011915192e-05, "loss": 10.2261, "step": 20030 }, { "epoch": 0.10007740517865614, "grad_norm": 0.09374601393938065, "learning_rate": 2.706525820421037e-05, "loss": 10.2251, "step": 20040 }, { "epoch": 0.10012734400359559, "grad_norm": 0.09970963001251221, "learning_rate": 2.706375628926882e-05, "loss": 10.2242, "step": 20050 }, { "epoch": 0.10017728282853504, "grad_norm": 0.10114763677120209, "learning_rate": 2.7062254374327266e-05, "loss": 10.2234, "step": 20060 }, { "epoch": 0.10022722165347449, "grad_norm": 0.09584357589483261, "learning_rate": 2.706075245938572e-05, "loss": 10.223, "step": 20070 }, { "epoch": 0.10027716047841394, "grad_norm": 0.09605442732572556, "learning_rate": 2.7059250544444166e-05, "loss": 10.2236, "step": 20080 }, { "epoch": 0.10032709930335339, "grad_norm": 0.09641440957784653, "learning_rate": 2.7057748629502616e-05, "loss": 10.2236, "step": 20090 }, { "epoch": 0.10037703812829284, "grad_norm": 0.09594506770372391, "learning_rate": 2.7056246714561066e-05, "loss": 10.2208, "step": 20100 }, { "epoch": 0.10042697695323229, "grad_norm": 0.09294579178094864, "learning_rate": 2.7054744799619513e-05, "loss": 10.2232, "step": 20110 }, { "epoch": 0.10047691577817174, "grad_norm": 0.09371384978294373, "learning_rate": 2.7053242884677967e-05, "loss": 10.2246, "step": 20120 }, { "epoch": 0.10052685460311118, "grad_norm": 0.0971645787358284, "learning_rate": 2.7051740969736413e-05, "loss": 10.2199, "step": 20130 }, { "epoch": 0.10057679342805063, "grad_norm": 0.09300613403320312, "learning_rate": 2.7050239054794864e-05, "loss": 10.2229, "step": 20140 }, { "epoch": 0.10062673225299008, "grad_norm": 0.09003175050020218, "learning_rate": 2.7048737139853314e-05, "loss": 10.2235, "step": 20150 }, { "epoch": 0.10067667107792953, "grad_norm": 0.09512855857610703, "learning_rate": 2.704723522491176e-05, "loss": 10.2204, "step": 20160 }, { "epoch": 0.10072660990286898, "grad_norm": 0.10006555169820786, "learning_rate": 2.7045733309970214e-05, "loss": 10.2225, "step": 20170 }, { "epoch": 0.10077654872780843, "grad_norm": 0.09965218603610992, "learning_rate": 2.704423139502866e-05, "loss": 10.2212, "step": 20180 }, { "epoch": 0.10082648755274788, "grad_norm": 0.0935189425945282, "learning_rate": 2.7042729480087114e-05, "loss": 10.2217, "step": 20190 }, { "epoch": 0.10087642637768733, "grad_norm": 0.09258928894996643, "learning_rate": 2.704122756514556e-05, "loss": 10.2222, "step": 20200 }, { "epoch": 0.10092636520262678, "grad_norm": 0.0927526131272316, "learning_rate": 2.703972565020401e-05, "loss": 10.2206, "step": 20210 }, { "epoch": 0.10097630402756623, "grad_norm": 0.09727808088064194, "learning_rate": 2.703822373526246e-05, "loss": 10.2186, "step": 20220 }, { "epoch": 0.10102624285250568, "grad_norm": 0.09847782552242279, "learning_rate": 2.703672182032091e-05, "loss": 10.2159, "step": 20230 }, { "epoch": 0.10107618167744513, "grad_norm": 0.10164419561624527, "learning_rate": 2.7035219905379362e-05, "loss": 10.2183, "step": 20240 }, { "epoch": 0.10112612050238458, "grad_norm": 0.0917501449584961, "learning_rate": 2.703371799043781e-05, "loss": 10.2188, "step": 20250 }, { "epoch": 0.10117605932732403, "grad_norm": 0.09016897529363632, "learning_rate": 2.703221607549626e-05, "loss": 10.2207, "step": 20260 }, { "epoch": 0.10122599815226348, "grad_norm": 0.09659720957279205, "learning_rate": 2.703071416055471e-05, "loss": 10.2198, "step": 20270 }, { "epoch": 0.10127593697720293, "grad_norm": 0.09625518321990967, "learning_rate": 2.7029212245613156e-05, "loss": 10.2209, "step": 20280 }, { "epoch": 0.10132587580214238, "grad_norm": 0.09196675568819046, "learning_rate": 2.702771033067161e-05, "loss": 10.2194, "step": 20290 }, { "epoch": 0.10137581462708183, "grad_norm": 0.09483517706394196, "learning_rate": 2.7026208415730056e-05, "loss": 10.217, "step": 20300 }, { "epoch": 0.10142575345202128, "grad_norm": 0.09146818518638611, "learning_rate": 2.7024706500788506e-05, "loss": 10.2151, "step": 20310 }, { "epoch": 0.10147569227696072, "grad_norm": 0.09970217198133469, "learning_rate": 2.7023204585846957e-05, "loss": 10.2176, "step": 20320 }, { "epoch": 0.10152563110190017, "grad_norm": 0.10319959372282028, "learning_rate": 2.7021702670905403e-05, "loss": 10.2165, "step": 20330 }, { "epoch": 0.10157556992683962, "grad_norm": 0.09527314454317093, "learning_rate": 2.7020200755963857e-05, "loss": 10.2176, "step": 20340 }, { "epoch": 0.10162550875177907, "grad_norm": 0.09798305481672287, "learning_rate": 2.7018698841022304e-05, "loss": 10.2151, "step": 20350 }, { "epoch": 0.10167544757671852, "grad_norm": 0.10353653877973557, "learning_rate": 2.7017196926080754e-05, "loss": 10.2197, "step": 20360 }, { "epoch": 0.10172538640165797, "grad_norm": 0.09512107819318771, "learning_rate": 2.7015695011139204e-05, "loss": 10.2186, "step": 20370 }, { "epoch": 0.10177532522659742, "grad_norm": 0.09513498097658157, "learning_rate": 2.701419309619765e-05, "loss": 10.2179, "step": 20380 }, { "epoch": 0.10182526405153687, "grad_norm": 0.09943837672472, "learning_rate": 2.7012691181256104e-05, "loss": 10.2123, "step": 20390 }, { "epoch": 0.10187520287647632, "grad_norm": 0.09323469549417496, "learning_rate": 2.701118926631455e-05, "loss": 10.2183, "step": 20400 }, { "epoch": 0.10192514170141577, "grad_norm": 0.09345997124910355, "learning_rate": 2.7009687351373e-05, "loss": 10.2135, "step": 20410 }, { "epoch": 0.10197508052635522, "grad_norm": 0.10201733559370041, "learning_rate": 2.700818543643145e-05, "loss": 10.2155, "step": 20420 }, { "epoch": 0.10202501935129467, "grad_norm": 0.08883191645145416, "learning_rate": 2.70066835214899e-05, "loss": 10.2125, "step": 20430 }, { "epoch": 0.10207495817623412, "grad_norm": 0.09999528527259827, "learning_rate": 2.7005181606548352e-05, "loss": 10.2138, "step": 20440 }, { "epoch": 0.10212489700117357, "grad_norm": 0.09363902360200882, "learning_rate": 2.70036796916068e-05, "loss": 10.2105, "step": 20450 }, { "epoch": 0.10217483582611302, "grad_norm": 0.1012590229511261, "learning_rate": 2.700217777666525e-05, "loss": 10.2133, "step": 20460 }, { "epoch": 0.10222477465105247, "grad_norm": 0.09568557888269424, "learning_rate": 2.70006758617237e-05, "loss": 10.2142, "step": 20470 }, { "epoch": 0.10227471347599192, "grad_norm": 0.10000815987586975, "learning_rate": 2.6999173946782146e-05, "loss": 10.2139, "step": 20480 }, { "epoch": 0.10232465230093137, "grad_norm": 0.09673640131950378, "learning_rate": 2.69976720318406e-05, "loss": 10.2101, "step": 20490 }, { "epoch": 0.10237459112587081, "grad_norm": 0.09184068441390991, "learning_rate": 2.6996170116899046e-05, "loss": 10.2125, "step": 20500 }, { "epoch": 0.10242452995081026, "grad_norm": 0.09533202648162842, "learning_rate": 2.6994668201957496e-05, "loss": 10.2116, "step": 20510 }, { "epoch": 0.1024744687757497, "grad_norm": 0.09686296433210373, "learning_rate": 2.6993166287015947e-05, "loss": 10.213, "step": 20520 }, { "epoch": 0.10252440760068915, "grad_norm": 0.09219254553318024, "learning_rate": 2.6991664372074397e-05, "loss": 10.2101, "step": 20530 }, { "epoch": 0.1025743464256286, "grad_norm": 0.0953301191329956, "learning_rate": 2.6990162457132847e-05, "loss": 10.2123, "step": 20540 }, { "epoch": 0.10262428525056805, "grad_norm": 0.09723500162363052, "learning_rate": 2.6988660542191294e-05, "loss": 10.2132, "step": 20550 }, { "epoch": 0.1026742240755075, "grad_norm": 0.09278090298175812, "learning_rate": 2.6987158627249744e-05, "loss": 10.2107, "step": 20560 }, { "epoch": 0.10272416290044695, "grad_norm": 0.09172572195529938, "learning_rate": 2.6985656712308194e-05, "loss": 10.2093, "step": 20570 }, { "epoch": 0.1027741017253864, "grad_norm": 0.09108912199735641, "learning_rate": 2.6984154797366644e-05, "loss": 10.2067, "step": 20580 }, { "epoch": 0.10282404055032585, "grad_norm": 0.09847711771726608, "learning_rate": 2.6982652882425094e-05, "loss": 10.211, "step": 20590 }, { "epoch": 0.1028739793752653, "grad_norm": 0.08993741869926453, "learning_rate": 2.698115096748354e-05, "loss": 10.2101, "step": 20600 }, { "epoch": 0.10292391820020474, "grad_norm": 0.0974576473236084, "learning_rate": 2.697964905254199e-05, "loss": 10.2092, "step": 20610 }, { "epoch": 0.1029738570251442, "grad_norm": 0.09782136976718903, "learning_rate": 2.697814713760044e-05, "loss": 10.206, "step": 20620 }, { "epoch": 0.10302379585008364, "grad_norm": 0.09769613295793533, "learning_rate": 2.6976645222658892e-05, "loss": 10.2108, "step": 20630 }, { "epoch": 0.1030737346750231, "grad_norm": 0.09461203962564468, "learning_rate": 2.6975143307717342e-05, "loss": 10.2053, "step": 20640 }, { "epoch": 0.10312367349996254, "grad_norm": 0.09983788430690765, "learning_rate": 2.697364139277579e-05, "loss": 10.2081, "step": 20650 }, { "epoch": 0.10317361232490199, "grad_norm": 0.09139873832464218, "learning_rate": 2.697213947783424e-05, "loss": 10.2104, "step": 20660 }, { "epoch": 0.10322355114984144, "grad_norm": 0.10040886700153351, "learning_rate": 2.697063756289269e-05, "loss": 10.2067, "step": 20670 }, { "epoch": 0.10327348997478089, "grad_norm": 0.0953102856874466, "learning_rate": 2.696913564795114e-05, "loss": 10.2053, "step": 20680 }, { "epoch": 0.10332342879972034, "grad_norm": 0.0996142104268074, "learning_rate": 2.696763373300959e-05, "loss": 10.2083, "step": 20690 }, { "epoch": 0.10337336762465979, "grad_norm": 0.09398601204156876, "learning_rate": 2.6966131818068036e-05, "loss": 10.2055, "step": 20700 }, { "epoch": 0.10342330644959924, "grad_norm": 0.09323547780513763, "learning_rate": 2.6964629903126486e-05, "loss": 10.2101, "step": 20710 }, { "epoch": 0.10347324527453869, "grad_norm": 0.09476854652166367, "learning_rate": 2.6963127988184937e-05, "loss": 10.2063, "step": 20720 }, { "epoch": 0.10352318409947814, "grad_norm": 0.08883500099182129, "learning_rate": 2.6961626073243387e-05, "loss": 10.2072, "step": 20730 }, { "epoch": 0.10357312292441759, "grad_norm": 0.09644431620836258, "learning_rate": 2.6960124158301837e-05, "loss": 10.2066, "step": 20740 }, { "epoch": 0.10362306174935704, "grad_norm": 0.09586118906736374, "learning_rate": 2.6958622243360284e-05, "loss": 10.2093, "step": 20750 }, { "epoch": 0.10367300057429649, "grad_norm": 0.09478766471147537, "learning_rate": 2.6957120328418734e-05, "loss": 10.2054, "step": 20760 }, { "epoch": 0.10372293939923594, "grad_norm": 0.09579043835401535, "learning_rate": 2.6955618413477184e-05, "loss": 10.207, "step": 20770 }, { "epoch": 0.10377287822417539, "grad_norm": 0.08978202193975449, "learning_rate": 2.6954116498535634e-05, "loss": 10.2019, "step": 20780 }, { "epoch": 0.10382281704911483, "grad_norm": 0.09112431108951569, "learning_rate": 2.6952614583594084e-05, "loss": 10.2072, "step": 20790 }, { "epoch": 0.10387275587405428, "grad_norm": 0.09767849743366241, "learning_rate": 2.695111266865253e-05, "loss": 10.2056, "step": 20800 }, { "epoch": 0.10392269469899373, "grad_norm": 0.09825193881988525, "learning_rate": 2.694961075371098e-05, "loss": 10.2068, "step": 20810 }, { "epoch": 0.10397263352393318, "grad_norm": 0.09760141372680664, "learning_rate": 2.694810883876943e-05, "loss": 10.2078, "step": 20820 }, { "epoch": 0.10402257234887263, "grad_norm": 0.09558884054422379, "learning_rate": 2.6946606923827882e-05, "loss": 10.2056, "step": 20830 }, { "epoch": 0.10407251117381208, "grad_norm": 0.09403236955404282, "learning_rate": 2.6945105008886332e-05, "loss": 10.2005, "step": 20840 }, { "epoch": 0.10412244999875153, "grad_norm": 0.09680522978305817, "learning_rate": 2.6943603093944782e-05, "loss": 10.2052, "step": 20850 }, { "epoch": 0.10417238882369098, "grad_norm": 0.09517577290534973, "learning_rate": 2.694210117900323e-05, "loss": 10.2047, "step": 20860 }, { "epoch": 0.10422232764863043, "grad_norm": 0.09840694069862366, "learning_rate": 2.694059926406168e-05, "loss": 10.2048, "step": 20870 }, { "epoch": 0.10427226647356988, "grad_norm": 0.09037858247756958, "learning_rate": 2.693909734912013e-05, "loss": 10.1986, "step": 20880 }, { "epoch": 0.10432220529850933, "grad_norm": 0.09096881747245789, "learning_rate": 2.693759543417858e-05, "loss": 10.204, "step": 20890 }, { "epoch": 0.10437214412344878, "grad_norm": 0.0983474999666214, "learning_rate": 2.693609351923703e-05, "loss": 10.2116, "step": 20900 }, { "epoch": 0.10442208294838823, "grad_norm": 0.08912882208824158, "learning_rate": 2.6934591604295476e-05, "loss": 10.2036, "step": 20910 }, { "epoch": 0.10447202177332768, "grad_norm": 122905.046875, "learning_rate": 2.6933089689353927e-05, "loss": 11.4131, "step": 20920 }, { "epoch": 0.10452196059826713, "grad_norm": 0.09772419929504395, "learning_rate": 2.6931587774412377e-05, "loss": 38.3405, "step": 20930 }, { "epoch": 0.10457189942320658, "grad_norm": 0.09270507097244263, "learning_rate": 2.6930085859470827e-05, "loss": 10.579, "step": 20940 }, { "epoch": 0.10462183824814603, "grad_norm": 0.10307878255844116, "learning_rate": 2.6928583944529277e-05, "loss": 10.1982, "step": 20950 }, { "epoch": 0.10467177707308548, "grad_norm": 0.10254894942045212, "learning_rate": 2.6927082029587724e-05, "loss": 10.2039, "step": 20960 }, { "epoch": 0.10472171589802493, "grad_norm": 0.0985196977853775, "learning_rate": 2.6925580114646174e-05, "loss": 10.2041, "step": 20970 }, { "epoch": 0.10477165472296437, "grad_norm": 0.0948171392083168, "learning_rate": 2.6924078199704624e-05, "loss": 10.2012, "step": 20980 }, { "epoch": 0.10482159354790382, "grad_norm": 0.09431871771812439, "learning_rate": 2.6922576284763075e-05, "loss": 10.2013, "step": 20990 }, { "epoch": 0.10487153237284327, "grad_norm": 0.09847448766231537, "learning_rate": 2.6921074369821525e-05, "loss": 10.1986, "step": 21000 }, { "epoch": 0.10492147119778272, "grad_norm": 0.09803617000579834, "learning_rate": 2.691957245487997e-05, "loss": 10.1999, "step": 21010 }, { "epoch": 0.10497141002272216, "grad_norm": 0.09164676815271378, "learning_rate": 2.691807053993842e-05, "loss": 10.1964, "step": 21020 }, { "epoch": 0.10502134884766161, "grad_norm": 0.09696748852729797, "learning_rate": 2.6916568624996872e-05, "loss": 10.1992, "step": 21030 }, { "epoch": 0.10507128767260106, "grad_norm": 0.090817391872406, "learning_rate": 2.6915066710055322e-05, "loss": 10.199, "step": 21040 }, { "epoch": 0.10512122649754051, "grad_norm": 0.09760018438100815, "learning_rate": 2.6913564795113772e-05, "loss": 10.2002, "step": 21050 }, { "epoch": 0.10517116532247996, "grad_norm": 0.09421108663082123, "learning_rate": 2.691206288017222e-05, "loss": 10.1982, "step": 21060 }, { "epoch": 0.1052211041474194, "grad_norm": 0.09282322973012924, "learning_rate": 2.691056096523067e-05, "loss": 10.1998, "step": 21070 }, { "epoch": 0.10527104297235886, "grad_norm": 0.09494070708751678, "learning_rate": 2.690905905028912e-05, "loss": 10.1958, "step": 21080 }, { "epoch": 0.1053209817972983, "grad_norm": 0.0971512645483017, "learning_rate": 2.690755713534757e-05, "loss": 10.2041, "step": 21090 }, { "epoch": 0.10537092062223775, "grad_norm": 0.09018143266439438, "learning_rate": 2.690605522040602e-05, "loss": 10.1996, "step": 21100 }, { "epoch": 0.1054208594471772, "grad_norm": 0.09023455530405045, "learning_rate": 2.6904553305464466e-05, "loss": 10.2006, "step": 21110 }, { "epoch": 0.10547079827211665, "grad_norm": 0.09394627809524536, "learning_rate": 2.6903051390522917e-05, "loss": 10.1966, "step": 21120 }, { "epoch": 0.1055207370970561, "grad_norm": 0.09698754549026489, "learning_rate": 2.6901549475581367e-05, "loss": 10.1971, "step": 21130 }, { "epoch": 0.10557067592199555, "grad_norm": 0.09606551378965378, "learning_rate": 2.6900047560639817e-05, "loss": 10.1975, "step": 21140 }, { "epoch": 0.105620614746935, "grad_norm": 0.0962950810790062, "learning_rate": 2.6898545645698267e-05, "loss": 10.194, "step": 21150 }, { "epoch": 0.10567055357187445, "grad_norm": 0.09697403013706207, "learning_rate": 2.6897043730756714e-05, "loss": 10.1934, "step": 21160 }, { "epoch": 0.1057204923968139, "grad_norm": 0.09771595895290375, "learning_rate": 2.6895541815815164e-05, "loss": 10.197, "step": 21170 }, { "epoch": 0.10577043122175335, "grad_norm": 0.09692160040140152, "learning_rate": 2.6894039900873614e-05, "loss": 10.196, "step": 21180 }, { "epoch": 0.1058203700466928, "grad_norm": 0.09422864764928818, "learning_rate": 2.6892537985932065e-05, "loss": 10.1967, "step": 21190 }, { "epoch": 0.10587030887163225, "grad_norm": 0.09830430895090103, "learning_rate": 2.6891036070990515e-05, "loss": 10.1954, "step": 21200 }, { "epoch": 0.1059202476965717, "grad_norm": 0.09837440401315689, "learning_rate": 2.688953415604896e-05, "loss": 10.1955, "step": 21210 }, { "epoch": 0.10597018652151115, "grad_norm": 0.09582128375768661, "learning_rate": 2.6888032241107415e-05, "loss": 10.1957, "step": 21220 }, { "epoch": 0.1060201253464506, "grad_norm": 0.09249364584684372, "learning_rate": 2.6886530326165862e-05, "loss": 10.1941, "step": 21230 }, { "epoch": 0.10607006417139005, "grad_norm": 0.09359084814786911, "learning_rate": 2.6885028411224312e-05, "loss": 10.1938, "step": 21240 }, { "epoch": 0.1061200029963295, "grad_norm": 0.09493474662303925, "learning_rate": 2.6883526496282762e-05, "loss": 10.1944, "step": 21250 }, { "epoch": 0.10616994182126895, "grad_norm": 0.09470515698194504, "learning_rate": 2.688202458134121e-05, "loss": 10.1947, "step": 21260 }, { "epoch": 0.1062198806462084, "grad_norm": 0.09477045387029648, "learning_rate": 2.6880522666399663e-05, "loss": 10.1968, "step": 21270 }, { "epoch": 0.10626981947114784, "grad_norm": 0.0963263064622879, "learning_rate": 2.687902075145811e-05, "loss": 10.1905, "step": 21280 }, { "epoch": 0.1063197582960873, "grad_norm": 0.08664928376674652, "learning_rate": 2.687751883651656e-05, "loss": 10.1931, "step": 21290 }, { "epoch": 0.10636969712102674, "grad_norm": 0.09358591586351395, "learning_rate": 2.687601692157501e-05, "loss": 10.1923, "step": 21300 }, { "epoch": 0.10641963594596619, "grad_norm": 0.09352956712245941, "learning_rate": 2.6874515006633457e-05, "loss": 10.1936, "step": 21310 }, { "epoch": 0.10646957477090564, "grad_norm": 0.09512390941381454, "learning_rate": 2.687301309169191e-05, "loss": 10.1936, "step": 21320 }, { "epoch": 0.10651951359584509, "grad_norm": 0.09855396300554276, "learning_rate": 2.6871511176750357e-05, "loss": 10.1925, "step": 21330 }, { "epoch": 0.10656945242078454, "grad_norm": 0.09649109095335007, "learning_rate": 2.6870009261808807e-05, "loss": 10.189, "step": 21340 }, { "epoch": 0.10661939124572399, "grad_norm": 0.09623111039400101, "learning_rate": 2.6868507346867257e-05, "loss": 10.1903, "step": 21350 }, { "epoch": 0.10666933007066344, "grad_norm": 0.1012498065829277, "learning_rate": 2.6867005431925704e-05, "loss": 10.1914, "step": 21360 }, { "epoch": 0.10671926889560289, "grad_norm": 0.10435005277395248, "learning_rate": 2.6865503516984158e-05, "loss": 10.1913, "step": 21370 }, { "epoch": 0.10676920772054234, "grad_norm": 0.09784939885139465, "learning_rate": 2.6864001602042604e-05, "loss": 10.1911, "step": 21380 }, { "epoch": 0.10681914654548179, "grad_norm": 0.09625697135925293, "learning_rate": 2.6862499687101055e-05, "loss": 10.1886, "step": 21390 }, { "epoch": 0.10686908537042124, "grad_norm": 0.09613115340471268, "learning_rate": 2.6860997772159505e-05, "loss": 10.1896, "step": 21400 }, { "epoch": 0.10691902419536069, "grad_norm": 0.0873713418841362, "learning_rate": 2.685949585721795e-05, "loss": 10.188, "step": 21410 }, { "epoch": 0.10696896302030014, "grad_norm": 0.09823717176914215, "learning_rate": 2.6857993942276405e-05, "loss": 10.1895, "step": 21420 }, { "epoch": 0.10701890184523959, "grad_norm": 0.09435474872589111, "learning_rate": 2.6856492027334852e-05, "loss": 10.1872, "step": 21430 }, { "epoch": 0.10706884067017904, "grad_norm": 0.09932689368724823, "learning_rate": 2.6854990112393302e-05, "loss": 10.189, "step": 21440 }, { "epoch": 0.10711877949511849, "grad_norm": 0.1061321571469307, "learning_rate": 2.6853488197451752e-05, "loss": 10.1871, "step": 21450 }, { "epoch": 0.10716871832005793, "grad_norm": 0.08677263557910919, "learning_rate": 2.68519862825102e-05, "loss": 10.1935, "step": 21460 }, { "epoch": 0.10721865714499738, "grad_norm": 0.09986065328121185, "learning_rate": 2.6850484367568653e-05, "loss": 10.1886, "step": 21470 }, { "epoch": 0.10726859596993683, "grad_norm": 0.09638167917728424, "learning_rate": 2.68489824526271e-05, "loss": 10.1887, "step": 21480 }, { "epoch": 0.10731853479487628, "grad_norm": 0.09618506580591202, "learning_rate": 2.684748053768555e-05, "loss": 10.1871, "step": 21490 }, { "epoch": 0.10736847361981573, "grad_norm": 0.09868462383747101, "learning_rate": 2.6845978622744e-05, "loss": 10.1872, "step": 21500 }, { "epoch": 0.10741841244475517, "grad_norm": 0.08971121162176132, "learning_rate": 2.6844476707802447e-05, "loss": 10.1863, "step": 21510 }, { "epoch": 0.10746835126969462, "grad_norm": 0.10220789909362793, "learning_rate": 2.68429747928609e-05, "loss": 10.1839, "step": 21520 }, { "epoch": 0.10751829009463407, "grad_norm": 0.09101521223783493, "learning_rate": 2.6841472877919347e-05, "loss": 10.1825, "step": 21530 }, { "epoch": 0.10756822891957352, "grad_norm": 0.09986945986747742, "learning_rate": 2.68399709629778e-05, "loss": 10.1873, "step": 21540 }, { "epoch": 0.10761816774451297, "grad_norm": 0.09732682257890701, "learning_rate": 2.6838469048036247e-05, "loss": 10.1841, "step": 21550 }, { "epoch": 0.10766810656945242, "grad_norm": 0.09686817973852158, "learning_rate": 2.6836967133094694e-05, "loss": 10.1836, "step": 21560 }, { "epoch": 0.10771804539439186, "grad_norm": 0.09880684316158295, "learning_rate": 2.6835465218153148e-05, "loss": 10.1874, "step": 21570 }, { "epoch": 0.10776798421933131, "grad_norm": 0.09467620402574539, "learning_rate": 2.6833963303211594e-05, "loss": 10.1826, "step": 21580 }, { "epoch": 0.10781792304427076, "grad_norm": 0.0975777879357338, "learning_rate": 2.6832461388270048e-05, "loss": 10.1839, "step": 21590 }, { "epoch": 0.10786786186921021, "grad_norm": 0.09700115025043488, "learning_rate": 2.6830959473328495e-05, "loss": 10.1848, "step": 21600 }, { "epoch": 0.10791780069414966, "grad_norm": 0.0938863530755043, "learning_rate": 2.682945755838694e-05, "loss": 10.1856, "step": 21610 }, { "epoch": 0.10796773951908911, "grad_norm": 0.09892918914556503, "learning_rate": 2.6827955643445395e-05, "loss": 10.1825, "step": 21620 }, { "epoch": 0.10801767834402856, "grad_norm": 0.08988924324512482, "learning_rate": 2.6826453728503842e-05, "loss": 10.1828, "step": 21630 }, { "epoch": 0.10806761716896801, "grad_norm": 0.09243562072515488, "learning_rate": 2.6824951813562295e-05, "loss": 10.1822, "step": 21640 }, { "epoch": 0.10811755599390746, "grad_norm": 0.09508119523525238, "learning_rate": 2.6823449898620742e-05, "loss": 10.1855, "step": 21650 }, { "epoch": 0.10816749481884691, "grad_norm": 0.09622988849878311, "learning_rate": 2.682194798367919e-05, "loss": 10.1833, "step": 21660 }, { "epoch": 0.10821743364378636, "grad_norm": 0.09975172579288483, "learning_rate": 2.6820446068737643e-05, "loss": 10.1856, "step": 21670 }, { "epoch": 0.10826737246872581, "grad_norm": 0.10020721703767776, "learning_rate": 2.681894415379609e-05, "loss": 10.1846, "step": 21680 }, { "epoch": 0.10831731129366526, "grad_norm": 0.091154083609581, "learning_rate": 2.6817442238854543e-05, "loss": 10.1915, "step": 21690 }, { "epoch": 0.10836725011860471, "grad_norm": 0.09663428366184235, "learning_rate": 2.681594032391299e-05, "loss": 10.1844, "step": 21700 }, { "epoch": 0.10841718894354416, "grad_norm": 0.09310304373502731, "learning_rate": 2.6814438408971437e-05, "loss": 10.1833, "step": 21710 }, { "epoch": 0.1084671277684836, "grad_norm": 0.09270327538251877, "learning_rate": 2.681293649402989e-05, "loss": 10.1821, "step": 21720 }, { "epoch": 0.10851706659342306, "grad_norm": 0.09398692846298218, "learning_rate": 2.6811434579088337e-05, "loss": 10.1829, "step": 21730 }, { "epoch": 0.1085670054183625, "grad_norm": 0.10346375405788422, "learning_rate": 2.680993266414679e-05, "loss": 10.1817, "step": 21740 }, { "epoch": 0.10861694424330195, "grad_norm": 0.09369218349456787, "learning_rate": 2.6808430749205237e-05, "loss": 10.1809, "step": 21750 }, { "epoch": 0.1086668830682414, "grad_norm": 0.09752266108989716, "learning_rate": 2.6806928834263684e-05, "loss": 10.1832, "step": 21760 }, { "epoch": 0.10871682189318085, "grad_norm": 0.09794414043426514, "learning_rate": 2.6805426919322138e-05, "loss": 10.1814, "step": 21770 }, { "epoch": 0.1087667607181203, "grad_norm": 0.09713537245988846, "learning_rate": 2.6803925004380584e-05, "loss": 10.1832, "step": 21780 }, { "epoch": 0.10881669954305975, "grad_norm": 0.09311962127685547, "learning_rate": 2.6802423089439038e-05, "loss": 10.1797, "step": 21790 }, { "epoch": 0.1088666383679992, "grad_norm": 0.095693439245224, "learning_rate": 2.6800921174497485e-05, "loss": 10.1804, "step": 21800 }, { "epoch": 0.10891657719293865, "grad_norm": 0.0987323671579361, "learning_rate": 2.679941925955593e-05, "loss": 10.1776, "step": 21810 }, { "epoch": 0.1089665160178781, "grad_norm": 0.09942522644996643, "learning_rate": 2.6797917344614385e-05, "loss": 10.1785, "step": 21820 }, { "epoch": 0.10901645484281755, "grad_norm": 0.09082325547933578, "learning_rate": 2.6796415429672832e-05, "loss": 10.1772, "step": 21830 }, { "epoch": 0.109066393667757, "grad_norm": 0.08826711773872375, "learning_rate": 2.6794913514731285e-05, "loss": 10.1822, "step": 21840 }, { "epoch": 0.10911633249269645, "grad_norm": 0.09767315536737442, "learning_rate": 2.6793411599789732e-05, "loss": 10.1764, "step": 21850 }, { "epoch": 0.1091662713176359, "grad_norm": 0.0958879366517067, "learning_rate": 2.6791909684848182e-05, "loss": 10.1805, "step": 21860 }, { "epoch": 0.10921621014257535, "grad_norm": 0.09533664584159851, "learning_rate": 2.6790407769906633e-05, "loss": 10.1824, "step": 21870 }, { "epoch": 0.1092661489675148, "grad_norm": 0.09033321589231491, "learning_rate": 2.678890585496508e-05, "loss": 10.1783, "step": 21880 }, { "epoch": 0.10931608779245425, "grad_norm": 0.09859579056501389, "learning_rate": 2.6787403940023533e-05, "loss": 10.1763, "step": 21890 }, { "epoch": 0.1093660266173937, "grad_norm": 0.09806695580482483, "learning_rate": 2.678590202508198e-05, "loss": 10.1808, "step": 21900 }, { "epoch": 0.10941596544233315, "grad_norm": 0.09141387790441513, "learning_rate": 2.678440011014043e-05, "loss": 10.1787, "step": 21910 }, { "epoch": 0.1094659042672726, "grad_norm": 0.09609120339155197, "learning_rate": 2.678289819519888e-05, "loss": 10.1731, "step": 21920 }, { "epoch": 0.10951584309221205, "grad_norm": 0.09060762822628021, "learning_rate": 2.6781396280257327e-05, "loss": 10.1776, "step": 21930 }, { "epoch": 0.1095657819171515, "grad_norm": 0.09392700344324112, "learning_rate": 2.677989436531578e-05, "loss": 10.1774, "step": 21940 }, { "epoch": 0.10961572074209094, "grad_norm": 0.09288621693849564, "learning_rate": 2.6778392450374227e-05, "loss": 10.1799, "step": 21950 }, { "epoch": 0.1096656595670304, "grad_norm": 0.09960731863975525, "learning_rate": 2.6776890535432677e-05, "loss": 10.1738, "step": 21960 }, { "epoch": 0.10971559839196984, "grad_norm": 0.09052469581365585, "learning_rate": 2.6775388620491128e-05, "loss": 10.1785, "step": 21970 }, { "epoch": 0.10976553721690929, "grad_norm": 0.09761349111795425, "learning_rate": 2.6773886705549574e-05, "loss": 10.1752, "step": 21980 }, { "epoch": 0.10981547604184874, "grad_norm": 0.09489648789167404, "learning_rate": 2.6772384790608028e-05, "loss": 10.1732, "step": 21990 }, { "epoch": 0.10986541486678819, "grad_norm": 0.0943918228149414, "learning_rate": 2.6770882875666475e-05, "loss": 10.1755, "step": 22000 }, { "epoch": 0.10991535369172763, "grad_norm": 0.09196553379297256, "learning_rate": 2.6769380960724925e-05, "loss": 10.1725, "step": 22010 }, { "epoch": 0.10996529251666708, "grad_norm": 0.09334488213062286, "learning_rate": 2.6767879045783375e-05, "loss": 10.1756, "step": 22020 }, { "epoch": 0.11001523134160653, "grad_norm": 0.10055167973041534, "learning_rate": 2.6766377130841822e-05, "loss": 10.1705, "step": 22030 }, { "epoch": 0.11006517016654598, "grad_norm": 0.09944339841604233, "learning_rate": 2.6764875215900275e-05, "loss": 10.1801, "step": 22040 }, { "epoch": 0.11011510899148542, "grad_norm": 0.10031431168317795, "learning_rate": 2.6763373300958722e-05, "loss": 10.1704, "step": 22050 }, { "epoch": 0.11016504781642487, "grad_norm": 0.09707958251237869, "learning_rate": 2.6761871386017172e-05, "loss": 10.1752, "step": 22060 }, { "epoch": 0.11021498664136432, "grad_norm": 0.09501238167285919, "learning_rate": 2.6760369471075623e-05, "loss": 10.1698, "step": 22070 }, { "epoch": 0.11026492546630377, "grad_norm": 0.09578476846218109, "learning_rate": 2.675886755613407e-05, "loss": 10.175, "step": 22080 }, { "epoch": 0.11031486429124322, "grad_norm": 0.09270567446947098, "learning_rate": 2.6757365641192523e-05, "loss": 10.1732, "step": 22090 }, { "epoch": 0.11036480311618267, "grad_norm": 0.09588302671909332, "learning_rate": 2.675586372625097e-05, "loss": 10.1726, "step": 22100 }, { "epoch": 0.11041474194112212, "grad_norm": 0.09637803584337234, "learning_rate": 2.675436181130942e-05, "loss": 10.1763, "step": 22110 }, { "epoch": 0.11046468076606157, "grad_norm": 0.09648361802101135, "learning_rate": 2.675285989636787e-05, "loss": 10.1731, "step": 22120 }, { "epoch": 0.11051461959100102, "grad_norm": 0.0957055613398552, "learning_rate": 2.6751357981426317e-05, "loss": 10.1696, "step": 22130 }, { "epoch": 0.11056455841594047, "grad_norm": 0.09542543441057205, "learning_rate": 2.674985606648477e-05, "loss": 10.1725, "step": 22140 }, { "epoch": 0.11061449724087992, "grad_norm": 0.09192683547735214, "learning_rate": 2.6748354151543217e-05, "loss": 10.1725, "step": 22150 }, { "epoch": 0.11066443606581937, "grad_norm": 0.09353506565093994, "learning_rate": 2.6746852236601667e-05, "loss": 10.1663, "step": 22160 }, { "epoch": 0.11071437489075882, "grad_norm": 0.09622788429260254, "learning_rate": 2.6745350321660118e-05, "loss": 10.1658, "step": 22170 }, { "epoch": 0.11076431371569827, "grad_norm": 0.0987735167145729, "learning_rate": 2.6743848406718568e-05, "loss": 10.1678, "step": 22180 }, { "epoch": 0.11081425254063772, "grad_norm": 0.09520336985588074, "learning_rate": 2.6742346491777018e-05, "loss": 10.1679, "step": 22190 }, { "epoch": 0.11086419136557717, "grad_norm": 0.0966307744383812, "learning_rate": 2.6740844576835465e-05, "loss": 10.1683, "step": 22200 }, { "epoch": 0.11091413019051662, "grad_norm": 0.10376681387424469, "learning_rate": 2.6739342661893915e-05, "loss": 10.1714, "step": 22210 }, { "epoch": 0.11096406901545607, "grad_norm": 0.09316543489694595, "learning_rate": 2.6737840746952365e-05, "loss": 10.169, "step": 22220 }, { "epoch": 0.11101400784039551, "grad_norm": 0.09309463202953339, "learning_rate": 2.6736338832010815e-05, "loss": 10.1702, "step": 22230 }, { "epoch": 0.11106394666533496, "grad_norm": 0.09572753310203552, "learning_rate": 2.6734836917069265e-05, "loss": 10.1718, "step": 22240 }, { "epoch": 0.11111388549027441, "grad_norm": 0.0948120579123497, "learning_rate": 2.6733335002127712e-05, "loss": 10.1707, "step": 22250 }, { "epoch": 0.11116382431521386, "grad_norm": 0.09530732035636902, "learning_rate": 2.6731833087186162e-05, "loss": 10.1678, "step": 22260 }, { "epoch": 0.11121376314015331, "grad_norm": 0.10655371099710464, "learning_rate": 2.6730331172244613e-05, "loss": 10.1669, "step": 22270 }, { "epoch": 0.11126370196509276, "grad_norm": 0.09528721123933792, "learning_rate": 2.6728829257303063e-05, "loss": 10.1656, "step": 22280 }, { "epoch": 0.11131364079003221, "grad_norm": 0.09272367507219315, "learning_rate": 2.6727327342361513e-05, "loss": 10.1669, "step": 22290 }, { "epoch": 0.11136357961497166, "grad_norm": 0.0940340906381607, "learning_rate": 2.672582542741996e-05, "loss": 10.1666, "step": 22300 }, { "epoch": 0.11141351843991111, "grad_norm": 0.09455035626888275, "learning_rate": 2.672432351247841e-05, "loss": 10.1686, "step": 22310 }, { "epoch": 0.11146345726485056, "grad_norm": 0.09895509481430054, "learning_rate": 2.672282159753686e-05, "loss": 10.1666, "step": 22320 }, { "epoch": 0.11151339608979001, "grad_norm": 0.09388074278831482, "learning_rate": 2.672131968259531e-05, "loss": 10.1655, "step": 22330 }, { "epoch": 0.11156333491472946, "grad_norm": 0.09425579011440277, "learning_rate": 2.671981776765376e-05, "loss": 10.1626, "step": 22340 }, { "epoch": 0.11161327373966891, "grad_norm": 0.09784049540758133, "learning_rate": 2.6718315852712207e-05, "loss": 10.1689, "step": 22350 }, { "epoch": 0.11166321256460836, "grad_norm": 0.09686186909675598, "learning_rate": 2.6716813937770657e-05, "loss": 10.1677, "step": 22360 }, { "epoch": 0.11171315138954781, "grad_norm": 0.09419489651918411, "learning_rate": 2.6715312022829108e-05, "loss": 10.1638, "step": 22370 }, { "epoch": 0.11176309021448726, "grad_norm": 0.09300322085618973, "learning_rate": 2.6713810107887558e-05, "loss": 10.1697, "step": 22380 }, { "epoch": 0.1118130290394267, "grad_norm": 0.09139790385961533, "learning_rate": 2.6712308192946008e-05, "loss": 10.165, "step": 22390 }, { "epoch": 0.11186296786436616, "grad_norm": 0.09501411765813828, "learning_rate": 2.6710806278004455e-05, "loss": 10.1646, "step": 22400 }, { "epoch": 0.1119129066893056, "grad_norm": 0.09894577413797379, "learning_rate": 2.6709304363062905e-05, "loss": 10.1673, "step": 22410 }, { "epoch": 0.11196284551424505, "grad_norm": 0.09686759859323502, "learning_rate": 2.6707802448121355e-05, "loss": 10.1675, "step": 22420 }, { "epoch": 0.1120127843391845, "grad_norm": 0.09430637210607529, "learning_rate": 2.6706300533179805e-05, "loss": 10.165, "step": 22430 }, { "epoch": 0.11206272316412395, "grad_norm": 0.09339676797389984, "learning_rate": 2.6704798618238256e-05, "loss": 10.1635, "step": 22440 }, { "epoch": 0.1121126619890634, "grad_norm": 0.09374126046895981, "learning_rate": 2.6703296703296702e-05, "loss": 10.1578, "step": 22450 }, { "epoch": 0.11216260081400285, "grad_norm": 0.09082947671413422, "learning_rate": 2.6701794788355152e-05, "loss": 10.1673, "step": 22460 }, { "epoch": 0.1122125396389423, "grad_norm": 0.0888797864317894, "learning_rate": 2.6700292873413603e-05, "loss": 10.1637, "step": 22470 }, { "epoch": 0.11226247846388175, "grad_norm": 0.09704219549894333, "learning_rate": 2.6698790958472053e-05, "loss": 10.1671, "step": 22480 }, { "epoch": 0.1123124172888212, "grad_norm": 0.09523064643144608, "learning_rate": 2.6697289043530503e-05, "loss": 10.1643, "step": 22490 }, { "epoch": 0.11236235611376064, "grad_norm": 0.09382279962301254, "learning_rate": 2.6695787128588953e-05, "loss": 10.1594, "step": 22500 }, { "epoch": 0.11241229493870009, "grad_norm": 0.09759511053562164, "learning_rate": 2.66942852136474e-05, "loss": 10.1612, "step": 22510 }, { "epoch": 0.11246223376363954, "grad_norm": 0.09149253368377686, "learning_rate": 2.669278329870585e-05, "loss": 10.1641, "step": 22520 }, { "epoch": 0.11251217258857898, "grad_norm": 0.09213311225175858, "learning_rate": 2.66912813837643e-05, "loss": 10.1583, "step": 22530 }, { "epoch": 0.11256211141351843, "grad_norm": 0.09670817106962204, "learning_rate": 2.668977946882275e-05, "loss": 10.1616, "step": 22540 }, { "epoch": 0.11261205023845788, "grad_norm": 0.09670945256948471, "learning_rate": 2.66882775538812e-05, "loss": 10.1624, "step": 22550 }, { "epoch": 0.11266198906339733, "grad_norm": 0.09598565846681595, "learning_rate": 2.6686775638939647e-05, "loss": 10.1624, "step": 22560 }, { "epoch": 0.11271192788833678, "grad_norm": 0.0956323966383934, "learning_rate": 2.6685273723998098e-05, "loss": 10.1591, "step": 22570 }, { "epoch": 0.11276186671327623, "grad_norm": 0.09476076811552048, "learning_rate": 2.6683771809056548e-05, "loss": 10.1595, "step": 22580 }, { "epoch": 0.11281180553821568, "grad_norm": 0.09837251901626587, "learning_rate": 2.6682269894114998e-05, "loss": 10.1638, "step": 22590 }, { "epoch": 0.11286174436315513, "grad_norm": 0.09544532746076584, "learning_rate": 2.6680767979173448e-05, "loss": 10.1615, "step": 22600 }, { "epoch": 0.11291168318809458, "grad_norm": 0.09193170815706253, "learning_rate": 2.6679266064231895e-05, "loss": 10.162, "step": 22610 }, { "epoch": 0.11296162201303403, "grad_norm": 0.09794174134731293, "learning_rate": 2.6677764149290345e-05, "loss": 10.1608, "step": 22620 }, { "epoch": 0.11301156083797348, "grad_norm": 0.08994992822408676, "learning_rate": 2.6676262234348795e-05, "loss": 10.1568, "step": 22630 }, { "epoch": 0.11306149966291293, "grad_norm": 0.09660772234201431, "learning_rate": 2.6674760319407246e-05, "loss": 10.166, "step": 22640 }, { "epoch": 0.11311143848785238, "grad_norm": 0.09649979323148727, "learning_rate": 2.6673258404465696e-05, "loss": 10.1625, "step": 22650 }, { "epoch": 0.11316137731279183, "grad_norm": 0.0917321965098381, "learning_rate": 2.6671756489524142e-05, "loss": 10.1596, "step": 22660 }, { "epoch": 0.11321131613773128, "grad_norm": 0.0942411795258522, "learning_rate": 2.6670254574582593e-05, "loss": 10.1607, "step": 22670 }, { "epoch": 0.11326125496267073, "grad_norm": 0.0937599241733551, "learning_rate": 2.6668752659641043e-05, "loss": 10.1558, "step": 22680 }, { "epoch": 0.11331119378761018, "grad_norm": 0.09375976026058197, "learning_rate": 2.6667250744699493e-05, "loss": 10.1595, "step": 22690 }, { "epoch": 0.11336113261254963, "grad_norm": 0.09384457021951675, "learning_rate": 2.6665748829757943e-05, "loss": 10.1543, "step": 22700 }, { "epoch": 0.11341107143748907, "grad_norm": 0.09696808457374573, "learning_rate": 2.666424691481639e-05, "loss": 10.1586, "step": 22710 }, { "epoch": 0.11346101026242852, "grad_norm": 0.09339691698551178, "learning_rate": 2.666274499987484e-05, "loss": 10.1598, "step": 22720 }, { "epoch": 0.11351094908736797, "grad_norm": 0.09268683195114136, "learning_rate": 2.666124308493329e-05, "loss": 10.1516, "step": 22730 }, { "epoch": 0.11356088791230742, "grad_norm": 0.10044291615486145, "learning_rate": 2.665974116999174e-05, "loss": 10.1582, "step": 22740 }, { "epoch": 0.11361082673724687, "grad_norm": 0.0959634929895401, "learning_rate": 2.665823925505019e-05, "loss": 10.1546, "step": 22750 }, { "epoch": 0.11366076556218632, "grad_norm": 0.09456854313611984, "learning_rate": 2.6656737340108637e-05, "loss": 10.154, "step": 22760 }, { "epoch": 0.11371070438712577, "grad_norm": 0.10183733701705933, "learning_rate": 2.6655235425167088e-05, "loss": 10.1546, "step": 22770 }, { "epoch": 0.11376064321206522, "grad_norm": 0.09504352509975433, "learning_rate": 2.6653733510225538e-05, "loss": 10.1579, "step": 22780 }, { "epoch": 0.11381058203700467, "grad_norm": 0.09749595075845718, "learning_rate": 2.6652231595283988e-05, "loss": 10.1589, "step": 22790 }, { "epoch": 0.11386052086194412, "grad_norm": 0.09672117978334427, "learning_rate": 2.6650729680342438e-05, "loss": 10.156, "step": 22800 }, { "epoch": 0.11391045968688357, "grad_norm": 0.10440251231193542, "learning_rate": 2.6649227765400885e-05, "loss": 10.1534, "step": 22810 }, { "epoch": 0.11396039851182302, "grad_norm": 0.09660029411315918, "learning_rate": 2.664772585045934e-05, "loss": 10.1507, "step": 22820 }, { "epoch": 0.11401033733676247, "grad_norm": 0.09805593639612198, "learning_rate": 2.6646223935517785e-05, "loss": 10.1509, "step": 22830 }, { "epoch": 0.11406027616170192, "grad_norm": 0.09688452631235123, "learning_rate": 2.6644722020576236e-05, "loss": 10.1586, "step": 22840 }, { "epoch": 0.11411021498664137, "grad_norm": 0.09908785670995712, "learning_rate": 2.6643220105634686e-05, "loss": 10.1529, "step": 22850 }, { "epoch": 0.11416015381158082, "grad_norm": 0.0970318540930748, "learning_rate": 2.6641718190693133e-05, "loss": 10.15, "step": 22860 }, { "epoch": 0.11421009263652027, "grad_norm": 0.0938095897436142, "learning_rate": 2.6640216275751586e-05, "loss": 10.1534, "step": 22870 }, { "epoch": 0.11426003146145972, "grad_norm": 0.09250219166278839, "learning_rate": 2.6638714360810033e-05, "loss": 10.1529, "step": 22880 }, { "epoch": 0.11430997028639917, "grad_norm": 0.09546291828155518, "learning_rate": 2.6637212445868483e-05, "loss": 10.1523, "step": 22890 }, { "epoch": 0.11435990911133861, "grad_norm": 0.09982664883136749, "learning_rate": 2.6635710530926933e-05, "loss": 10.1498, "step": 22900 }, { "epoch": 0.11440984793627806, "grad_norm": 0.0915086641907692, "learning_rate": 2.663420861598538e-05, "loss": 10.1501, "step": 22910 }, { "epoch": 0.11445978676121751, "grad_norm": 0.09142784029245377, "learning_rate": 2.6632706701043834e-05, "loss": 10.1543, "step": 22920 }, { "epoch": 0.11450972558615696, "grad_norm": 0.09327565878629684, "learning_rate": 2.663120478610228e-05, "loss": 10.1495, "step": 22930 }, { "epoch": 0.11455966441109641, "grad_norm": 0.10108178108930588, "learning_rate": 2.662970287116073e-05, "loss": 10.15, "step": 22940 }, { "epoch": 0.11460960323603586, "grad_norm": 0.10374581068754196, "learning_rate": 2.662820095621918e-05, "loss": 10.1521, "step": 22950 }, { "epoch": 0.11465954206097531, "grad_norm": 0.09310846030712128, "learning_rate": 2.6626699041277628e-05, "loss": 10.1512, "step": 22960 }, { "epoch": 0.11470948088591476, "grad_norm": 0.093265101313591, "learning_rate": 2.662519712633608e-05, "loss": 10.1533, "step": 22970 }, { "epoch": 0.11475941971085421, "grad_norm": 0.09261230379343033, "learning_rate": 2.6623695211394528e-05, "loss": 10.1498, "step": 22980 }, { "epoch": 0.11480935853579366, "grad_norm": 0.09824974089860916, "learning_rate": 2.6622193296452978e-05, "loss": 10.1488, "step": 22990 }, { "epoch": 0.1148592973607331, "grad_norm": 0.09499943256378174, "learning_rate": 2.6620691381511428e-05, "loss": 10.1487, "step": 23000 }, { "epoch": 0.11490923618567254, "grad_norm": 0.09166286885738373, "learning_rate": 2.6619189466569875e-05, "loss": 10.1469, "step": 23010 }, { "epoch": 0.114959175010612, "grad_norm": 0.09660668671131134, "learning_rate": 2.661768755162833e-05, "loss": 10.1496, "step": 23020 }, { "epoch": 0.11500911383555144, "grad_norm": 0.09428876638412476, "learning_rate": 2.6616185636686775e-05, "loss": 10.1469, "step": 23030 }, { "epoch": 0.1150590526604909, "grad_norm": 0.09304223209619522, "learning_rate": 2.6614683721745226e-05, "loss": 10.1458, "step": 23040 }, { "epoch": 0.11510899148543034, "grad_norm": 0.09473152458667755, "learning_rate": 2.6613181806803676e-05, "loss": 10.1446, "step": 23050 }, { "epoch": 0.11515893031036979, "grad_norm": 0.09486373513936996, "learning_rate": 2.6611679891862123e-05, "loss": 10.1459, "step": 23060 }, { "epoch": 0.11520886913530924, "grad_norm": 0.0942765548825264, "learning_rate": 2.6610177976920576e-05, "loss": 10.1487, "step": 23070 }, { "epoch": 0.11525880796024869, "grad_norm": 0.09221851825714111, "learning_rate": 2.6608676061979023e-05, "loss": 10.1516, "step": 23080 }, { "epoch": 0.11530874678518814, "grad_norm": 0.09266392886638641, "learning_rate": 2.6607174147037473e-05, "loss": 10.1478, "step": 23090 }, { "epoch": 0.11535868561012759, "grad_norm": 0.09701654314994812, "learning_rate": 2.6605672232095923e-05, "loss": 10.146, "step": 23100 }, { "epoch": 0.11540862443506704, "grad_norm": 0.09978927671909332, "learning_rate": 2.660417031715437e-05, "loss": 10.1477, "step": 23110 }, { "epoch": 0.11545856326000649, "grad_norm": 0.09633700549602509, "learning_rate": 2.6602668402212824e-05, "loss": 10.1477, "step": 23120 }, { "epoch": 0.11550850208494594, "grad_norm": 0.09246396273374557, "learning_rate": 2.660116648727127e-05, "loss": 10.1476, "step": 23130 }, { "epoch": 0.11555844090988539, "grad_norm": 0.09979529678821564, "learning_rate": 2.6599664572329724e-05, "loss": 10.1439, "step": 23140 }, { "epoch": 0.11560837973482484, "grad_norm": 0.09765896201133728, "learning_rate": 2.659816265738817e-05, "loss": 10.1442, "step": 23150 }, { "epoch": 0.11565831855976429, "grad_norm": 0.09836973249912262, "learning_rate": 2.6596660742446618e-05, "loss": 10.1468, "step": 23160 }, { "epoch": 0.11570825738470374, "grad_norm": 0.09357340633869171, "learning_rate": 2.659515882750507e-05, "loss": 10.1475, "step": 23170 }, { "epoch": 0.11575819620964319, "grad_norm": 0.09507403522729874, "learning_rate": 2.6593656912563518e-05, "loss": 10.1419, "step": 23180 }, { "epoch": 0.11580813503458263, "grad_norm": 0.09896954894065857, "learning_rate": 2.659215499762197e-05, "loss": 10.1431, "step": 23190 }, { "epoch": 0.11585807385952208, "grad_norm": 0.09166592359542847, "learning_rate": 2.6590653082680418e-05, "loss": 10.144, "step": 23200 }, { "epoch": 0.11590801268446153, "grad_norm": 0.09727290272712708, "learning_rate": 2.6589151167738865e-05, "loss": 10.1475, "step": 23210 }, { "epoch": 0.11595795150940098, "grad_norm": 0.09719046205282211, "learning_rate": 2.658764925279732e-05, "loss": 10.1424, "step": 23220 }, { "epoch": 0.11600789033434043, "grad_norm": 0.09605339914560318, "learning_rate": 2.6586147337855765e-05, "loss": 10.1377, "step": 23230 }, { "epoch": 0.11605782915927988, "grad_norm": 0.10000360757112503, "learning_rate": 2.658464542291422e-05, "loss": 10.1372, "step": 23240 }, { "epoch": 0.11610776798421933, "grad_norm": 0.09418469667434692, "learning_rate": 2.6583143507972666e-05, "loss": 10.1447, "step": 23250 }, { "epoch": 0.11615770680915878, "grad_norm": 0.09615140408277512, "learning_rate": 2.6581641593031113e-05, "loss": 10.1425, "step": 23260 }, { "epoch": 0.11620764563409823, "grad_norm": 0.0987091064453125, "learning_rate": 2.6580139678089566e-05, "loss": 10.1446, "step": 23270 }, { "epoch": 0.11625758445903768, "grad_norm": 0.09312444925308228, "learning_rate": 2.6578637763148013e-05, "loss": 10.1424, "step": 23280 }, { "epoch": 0.11630752328397713, "grad_norm": 0.09860526770353317, "learning_rate": 2.6577135848206466e-05, "loss": 10.143, "step": 23290 }, { "epoch": 0.11635746210891658, "grad_norm": 0.0991264283657074, "learning_rate": 2.6575633933264913e-05, "loss": 10.1415, "step": 23300 }, { "epoch": 0.11640740093385603, "grad_norm": 0.0935107171535492, "learning_rate": 2.657413201832336e-05, "loss": 10.1431, "step": 23310 }, { "epoch": 0.11645733975879548, "grad_norm": 0.09420657157897949, "learning_rate": 2.6572630103381814e-05, "loss": 10.1419, "step": 23320 }, { "epoch": 0.11650727858373493, "grad_norm": 0.09580549597740173, "learning_rate": 2.657112818844026e-05, "loss": 10.1444, "step": 23330 }, { "epoch": 0.11655721740867438, "grad_norm": 0.09820415079593658, "learning_rate": 2.6569626273498714e-05, "loss": 10.1408, "step": 23340 }, { "epoch": 0.11660715623361383, "grad_norm": 0.0958128347992897, "learning_rate": 2.656812435855716e-05, "loss": 10.143, "step": 23350 }, { "epoch": 0.11665709505855328, "grad_norm": 0.09436112642288208, "learning_rate": 2.6566622443615608e-05, "loss": 10.1437, "step": 23360 }, { "epoch": 0.11670703388349273, "grad_norm": 0.09518658369779587, "learning_rate": 2.656512052867406e-05, "loss": 10.1386, "step": 23370 }, { "epoch": 0.11675697270843217, "grad_norm": 0.08935555815696716, "learning_rate": 2.6563618613732508e-05, "loss": 10.1418, "step": 23380 }, { "epoch": 0.11680691153337162, "grad_norm": 0.09481310844421387, "learning_rate": 2.656211669879096e-05, "loss": 10.1424, "step": 23390 }, { "epoch": 0.11685685035831107, "grad_norm": 0.09546245634555817, "learning_rate": 2.6560614783849408e-05, "loss": 10.1411, "step": 23400 }, { "epoch": 0.11690678918325052, "grad_norm": 0.09654063731431961, "learning_rate": 2.6559112868907855e-05, "loss": 10.1385, "step": 23410 }, { "epoch": 0.11695672800818997, "grad_norm": 0.09841468185186386, "learning_rate": 2.655761095396631e-05, "loss": 10.1373, "step": 23420 }, { "epoch": 0.11700666683312942, "grad_norm": 0.09300010651350021, "learning_rate": 2.6556109039024755e-05, "loss": 10.1345, "step": 23430 }, { "epoch": 0.11705660565806887, "grad_norm": 0.09334631264209747, "learning_rate": 2.655460712408321e-05, "loss": 10.1359, "step": 23440 }, { "epoch": 0.11710654448300832, "grad_norm": 0.09312853217124939, "learning_rate": 2.6553105209141656e-05, "loss": 10.1425, "step": 23450 }, { "epoch": 0.11715648330794777, "grad_norm": 0.0956452414393425, "learning_rate": 2.6551603294200106e-05, "loss": 10.1425, "step": 23460 }, { "epoch": 0.11720642213288722, "grad_norm": 0.0932190865278244, "learning_rate": 2.6550101379258556e-05, "loss": 10.1389, "step": 23470 }, { "epoch": 0.11725636095782667, "grad_norm": 0.09214978665113449, "learning_rate": 2.6548599464317003e-05, "loss": 10.1381, "step": 23480 }, { "epoch": 0.1173062997827661, "grad_norm": 0.09422241896390915, "learning_rate": 2.6547097549375456e-05, "loss": 10.137, "step": 23490 }, { "epoch": 0.11735623860770555, "grad_norm": 0.09438539296388626, "learning_rate": 2.6545595634433903e-05, "loss": 10.1369, "step": 23500 }, { "epoch": 0.117406177432645, "grad_norm": 0.09662014991044998, "learning_rate": 2.6544093719492353e-05, "loss": 10.1367, "step": 23510 }, { "epoch": 0.11745611625758445, "grad_norm": 0.08834582567214966, "learning_rate": 2.6542591804550804e-05, "loss": 10.1387, "step": 23520 }, { "epoch": 0.1175060550825239, "grad_norm": 0.09564156085252762, "learning_rate": 2.654108988960925e-05, "loss": 10.1417, "step": 23530 }, { "epoch": 0.11755599390746335, "grad_norm": 0.09563954174518585, "learning_rate": 2.6539587974667704e-05, "loss": 10.1339, "step": 23540 }, { "epoch": 0.1176059327324028, "grad_norm": 0.0935792475938797, "learning_rate": 2.653808605972615e-05, "loss": 10.1307, "step": 23550 }, { "epoch": 0.11765587155734225, "grad_norm": 0.10603775829076767, "learning_rate": 2.65365841447846e-05, "loss": 10.1355, "step": 23560 }, { "epoch": 0.1177058103822817, "grad_norm": 0.09258319437503815, "learning_rate": 2.653508222984305e-05, "loss": 10.1355, "step": 23570 }, { "epoch": 0.11775574920722115, "grad_norm": 0.09184292703866959, "learning_rate": 2.6533580314901498e-05, "loss": 10.1333, "step": 23580 }, { "epoch": 0.1178056880321606, "grad_norm": 0.09775898605585098, "learning_rate": 2.653207839995995e-05, "loss": 10.137, "step": 23590 }, { "epoch": 0.11785562685710005, "grad_norm": 0.0923045426607132, "learning_rate": 2.6530576485018398e-05, "loss": 10.1308, "step": 23600 }, { "epoch": 0.1179055656820395, "grad_norm": 0.09056799858808517, "learning_rate": 2.652907457007685e-05, "loss": 10.1359, "step": 23610 }, { "epoch": 0.11795550450697895, "grad_norm": 0.09372033178806305, "learning_rate": 2.65275726551353e-05, "loss": 10.1317, "step": 23620 }, { "epoch": 0.1180054433319184, "grad_norm": 0.09503508359193802, "learning_rate": 2.6526070740193745e-05, "loss": 10.1331, "step": 23630 }, { "epoch": 0.11805538215685785, "grad_norm": 0.09287771582603455, "learning_rate": 2.65245688252522e-05, "loss": 10.1362, "step": 23640 }, { "epoch": 0.1181053209817973, "grad_norm": 0.09275928884744644, "learning_rate": 2.6523066910310646e-05, "loss": 10.1329, "step": 23650 }, { "epoch": 0.11815525980673675, "grad_norm": 0.09057994186878204, "learning_rate": 2.6521564995369096e-05, "loss": 10.1317, "step": 23660 }, { "epoch": 0.1182051986316762, "grad_norm": 0.09248855710029602, "learning_rate": 2.6520063080427546e-05, "loss": 10.1321, "step": 23670 }, { "epoch": 0.11825513745661564, "grad_norm": 0.09153629094362259, "learning_rate": 2.6518561165485993e-05, "loss": 10.1315, "step": 23680 }, { "epoch": 0.1183050762815551, "grad_norm": 0.08953340351581573, "learning_rate": 2.6517059250544446e-05, "loss": 10.1348, "step": 23690 }, { "epoch": 0.11835501510649454, "grad_norm": 0.09959051012992859, "learning_rate": 2.6515557335602893e-05, "loss": 10.1346, "step": 23700 }, { "epoch": 0.11840495393143399, "grad_norm": 0.09226525574922562, "learning_rate": 2.6514055420661343e-05, "loss": 10.1304, "step": 23710 }, { "epoch": 0.11845489275637344, "grad_norm": 0.09568504989147186, "learning_rate": 2.6512553505719794e-05, "loss": 10.1275, "step": 23720 }, { "epoch": 0.11850483158131289, "grad_norm": 0.0921492800116539, "learning_rate": 2.651105159077824e-05, "loss": 10.1283, "step": 23730 }, { "epoch": 0.11855477040625234, "grad_norm": 0.09444059431552887, "learning_rate": 2.6509549675836694e-05, "loss": 10.1336, "step": 23740 }, { "epoch": 0.11860470923119179, "grad_norm": 0.10168434679508209, "learning_rate": 2.650804776089514e-05, "loss": 10.1297, "step": 23750 }, { "epoch": 0.11865464805613124, "grad_norm": 0.09380350261926651, "learning_rate": 2.650654584595359e-05, "loss": 10.1278, "step": 23760 }, { "epoch": 0.11870458688107069, "grad_norm": 0.10053762793540955, "learning_rate": 2.650504393101204e-05, "loss": 10.1349, "step": 23770 }, { "epoch": 0.11875452570601014, "grad_norm": 0.09257914870977402, "learning_rate": 2.650354201607049e-05, "loss": 10.1311, "step": 23780 }, { "epoch": 0.11880446453094959, "grad_norm": 0.09091336280107498, "learning_rate": 2.650204010112894e-05, "loss": 10.1314, "step": 23790 }, { "epoch": 0.11885440335588904, "grad_norm": 0.09331763535737991, "learning_rate": 2.6500538186187388e-05, "loss": 10.1299, "step": 23800 }, { "epoch": 0.11890434218082849, "grad_norm": 0.09806916862726212, "learning_rate": 2.649903627124584e-05, "loss": 10.1285, "step": 23810 }, { "epoch": 0.11895428100576794, "grad_norm": 0.09568050503730774, "learning_rate": 2.649753435630429e-05, "loss": 10.1295, "step": 23820 }, { "epoch": 0.11900421983070739, "grad_norm": 0.09739750623703003, "learning_rate": 2.649603244136274e-05, "loss": 10.1288, "step": 23830 }, { "epoch": 0.11905415865564684, "grad_norm": 0.0919250100851059, "learning_rate": 2.649453052642119e-05, "loss": 10.1282, "step": 23840 }, { "epoch": 0.11910409748058629, "grad_norm": 0.09634441137313843, "learning_rate": 2.6493028611479636e-05, "loss": 10.1263, "step": 23850 }, { "epoch": 0.11915403630552573, "grad_norm": 0.09239911288022995, "learning_rate": 2.6491526696538086e-05, "loss": 10.1264, "step": 23860 }, { "epoch": 0.11920397513046518, "grad_norm": 0.09922140091657639, "learning_rate": 2.6490024781596536e-05, "loss": 10.1257, "step": 23870 }, { "epoch": 0.11925391395540463, "grad_norm": 0.09263952821493149, "learning_rate": 2.6488522866654986e-05, "loss": 10.127, "step": 23880 }, { "epoch": 0.11930385278034408, "grad_norm": 0.09615729004144669, "learning_rate": 2.6487020951713437e-05, "loss": 10.125, "step": 23890 }, { "epoch": 0.11935379160528353, "grad_norm": 0.09644229710102081, "learning_rate": 2.6485519036771883e-05, "loss": 10.1238, "step": 23900 }, { "epoch": 0.11940373043022298, "grad_norm": 0.09358090162277222, "learning_rate": 2.6484017121830333e-05, "loss": 10.1288, "step": 23910 }, { "epoch": 0.11945366925516243, "grad_norm": 0.0884949341416359, "learning_rate": 2.6482515206888784e-05, "loss": 10.1262, "step": 23920 }, { "epoch": 0.11950360808010188, "grad_norm": 0.09220154583454132, "learning_rate": 2.6481013291947234e-05, "loss": 10.1266, "step": 23930 }, { "epoch": 0.11955354690504133, "grad_norm": 0.09416556358337402, "learning_rate": 2.6479511377005684e-05, "loss": 10.1293, "step": 23940 }, { "epoch": 0.11960348572998078, "grad_norm": 0.09228892624378204, "learning_rate": 2.647800946206413e-05, "loss": 10.1261, "step": 23950 }, { "epoch": 0.11965342455492023, "grad_norm": 0.09278880804777145, "learning_rate": 2.647650754712258e-05, "loss": 10.1252, "step": 23960 }, { "epoch": 0.11970336337985968, "grad_norm": 0.09540235996246338, "learning_rate": 2.647500563218103e-05, "loss": 10.1248, "step": 23970 }, { "epoch": 0.11975330220479911, "grad_norm": 0.09458044171333313, "learning_rate": 2.647350371723948e-05, "loss": 10.1272, "step": 23980 }, { "epoch": 0.11980324102973856, "grad_norm": 0.09298811852931976, "learning_rate": 2.647200180229793e-05, "loss": 10.1196, "step": 23990 }, { "epoch": 0.11985317985467801, "grad_norm": 0.09707672894001007, "learning_rate": 2.6470499887356378e-05, "loss": 10.1218, "step": 24000 }, { "epoch": 0.11990311867961746, "grad_norm": 0.09375189244747162, "learning_rate": 2.646899797241483e-05, "loss": 10.1282, "step": 24010 }, { "epoch": 0.11995305750455691, "grad_norm": 0.1018557995557785, "learning_rate": 2.646749605747328e-05, "loss": 10.1204, "step": 24020 }, { "epoch": 0.12000299632949636, "grad_norm": 0.09262654185295105, "learning_rate": 2.646599414253173e-05, "loss": 10.1235, "step": 24030 }, { "epoch": 0.12005293515443581, "grad_norm": 0.09324292093515396, "learning_rate": 2.646449222759018e-05, "loss": 10.1253, "step": 24040 }, { "epoch": 0.12010287397937526, "grad_norm": 0.09518759697675705, "learning_rate": 2.6462990312648626e-05, "loss": 10.1231, "step": 24050 }, { "epoch": 0.12015281280431471, "grad_norm": 0.09306249022483826, "learning_rate": 2.6461488397707076e-05, "loss": 10.126, "step": 24060 }, { "epoch": 0.12020275162925416, "grad_norm": 0.09253344684839249, "learning_rate": 2.6459986482765526e-05, "loss": 10.1173, "step": 24070 }, { "epoch": 0.12025269045419361, "grad_norm": 0.09460706263780594, "learning_rate": 2.6458484567823976e-05, "loss": 10.1269, "step": 24080 }, { "epoch": 0.12030262927913306, "grad_norm": 0.08938644081354141, "learning_rate": 2.6456982652882427e-05, "loss": 10.1225, "step": 24090 }, { "epoch": 0.12035256810407251, "grad_norm": 0.09312314540147781, "learning_rate": 2.6455480737940873e-05, "loss": 10.122, "step": 24100 }, { "epoch": 0.12040250692901196, "grad_norm": 0.09598222374916077, "learning_rate": 2.6453978822999323e-05, "loss": 10.1195, "step": 24110 }, { "epoch": 0.1204524457539514, "grad_norm": 0.09335832297801971, "learning_rate": 2.6452476908057774e-05, "loss": 10.1204, "step": 24120 }, { "epoch": 0.12050238457889086, "grad_norm": 0.09654422104358673, "learning_rate": 2.6450974993116224e-05, "loss": 10.1206, "step": 24130 }, { "epoch": 0.1205523234038303, "grad_norm": 0.0975603386759758, "learning_rate": 2.6449473078174674e-05, "loss": 10.1211, "step": 24140 }, { "epoch": 0.12060226222876975, "grad_norm": 0.09276950359344482, "learning_rate": 2.6447971163233124e-05, "loss": 10.1188, "step": 24150 }, { "epoch": 0.1206522010537092, "grad_norm": 0.0958276093006134, "learning_rate": 2.644646924829157e-05, "loss": 10.1241, "step": 24160 }, { "epoch": 0.12070213987864865, "grad_norm": 0.09583685547113419, "learning_rate": 2.644496733335002e-05, "loss": 10.1209, "step": 24170 }, { "epoch": 0.1207520787035881, "grad_norm": 0.09288566559553146, "learning_rate": 2.644346541840847e-05, "loss": 10.1184, "step": 24180 }, { "epoch": 0.12080201752852755, "grad_norm": 0.0899089053273201, "learning_rate": 2.644196350346692e-05, "loss": 10.1188, "step": 24190 }, { "epoch": 0.120851956353467, "grad_norm": 0.09436280280351639, "learning_rate": 2.6440461588525372e-05, "loss": 10.12, "step": 24200 }, { "epoch": 0.12090189517840645, "grad_norm": 0.09277939051389694, "learning_rate": 2.643895967358382e-05, "loss": 10.1183, "step": 24210 }, { "epoch": 0.1209518340033459, "grad_norm": 0.08972795307636261, "learning_rate": 2.643745775864227e-05, "loss": 10.1201, "step": 24220 }, { "epoch": 0.12100177282828535, "grad_norm": 0.09541310369968414, "learning_rate": 2.643595584370072e-05, "loss": 10.1192, "step": 24230 }, { "epoch": 0.1210517116532248, "grad_norm": 0.08959456533193588, "learning_rate": 2.643445392875917e-05, "loss": 10.1224, "step": 24240 }, { "epoch": 0.12110165047816425, "grad_norm": 0.09786181151866913, "learning_rate": 2.643295201381762e-05, "loss": 10.117, "step": 24250 }, { "epoch": 0.1211515893031037, "grad_norm": 0.09291178733110428, "learning_rate": 2.6431450098876066e-05, "loss": 10.1203, "step": 24260 }, { "epoch": 0.12120152812804315, "grad_norm": 0.10062135756015778, "learning_rate": 2.6429948183934516e-05, "loss": 10.1179, "step": 24270 }, { "epoch": 0.1212514669529826, "grad_norm": 0.09641305357217789, "learning_rate": 2.6428446268992966e-05, "loss": 10.1198, "step": 24280 }, { "epoch": 0.12130140577792205, "grad_norm": 0.09644697606563568, "learning_rate": 2.6426944354051417e-05, "loss": 10.113, "step": 24290 }, { "epoch": 0.1213513446028615, "grad_norm": 0.0892978310585022, "learning_rate": 2.6425442439109867e-05, "loss": 10.118, "step": 24300 }, { "epoch": 0.12140128342780095, "grad_norm": 0.0910135954618454, "learning_rate": 2.6423940524168313e-05, "loss": 10.1194, "step": 24310 }, { "epoch": 0.1214512222527404, "grad_norm": 0.0961785763502121, "learning_rate": 2.6422438609226764e-05, "loss": 10.116, "step": 24320 }, { "epoch": 0.12150116107767985, "grad_norm": 0.09364509582519531, "learning_rate": 2.6420936694285214e-05, "loss": 10.111, "step": 24330 }, { "epoch": 0.1215510999026193, "grad_norm": 0.09308196604251862, "learning_rate": 2.6419434779343664e-05, "loss": 10.1159, "step": 24340 }, { "epoch": 0.12160103872755874, "grad_norm": 0.09322807192802429, "learning_rate": 2.6417932864402114e-05, "loss": 10.1159, "step": 24350 }, { "epoch": 0.1216509775524982, "grad_norm": 0.09460312128067017, "learning_rate": 2.641643094946056e-05, "loss": 10.1212, "step": 24360 }, { "epoch": 0.12170091637743764, "grad_norm": 0.0933726355433464, "learning_rate": 2.641492903451901e-05, "loss": 10.1137, "step": 24370 }, { "epoch": 0.12175085520237709, "grad_norm": 0.09838582575321198, "learning_rate": 2.641342711957746e-05, "loss": 10.1168, "step": 24380 }, { "epoch": 0.12180079402731654, "grad_norm": 0.09491819143295288, "learning_rate": 2.641192520463591e-05, "loss": 10.1143, "step": 24390 }, { "epoch": 0.12185073285225599, "grad_norm": 0.0954071581363678, "learning_rate": 2.6410423289694362e-05, "loss": 10.1135, "step": 24400 }, { "epoch": 0.12190067167719544, "grad_norm": 0.09948980063199997, "learning_rate": 2.6408921374752812e-05, "loss": 10.1176, "step": 24410 }, { "epoch": 0.12195061050213489, "grad_norm": 0.09514214843511581, "learning_rate": 2.640741945981126e-05, "loss": 10.1187, "step": 24420 }, { "epoch": 0.12200054932707434, "grad_norm": 0.09480323642492294, "learning_rate": 2.640591754486971e-05, "loss": 10.1122, "step": 24430 }, { "epoch": 0.12205048815201379, "grad_norm": 0.09891213476657867, "learning_rate": 2.640441562992816e-05, "loss": 10.1107, "step": 24440 }, { "epoch": 0.12210042697695324, "grad_norm": 0.09468024224042892, "learning_rate": 2.640291371498661e-05, "loss": 10.1156, "step": 24450 }, { "epoch": 0.12215036580189269, "grad_norm": 0.08860956132411957, "learning_rate": 2.640141180004506e-05, "loss": 10.1124, "step": 24460 }, { "epoch": 0.12220030462683214, "grad_norm": 0.09128830581903458, "learning_rate": 2.639990988510351e-05, "loss": 10.1173, "step": 24470 }, { "epoch": 0.12225024345177157, "grad_norm": 0.09434867650270462, "learning_rate": 2.6398407970161956e-05, "loss": 10.1109, "step": 24480 }, { "epoch": 0.12230018227671102, "grad_norm": 0.08857893943786621, "learning_rate": 2.6396906055220407e-05, "loss": 10.1099, "step": 24490 }, { "epoch": 0.12235012110165047, "grad_norm": 0.09494607895612717, "learning_rate": 2.6395404140278857e-05, "loss": 10.1118, "step": 24500 }, { "epoch": 0.12240005992658992, "grad_norm": 0.09442712366580963, "learning_rate": 2.6393902225337307e-05, "loss": 10.1108, "step": 24510 }, { "epoch": 0.12244999875152937, "grad_norm": 0.0935530737042427, "learning_rate": 2.6392400310395757e-05, "loss": 10.1098, "step": 24520 }, { "epoch": 0.12249993757646882, "grad_norm": 0.09096985310316086, "learning_rate": 2.6390898395454204e-05, "loss": 10.1132, "step": 24530 }, { "epoch": 0.12254987640140827, "grad_norm": 0.10358789563179016, "learning_rate": 2.6389396480512654e-05, "loss": 10.1113, "step": 24540 }, { "epoch": 0.12259981522634772, "grad_norm": 0.10106457769870758, "learning_rate": 2.6387894565571104e-05, "loss": 10.1086, "step": 24550 }, { "epoch": 0.12264975405128717, "grad_norm": 0.10155883431434631, "learning_rate": 2.6386392650629554e-05, "loss": 10.1106, "step": 24560 }, { "epoch": 0.12269969287622662, "grad_norm": 0.09161602705717087, "learning_rate": 2.6384890735688005e-05, "loss": 10.1158, "step": 24570 }, { "epoch": 0.12274963170116607, "grad_norm": 0.09335344284772873, "learning_rate": 2.638338882074645e-05, "loss": 10.1075, "step": 24580 }, { "epoch": 0.12279957052610552, "grad_norm": 0.0948810800909996, "learning_rate": 2.63818869058049e-05, "loss": 10.1117, "step": 24590 }, { "epoch": 0.12284950935104497, "grad_norm": 0.09355373680591583, "learning_rate": 2.6380384990863352e-05, "loss": 10.1119, "step": 24600 }, { "epoch": 0.12289944817598442, "grad_norm": 0.09632193297147751, "learning_rate": 2.6378883075921802e-05, "loss": 10.109, "step": 24610 }, { "epoch": 0.12294938700092387, "grad_norm": 0.09129033237695694, "learning_rate": 2.6377381160980252e-05, "loss": 10.1067, "step": 24620 }, { "epoch": 0.12299932582586331, "grad_norm": 0.09343235939741135, "learning_rate": 2.63758792460387e-05, "loss": 10.11, "step": 24630 }, { "epoch": 0.12304926465080276, "grad_norm": 0.0997580885887146, "learning_rate": 2.637437733109715e-05, "loss": 10.1113, "step": 24640 }, { "epoch": 0.12309920347574221, "grad_norm": 0.09580541402101517, "learning_rate": 2.63728754161556e-05, "loss": 10.1108, "step": 24650 }, { "epoch": 0.12314914230068166, "grad_norm": 0.0917908325791359, "learning_rate": 2.637137350121405e-05, "loss": 10.1129, "step": 24660 }, { "epoch": 0.12319908112562111, "grad_norm": 0.09625443816184998, "learning_rate": 2.63698715862725e-05, "loss": 10.1065, "step": 24670 }, { "epoch": 0.12324901995056056, "grad_norm": 0.0961778461933136, "learning_rate": 2.6368369671330946e-05, "loss": 10.1107, "step": 24680 }, { "epoch": 0.12329895877550001, "grad_norm": 0.09377621114253998, "learning_rate": 2.6366867756389397e-05, "loss": 10.1132, "step": 24690 }, { "epoch": 0.12334889760043946, "grad_norm": 0.09123267978429794, "learning_rate": 2.6365365841447847e-05, "loss": 10.1092, "step": 24700 }, { "epoch": 0.12339883642537891, "grad_norm": 0.09666462987661362, "learning_rate": 2.6363863926506297e-05, "loss": 10.106, "step": 24710 }, { "epoch": 0.12344877525031836, "grad_norm": 0.09382550418376923, "learning_rate": 2.6362362011564747e-05, "loss": 10.1071, "step": 24720 }, { "epoch": 0.12349871407525781, "grad_norm": 0.10108333826065063, "learning_rate": 2.6360860096623194e-05, "loss": 10.1022, "step": 24730 }, { "epoch": 0.12354865290019726, "grad_norm": 0.089703269302845, "learning_rate": 2.6359358181681644e-05, "loss": 10.1107, "step": 24740 }, { "epoch": 0.12359859172513671, "grad_norm": 0.09390883892774582, "learning_rate": 2.6357856266740094e-05, "loss": 10.1051, "step": 24750 }, { "epoch": 0.12364853055007616, "grad_norm": 0.0946129560470581, "learning_rate": 2.6356354351798544e-05, "loss": 10.1041, "step": 24760 }, { "epoch": 0.12369846937501561, "grad_norm": 0.09596185386180878, "learning_rate": 2.6354852436856995e-05, "loss": 10.1073, "step": 24770 }, { "epoch": 0.12374840819995506, "grad_norm": 0.09378567337989807, "learning_rate": 2.635335052191544e-05, "loss": 10.1047, "step": 24780 }, { "epoch": 0.1237983470248945, "grad_norm": 0.09474289417266846, "learning_rate": 2.6351848606973895e-05, "loss": 10.1043, "step": 24790 }, { "epoch": 0.12384828584983396, "grad_norm": 0.09176385402679443, "learning_rate": 2.6350346692032342e-05, "loss": 10.1048, "step": 24800 }, { "epoch": 0.1238982246747734, "grad_norm": 0.0884096696972847, "learning_rate": 2.6348844777090792e-05, "loss": 10.1021, "step": 24810 }, { "epoch": 0.12394816349971285, "grad_norm": 0.09617920964956284, "learning_rate": 2.6347342862149242e-05, "loss": 10.1041, "step": 24820 }, { "epoch": 0.1239981023246523, "grad_norm": 0.09283927828073502, "learning_rate": 2.634584094720769e-05, "loss": 10.1026, "step": 24830 }, { "epoch": 0.12404804114959175, "grad_norm": 0.09213951230049133, "learning_rate": 2.6344339032266142e-05, "loss": 10.1019, "step": 24840 }, { "epoch": 0.1240979799745312, "grad_norm": 0.09610681235790253, "learning_rate": 2.634283711732459e-05, "loss": 10.1075, "step": 24850 }, { "epoch": 0.12414791879947065, "grad_norm": 0.10006497800350189, "learning_rate": 2.634133520238304e-05, "loss": 10.103, "step": 24860 }, { "epoch": 0.1241978576244101, "grad_norm": 0.09113096445798874, "learning_rate": 2.633983328744149e-05, "loss": 10.1074, "step": 24870 }, { "epoch": 0.12424779644934955, "grad_norm": 0.10478292405605316, "learning_rate": 2.6338331372499936e-05, "loss": 10.0992, "step": 24880 }, { "epoch": 0.124297735274289, "grad_norm": 0.09672685712575912, "learning_rate": 2.633682945755839e-05, "loss": 10.1028, "step": 24890 }, { "epoch": 0.12434767409922845, "grad_norm": 0.0986834466457367, "learning_rate": 2.6335327542616837e-05, "loss": 10.0989, "step": 24900 }, { "epoch": 0.1243976129241679, "grad_norm": 0.10003640502691269, "learning_rate": 2.6333825627675287e-05, "loss": 10.1044, "step": 24910 }, { "epoch": 0.12444755174910735, "grad_norm": 0.09739897400140762, "learning_rate": 2.6332323712733737e-05, "loss": 10.1055, "step": 24920 }, { "epoch": 0.1244974905740468, "grad_norm": 0.09403092414140701, "learning_rate": 2.6330821797792184e-05, "loss": 10.1052, "step": 24930 }, { "epoch": 0.12454742939898625, "grad_norm": 0.0988364890217781, "learning_rate": 2.6329319882850637e-05, "loss": 10.1025, "step": 24940 }, { "epoch": 0.1245973682239257, "grad_norm": 0.09991788864135742, "learning_rate": 2.6327817967909084e-05, "loss": 10.1009, "step": 24950 }, { "epoch": 0.12464730704886515, "grad_norm": 0.09546318650245667, "learning_rate": 2.6326316052967534e-05, "loss": 10.0996, "step": 24960 }, { "epoch": 0.12469724587380458, "grad_norm": 0.09446793794631958, "learning_rate": 2.6324814138025985e-05, "loss": 10.1004, "step": 24970 }, { "epoch": 0.12474718469874403, "grad_norm": 0.09391462057828903, "learning_rate": 2.632331222308443e-05, "loss": 10.1005, "step": 24980 }, { "epoch": 0.12479712352368348, "grad_norm": 0.08938247710466385, "learning_rate": 2.6321810308142885e-05, "loss": 10.0976, "step": 24990 }, { "epoch": 0.12484706234862293, "grad_norm": 0.0972147136926651, "learning_rate": 2.6320308393201332e-05, "loss": 10.0994, "step": 25000 }, { "epoch": 0.12489700117356238, "grad_norm": 0.09196930378675461, "learning_rate": 2.6318806478259782e-05, "loss": 10.0997, "step": 25010 }, { "epoch": 0.12494693999850183, "grad_norm": 0.10306332260370255, "learning_rate": 2.6317304563318232e-05, "loss": 10.0984, "step": 25020 }, { "epoch": 0.12499687882344128, "grad_norm": 0.09357580542564392, "learning_rate": 2.631580264837668e-05, "loss": 10.1022, "step": 25030 }, { "epoch": 0.12504681764838074, "grad_norm": 0.09815985709428787, "learning_rate": 2.6314300733435132e-05, "loss": 10.0966, "step": 25040 }, { "epoch": 0.1250967564733202, "grad_norm": 0.09139269590377808, "learning_rate": 2.631279881849358e-05, "loss": 10.0957, "step": 25050 }, { "epoch": 0.12514669529825964, "grad_norm": 0.10160529613494873, "learning_rate": 2.631129690355203e-05, "loss": 10.098, "step": 25060 }, { "epoch": 0.1251966341231991, "grad_norm": 0.0978497862815857, "learning_rate": 2.630979498861048e-05, "loss": 10.1, "step": 25070 }, { "epoch": 0.12524657294813854, "grad_norm": 0.09045752882957458, "learning_rate": 2.6308293073668926e-05, "loss": 10.1027, "step": 25080 }, { "epoch": 0.125296511773078, "grad_norm": 0.08881735056638718, "learning_rate": 2.630679115872738e-05, "loss": 10.0955, "step": 25090 }, { "epoch": 0.12534645059801744, "grad_norm": 0.0939692035317421, "learning_rate": 2.6305289243785827e-05, "loss": 10.101, "step": 25100 }, { "epoch": 0.1253963894229569, "grad_norm": 0.09511993080377579, "learning_rate": 2.630378732884428e-05, "loss": 10.0969, "step": 25110 }, { "epoch": 0.12544632824789634, "grad_norm": 0.1041262075304985, "learning_rate": 2.6302285413902727e-05, "loss": 10.0912, "step": 25120 }, { "epoch": 0.1254962670728358, "grad_norm": 0.09648961573839188, "learning_rate": 2.6300783498961174e-05, "loss": 10.0957, "step": 25130 }, { "epoch": 0.12554620589777524, "grad_norm": 0.0973757728934288, "learning_rate": 2.6299281584019627e-05, "loss": 10.1005, "step": 25140 }, { "epoch": 0.1255961447227147, "grad_norm": 0.08688199520111084, "learning_rate": 2.6297779669078074e-05, "loss": 10.0966, "step": 25150 }, { "epoch": 0.12564608354765414, "grad_norm": 0.09321999549865723, "learning_rate": 2.6296277754136528e-05, "loss": 10.0924, "step": 25160 }, { "epoch": 0.12569602237259359, "grad_norm": 0.09925235062837601, "learning_rate": 2.6294775839194975e-05, "loss": 10.0918, "step": 25170 }, { "epoch": 0.12574596119753303, "grad_norm": 0.096058689057827, "learning_rate": 2.629327392425342e-05, "loss": 10.0918, "step": 25180 }, { "epoch": 0.12579590002247248, "grad_norm": 0.09322872012853622, "learning_rate": 2.6291772009311875e-05, "loss": 10.0941, "step": 25190 }, { "epoch": 0.12584583884741193, "grad_norm": 0.09035428613424301, "learning_rate": 2.6290270094370322e-05, "loss": 10.09, "step": 25200 }, { "epoch": 0.12589577767235138, "grad_norm": 0.09396304190158844, "learning_rate": 2.6288768179428775e-05, "loss": 10.0977, "step": 25210 }, { "epoch": 0.1259457164972908, "grad_norm": 0.09849023818969727, "learning_rate": 2.6287266264487222e-05, "loss": 10.0954, "step": 25220 }, { "epoch": 0.12599565532223025, "grad_norm": 0.09524643421173096, "learning_rate": 2.628576434954567e-05, "loss": 10.0929, "step": 25230 }, { "epoch": 0.1260455941471697, "grad_norm": 0.09641117602586746, "learning_rate": 2.6284262434604122e-05, "loss": 10.093, "step": 25240 }, { "epoch": 0.12609553297210915, "grad_norm": 0.0981585755944252, "learning_rate": 2.628276051966257e-05, "loss": 10.0965, "step": 25250 }, { "epoch": 0.1261454717970486, "grad_norm": 0.09640777111053467, "learning_rate": 2.6281258604721023e-05, "loss": 10.0929, "step": 25260 }, { "epoch": 0.12619541062198805, "grad_norm": 0.0920519009232521, "learning_rate": 2.627975668977947e-05, "loss": 10.0947, "step": 25270 }, { "epoch": 0.1262453494469275, "grad_norm": 0.09729231894016266, "learning_rate": 2.6278254774837916e-05, "loss": 10.0888, "step": 25280 }, { "epoch": 0.12629528827186695, "grad_norm": 0.09746874123811722, "learning_rate": 2.627675285989637e-05, "loss": 10.0952, "step": 25290 }, { "epoch": 0.1263452270968064, "grad_norm": 0.09574176371097565, "learning_rate": 2.6275250944954817e-05, "loss": 10.088, "step": 25300 }, { "epoch": 0.12639516592174585, "grad_norm": 0.09351847320795059, "learning_rate": 2.627374903001327e-05, "loss": 10.0934, "step": 25310 }, { "epoch": 0.1264451047466853, "grad_norm": 0.09245570003986359, "learning_rate": 2.6272247115071717e-05, "loss": 10.0963, "step": 25320 }, { "epoch": 0.12649504357162475, "grad_norm": 0.0946178138256073, "learning_rate": 2.6270745200130164e-05, "loss": 10.0888, "step": 25330 }, { "epoch": 0.1265449823965642, "grad_norm": 0.09379423409700394, "learning_rate": 2.6269243285188617e-05, "loss": 10.088, "step": 25340 }, { "epoch": 0.12659492122150365, "grad_norm": 0.09336696565151215, "learning_rate": 2.6267741370247064e-05, "loss": 10.0913, "step": 25350 }, { "epoch": 0.1266448600464431, "grad_norm": 0.09480159729719162, "learning_rate": 2.6266239455305518e-05, "loss": 10.0915, "step": 25360 }, { "epoch": 0.12669479887138255, "grad_norm": 0.09639475494623184, "learning_rate": 2.6264737540363965e-05, "loss": 10.0904, "step": 25370 }, { "epoch": 0.126744737696322, "grad_norm": 0.09714771062135696, "learning_rate": 2.626323562542241e-05, "loss": 10.0899, "step": 25380 }, { "epoch": 0.12679467652126145, "grad_norm": 0.09559177607297897, "learning_rate": 2.6261733710480865e-05, "loss": 10.0885, "step": 25390 }, { "epoch": 0.1268446153462009, "grad_norm": 0.09046468883752823, "learning_rate": 2.6260231795539312e-05, "loss": 10.0975, "step": 25400 }, { "epoch": 0.12689455417114034, "grad_norm": 0.0968378558754921, "learning_rate": 2.6258729880597765e-05, "loss": 10.0879, "step": 25410 }, { "epoch": 0.1269444929960798, "grad_norm": 0.09533467143774033, "learning_rate": 2.6257227965656212e-05, "loss": 10.0857, "step": 25420 }, { "epoch": 0.12699443182101924, "grad_norm": 0.0917276069521904, "learning_rate": 2.6255726050714662e-05, "loss": 10.0927, "step": 25430 }, { "epoch": 0.1270443706459587, "grad_norm": 0.096129409968853, "learning_rate": 2.6254224135773113e-05, "loss": 10.0886, "step": 25440 }, { "epoch": 0.12709430947089814, "grad_norm": 0.09592961519956589, "learning_rate": 2.625272222083156e-05, "loss": 10.0869, "step": 25450 }, { "epoch": 0.1271442482958376, "grad_norm": 0.1041722372174263, "learning_rate": 2.6251220305890013e-05, "loss": 10.0925, "step": 25460 }, { "epoch": 0.12719418712077704, "grad_norm": 0.09002278000116348, "learning_rate": 2.624971839094846e-05, "loss": 10.0886, "step": 25470 }, { "epoch": 0.1272441259457165, "grad_norm": 0.09098079055547714, "learning_rate": 2.624821647600691e-05, "loss": 10.0871, "step": 25480 }, { "epoch": 0.12729406477065594, "grad_norm": 0.09563304483890533, "learning_rate": 2.624671456106536e-05, "loss": 10.0872, "step": 25490 }, { "epoch": 0.1273440035955954, "grad_norm": 0.09505288302898407, "learning_rate": 2.6245212646123807e-05, "loss": 10.0871, "step": 25500 }, { "epoch": 0.12739394242053484, "grad_norm": 0.09651105850934982, "learning_rate": 2.624371073118226e-05, "loss": 10.0902, "step": 25510 }, { "epoch": 0.1274438812454743, "grad_norm": 0.09501788765192032, "learning_rate": 2.6242208816240707e-05, "loss": 10.0867, "step": 25520 }, { "epoch": 0.12749382007041374, "grad_norm": 0.09667658805847168, "learning_rate": 2.6240706901299157e-05, "loss": 10.0871, "step": 25530 }, { "epoch": 0.1275437588953532, "grad_norm": 0.09683676809072495, "learning_rate": 2.6239204986357608e-05, "loss": 10.0856, "step": 25540 }, { "epoch": 0.12759369772029264, "grad_norm": 0.09272299706935883, "learning_rate": 2.6237703071416054e-05, "loss": 10.0899, "step": 25550 }, { "epoch": 0.1276436365452321, "grad_norm": 0.08883216977119446, "learning_rate": 2.6236201156474508e-05, "loss": 10.0848, "step": 25560 }, { "epoch": 0.12769357537017154, "grad_norm": 0.0918259397149086, "learning_rate": 2.6234699241532955e-05, "loss": 10.0828, "step": 25570 }, { "epoch": 0.12774351419511099, "grad_norm": 0.0947398915886879, "learning_rate": 2.6233197326591405e-05, "loss": 10.0866, "step": 25580 }, { "epoch": 0.12779345302005043, "grad_norm": 0.09740963578224182, "learning_rate": 2.6231695411649855e-05, "loss": 10.0829, "step": 25590 }, { "epoch": 0.12784339184498988, "grad_norm": 0.09066878259181976, "learning_rate": 2.6230193496708302e-05, "loss": 10.0876, "step": 25600 }, { "epoch": 0.12789333066992933, "grad_norm": 0.09469142556190491, "learning_rate": 2.6228691581766755e-05, "loss": 10.0785, "step": 25610 }, { "epoch": 0.12794326949486878, "grad_norm": 0.09159637987613678, "learning_rate": 2.6227189666825202e-05, "loss": 10.0811, "step": 25620 }, { "epoch": 0.12799320831980823, "grad_norm": 0.09090546518564224, "learning_rate": 2.6225687751883652e-05, "loss": 10.0842, "step": 25630 }, { "epoch": 0.12804314714474768, "grad_norm": 0.0922180786728859, "learning_rate": 2.6224185836942103e-05, "loss": 10.0828, "step": 25640 }, { "epoch": 0.12809308596968713, "grad_norm": 0.09305445849895477, "learning_rate": 2.622268392200055e-05, "loss": 10.0872, "step": 25650 }, { "epoch": 0.12814302479462658, "grad_norm": 0.09471817314624786, "learning_rate": 2.6221182007059003e-05, "loss": 10.0857, "step": 25660 }, { "epoch": 0.12819296361956603, "grad_norm": 0.09348314255475998, "learning_rate": 2.621968009211745e-05, "loss": 10.0884, "step": 25670 }, { "epoch": 0.12824290244450548, "grad_norm": 0.09191960841417313, "learning_rate": 2.62181781771759e-05, "loss": 10.0895, "step": 25680 }, { "epoch": 0.12829284126944493, "grad_norm": 0.09789571166038513, "learning_rate": 2.621667626223435e-05, "loss": 10.0818, "step": 25690 }, { "epoch": 0.12834278009438438, "grad_norm": 0.09516225755214691, "learning_rate": 2.6215174347292797e-05, "loss": 10.0831, "step": 25700 }, { "epoch": 0.12839271891932383, "grad_norm": 0.09300491213798523, "learning_rate": 2.621367243235125e-05, "loss": 10.0844, "step": 25710 }, { "epoch": 0.12844265774426328, "grad_norm": 0.09599104523658752, "learning_rate": 2.6212170517409697e-05, "loss": 10.0853, "step": 25720 }, { "epoch": 0.12849259656920273, "grad_norm": 0.09136704355478287, "learning_rate": 2.6210668602468147e-05, "loss": 10.0873, "step": 25730 }, { "epoch": 0.12854253539414218, "grad_norm": 0.09239959716796875, "learning_rate": 2.6209166687526598e-05, "loss": 10.0794, "step": 25740 }, { "epoch": 0.12859247421908163, "grad_norm": 0.09590020775794983, "learning_rate": 2.6207664772585048e-05, "loss": 10.0835, "step": 25750 }, { "epoch": 0.12864241304402108, "grad_norm": 0.09230869263410568, "learning_rate": 2.6206162857643498e-05, "loss": 10.0833, "step": 25760 }, { "epoch": 0.12869235186896053, "grad_norm": 0.0960649698972702, "learning_rate": 2.6204660942701945e-05, "loss": 10.0822, "step": 25770 }, { "epoch": 0.12874229069389997, "grad_norm": 0.09332367777824402, "learning_rate": 2.6203159027760395e-05, "loss": 10.0848, "step": 25780 }, { "epoch": 0.12879222951883942, "grad_norm": 0.09813748300075531, "learning_rate": 2.6201657112818845e-05, "loss": 10.0807, "step": 25790 }, { "epoch": 0.12884216834377887, "grad_norm": 0.09281601011753082, "learning_rate": 2.6200155197877295e-05, "loss": 10.0862, "step": 25800 }, { "epoch": 0.12889210716871832, "grad_norm": 0.09574627131223679, "learning_rate": 2.6198653282935745e-05, "loss": 10.0784, "step": 25810 }, { "epoch": 0.12894204599365777, "grad_norm": 0.0939335897564888, "learning_rate": 2.6197151367994192e-05, "loss": 10.0865, "step": 25820 }, { "epoch": 0.12899198481859722, "grad_norm": 0.0975639671087265, "learning_rate": 2.6195649453052642e-05, "loss": 10.0783, "step": 25830 }, { "epoch": 0.12904192364353667, "grad_norm": 0.0939013734459877, "learning_rate": 2.6194147538111093e-05, "loss": 10.0837, "step": 25840 }, { "epoch": 0.12909186246847612, "grad_norm": 0.09316052496433258, "learning_rate": 2.6192645623169543e-05, "loss": 10.0781, "step": 25850 }, { "epoch": 0.12914180129341557, "grad_norm": 0.09889581054449081, "learning_rate": 2.6191143708227993e-05, "loss": 10.0742, "step": 25860 }, { "epoch": 0.12919174011835502, "grad_norm": 0.09303954988718033, "learning_rate": 2.618964179328644e-05, "loss": 10.0812, "step": 25870 }, { "epoch": 0.12924167894329447, "grad_norm": 0.09554919600486755, "learning_rate": 2.618813987834489e-05, "loss": 10.0706, "step": 25880 }, { "epoch": 0.12929161776823392, "grad_norm": 0.09778593480587006, "learning_rate": 2.618663796340334e-05, "loss": 10.0771, "step": 25890 }, { "epoch": 0.12934155659317337, "grad_norm": 0.09445203095674515, "learning_rate": 2.618513604846179e-05, "loss": 10.0793, "step": 25900 }, { "epoch": 0.12939149541811282, "grad_norm": 0.09079139679670334, "learning_rate": 2.618363413352024e-05, "loss": 10.0755, "step": 25910 }, { "epoch": 0.12944143424305227, "grad_norm": 5018.11767578125, "learning_rate": 2.6182132218578687e-05, "loss": 10.1051, "step": 25920 }, { "epoch": 0.12949137306799172, "grad_norm": 0.09410411864519119, "learning_rate": 2.6180630303637137e-05, "loss": 10.0779, "step": 25930 }, { "epoch": 0.12954131189293117, "grad_norm": 0.09051043540239334, "learning_rate": 2.6179128388695588e-05, "loss": 10.0751, "step": 25940 }, { "epoch": 0.12959125071787062, "grad_norm": 0.09015491604804993, "learning_rate": 2.6177626473754038e-05, "loss": 10.0803, "step": 25950 }, { "epoch": 0.12964118954281006, "grad_norm": 0.09758006781339645, "learning_rate": 2.6176124558812488e-05, "loss": 10.0734, "step": 25960 }, { "epoch": 0.12969112836774951, "grad_norm": 0.0914853885769844, "learning_rate": 2.6174622643870935e-05, "loss": 10.0797, "step": 25970 }, { "epoch": 0.12974106719268896, "grad_norm": 0.09737791121006012, "learning_rate": 2.6173120728929385e-05, "loss": 10.0749, "step": 25980 }, { "epoch": 0.1297910060176284, "grad_norm": 0.09865185618400574, "learning_rate": 2.6171618813987835e-05, "loss": 10.0764, "step": 25990 }, { "epoch": 0.12984094484256786, "grad_norm": 0.0985475480556488, "learning_rate": 2.6170116899046285e-05, "loss": 10.075, "step": 26000 }, { "epoch": 0.1298908836675073, "grad_norm": 0.09121818840503693, "learning_rate": 2.6168614984104735e-05, "loss": 10.0797, "step": 26010 }, { "epoch": 0.12994082249244676, "grad_norm": 0.10556277632713318, "learning_rate": 2.6167113069163182e-05, "loss": 10.0783, "step": 26020 }, { "epoch": 0.1299907613173862, "grad_norm": 0.0959169790148735, "learning_rate": 2.6165611154221632e-05, "loss": 10.0753, "step": 26030 }, { "epoch": 0.13004070014232566, "grad_norm": 0.09328239411115646, "learning_rate": 2.6164109239280083e-05, "loss": 10.0748, "step": 26040 }, { "epoch": 0.1300906389672651, "grad_norm": 0.08964414894580841, "learning_rate": 2.6162607324338533e-05, "loss": 10.075, "step": 26050 }, { "epoch": 0.13014057779220456, "grad_norm": 0.1068107932806015, "learning_rate": 2.6161105409396983e-05, "loss": 10.0722, "step": 26060 }, { "epoch": 0.130190516617144, "grad_norm": 0.09689830243587494, "learning_rate": 2.6159603494455433e-05, "loss": 10.0727, "step": 26070 }, { "epoch": 0.13024045544208346, "grad_norm": 0.09589334577322006, "learning_rate": 2.615810157951388e-05, "loss": 10.0663, "step": 26080 }, { "epoch": 0.1302903942670229, "grad_norm": 0.09473288059234619, "learning_rate": 2.615659966457233e-05, "loss": 10.0675, "step": 26090 }, { "epoch": 0.13034033309196236, "grad_norm": 0.09076739102602005, "learning_rate": 2.615509774963078e-05, "loss": 10.0772, "step": 26100 }, { "epoch": 0.1303902719169018, "grad_norm": 0.09733912348747253, "learning_rate": 2.615359583468923e-05, "loss": 10.0746, "step": 26110 }, { "epoch": 0.13044021074184126, "grad_norm": 0.10011307895183563, "learning_rate": 2.615209391974768e-05, "loss": 10.0657, "step": 26120 }, { "epoch": 0.1304901495667807, "grad_norm": 0.09807174652814865, "learning_rate": 2.6150592004806127e-05, "loss": 10.0688, "step": 26130 }, { "epoch": 0.13054008839172015, "grad_norm": 0.092112235724926, "learning_rate": 2.6149090089864578e-05, "loss": 10.0723, "step": 26140 }, { "epoch": 0.1305900272166596, "grad_norm": 0.09255208820104599, "learning_rate": 2.6147588174923028e-05, "loss": 10.0682, "step": 26150 }, { "epoch": 0.13063996604159905, "grad_norm": 0.09178891032934189, "learning_rate": 2.6146086259981478e-05, "loss": 10.0694, "step": 26160 }, { "epoch": 0.1306899048665385, "grad_norm": 0.09472060203552246, "learning_rate": 2.6144584345039928e-05, "loss": 10.0669, "step": 26170 }, { "epoch": 0.13073984369147795, "grad_norm": 0.08864433318376541, "learning_rate": 2.6143082430098375e-05, "loss": 10.0716, "step": 26180 }, { "epoch": 0.1307897825164174, "grad_norm": 0.09347887337207794, "learning_rate": 2.6141580515156825e-05, "loss": 10.0755, "step": 26190 }, { "epoch": 0.13083972134135685, "grad_norm": 0.09428052604198456, "learning_rate": 2.6140078600215275e-05, "loss": 10.0727, "step": 26200 }, { "epoch": 0.13088966016629627, "grad_norm": 0.09582565724849701, "learning_rate": 2.6138576685273725e-05, "loss": 10.0665, "step": 26210 }, { "epoch": 0.13093959899123572, "grad_norm": 0.09516657888889313, "learning_rate": 2.6137074770332176e-05, "loss": 10.0645, "step": 26220 }, { "epoch": 0.13098953781617517, "grad_norm": 0.10089933127164841, "learning_rate": 2.6135572855390622e-05, "loss": 10.0687, "step": 26230 }, { "epoch": 0.13103947664111462, "grad_norm": 0.09348998963832855, "learning_rate": 2.6134070940449073e-05, "loss": 10.0749, "step": 26240 }, { "epoch": 0.13108941546605407, "grad_norm": 0.09638164937496185, "learning_rate": 2.6132569025507523e-05, "loss": 10.0666, "step": 26250 }, { "epoch": 0.13113935429099352, "grad_norm": 0.08938256651163101, "learning_rate": 2.6131067110565973e-05, "loss": 10.0736, "step": 26260 }, { "epoch": 0.13118929311593297, "grad_norm": 0.09375530481338501, "learning_rate": 2.6129565195624423e-05, "loss": 10.0723, "step": 26270 }, { "epoch": 0.13123923194087242, "grad_norm": 0.09841980785131454, "learning_rate": 2.612806328068287e-05, "loss": 10.0654, "step": 26280 }, { "epoch": 0.13128917076581187, "grad_norm": 0.0966048538684845, "learning_rate": 2.612656136574132e-05, "loss": 10.0719, "step": 26290 }, { "epoch": 0.13133910959075132, "grad_norm": 0.09200683981180191, "learning_rate": 2.612505945079977e-05, "loss": 10.072, "step": 26300 }, { "epoch": 0.13138904841569077, "grad_norm": 0.09424161165952682, "learning_rate": 2.612355753585822e-05, "loss": 10.067, "step": 26310 }, { "epoch": 0.13143898724063022, "grad_norm": 0.09710439294576645, "learning_rate": 2.612205562091667e-05, "loss": 10.0681, "step": 26320 }, { "epoch": 0.13148892606556967, "grad_norm": 0.09168367087841034, "learning_rate": 2.6120553705975117e-05, "loss": 10.0653, "step": 26330 }, { "epoch": 0.13153886489050912, "grad_norm": 0.09984847903251648, "learning_rate": 2.6119051791033568e-05, "loss": 10.0658, "step": 26340 }, { "epoch": 0.13158880371544857, "grad_norm": 0.09909839183092117, "learning_rate": 2.6117549876092018e-05, "loss": 10.0639, "step": 26350 }, { "epoch": 0.13163874254038802, "grad_norm": 0.09866788238286972, "learning_rate": 2.6116047961150468e-05, "loss": 10.0642, "step": 26360 }, { "epoch": 0.13168868136532746, "grad_norm": 0.0965615063905716, "learning_rate": 2.6114546046208918e-05, "loss": 10.0713, "step": 26370 }, { "epoch": 0.13173862019026691, "grad_norm": 0.09519165009260178, "learning_rate": 2.6113044131267365e-05, "loss": 10.0636, "step": 26380 }, { "epoch": 0.13178855901520636, "grad_norm": 0.10046073794364929, "learning_rate": 2.611154221632582e-05, "loss": 10.0641, "step": 26390 }, { "epoch": 0.1318384978401458, "grad_norm": 0.09428393840789795, "learning_rate": 2.6110040301384265e-05, "loss": 10.0645, "step": 26400 }, { "epoch": 0.13188843666508526, "grad_norm": 0.09596385806798935, "learning_rate": 2.6108538386442715e-05, "loss": 10.0841, "step": 26410 }, { "epoch": 0.1319383754900247, "grad_norm": 0.0943203791975975, "learning_rate": 2.6107036471501166e-05, "loss": 10.0695, "step": 26420 }, { "epoch": 0.13198831431496416, "grad_norm": 0.0989902913570404, "learning_rate": 2.6105534556559612e-05, "loss": 10.0657, "step": 26430 }, { "epoch": 0.1320382531399036, "grad_norm": 0.0922727957367897, "learning_rate": 2.6104032641618066e-05, "loss": 10.5972, "step": 26440 }, { "epoch": 0.13208819196484306, "grad_norm": 0.09232750535011292, "learning_rate": 2.6102530726676513e-05, "loss": 10.063, "step": 26450 }, { "epoch": 0.1321381307897825, "grad_norm": 0.09481442719697952, "learning_rate": 2.6101028811734963e-05, "loss": 10.0732, "step": 26460 }, { "epoch": 0.13218806961472196, "grad_norm": 0.09335809201002121, "learning_rate": 2.6099526896793413e-05, "loss": 10.0669, "step": 26470 }, { "epoch": 0.1322380084396614, "grad_norm": 0.10007120668888092, "learning_rate": 2.609802498185186e-05, "loss": 10.0633, "step": 26480 }, { "epoch": 0.13228794726460086, "grad_norm": 0.09182542562484741, "learning_rate": 2.6096523066910313e-05, "loss": 10.0626, "step": 26490 }, { "epoch": 0.1323378860895403, "grad_norm": 0.08880601823329926, "learning_rate": 2.609502115196876e-05, "loss": 10.0639, "step": 26500 }, { "epoch": 0.13238782491447976, "grad_norm": 0.09500660002231598, "learning_rate": 2.609351923702721e-05, "loss": 10.0594, "step": 26510 }, { "epoch": 0.1324377637394192, "grad_norm": 0.09459840506315231, "learning_rate": 2.609201732208566e-05, "loss": 10.0689, "step": 26520 }, { "epoch": 0.13248770256435866, "grad_norm": 0.09772436320781708, "learning_rate": 2.6090515407144107e-05, "loss": 10.0646, "step": 26530 }, { "epoch": 0.1325376413892981, "grad_norm": 0.09241725504398346, "learning_rate": 2.608901349220256e-05, "loss": 10.0626, "step": 26540 }, { "epoch": 0.13258758021423755, "grad_norm": 0.09605549275875092, "learning_rate": 2.6087511577261008e-05, "loss": 10.062, "step": 26550 }, { "epoch": 0.132637519039177, "grad_norm": 0.08990667015314102, "learning_rate": 2.6086009662319458e-05, "loss": 10.0607, "step": 26560 }, { "epoch": 0.13268745786411645, "grad_norm": 0.09539429098367691, "learning_rate": 2.6084507747377908e-05, "loss": 10.0607, "step": 26570 }, { "epoch": 0.1327373966890559, "grad_norm": 0.09879159927368164, "learning_rate": 2.6083005832436355e-05, "loss": 10.059, "step": 26580 }, { "epoch": 0.13278733551399535, "grad_norm": 0.09552617371082306, "learning_rate": 2.608150391749481e-05, "loss": 10.0612, "step": 26590 }, { "epoch": 0.1328372743389348, "grad_norm": 0.09348921477794647, "learning_rate": 2.6080002002553255e-05, "loss": 10.062, "step": 26600 }, { "epoch": 0.13288721316387425, "grad_norm": 0.0956648588180542, "learning_rate": 2.6078500087611705e-05, "loss": 10.0591, "step": 26610 }, { "epoch": 0.1329371519888137, "grad_norm": 0.09512264281511307, "learning_rate": 2.6076998172670156e-05, "loss": 10.0588, "step": 26620 }, { "epoch": 0.13298709081375315, "grad_norm": 0.09354131668806076, "learning_rate": 2.6075496257728602e-05, "loss": 10.0635, "step": 26630 }, { "epoch": 0.1330370296386926, "grad_norm": 0.09954062849283218, "learning_rate": 2.6073994342787056e-05, "loss": 10.0639, "step": 26640 }, { "epoch": 0.13308696846363205, "grad_norm": 0.09436924755573273, "learning_rate": 2.6072492427845503e-05, "loss": 10.0608, "step": 26650 }, { "epoch": 0.1331369072885715, "grad_norm": 0.09259260445833206, "learning_rate": 2.6070990512903953e-05, "loss": 10.0609, "step": 26660 }, { "epoch": 0.13318684611351095, "grad_norm": 0.09337363392114639, "learning_rate": 2.6069488597962403e-05, "loss": 10.0629, "step": 26670 }, { "epoch": 0.1332367849384504, "grad_norm": 0.09187287837266922, "learning_rate": 2.606798668302085e-05, "loss": 10.0571, "step": 26680 }, { "epoch": 0.13328672376338985, "grad_norm": 0.09519325196743011, "learning_rate": 2.6066484768079303e-05, "loss": 10.063, "step": 26690 }, { "epoch": 0.1333366625883293, "grad_norm": 0.09201279282569885, "learning_rate": 2.606498285313775e-05, "loss": 10.0609, "step": 26700 }, { "epoch": 0.13338660141326875, "grad_norm": 0.09440702199935913, "learning_rate": 2.6063480938196204e-05, "loss": 10.0543, "step": 26710 }, { "epoch": 0.1334365402382082, "grad_norm": 0.09421040117740631, "learning_rate": 2.606197902325465e-05, "loss": 10.0605, "step": 26720 }, { "epoch": 0.13348647906314764, "grad_norm": 0.09064962714910507, "learning_rate": 2.6060477108313097e-05, "loss": 10.0651, "step": 26730 }, { "epoch": 0.1335364178880871, "grad_norm": 0.10387849062681198, "learning_rate": 2.605897519337155e-05, "loss": 10.0539, "step": 26740 }, { "epoch": 0.13358635671302654, "grad_norm": 0.09637302160263062, "learning_rate": 2.6057473278429998e-05, "loss": 10.0546, "step": 26750 }, { "epoch": 0.133636295537966, "grad_norm": 0.09184058010578156, "learning_rate": 2.605597136348845e-05, "loss": 10.0554, "step": 26760 }, { "epoch": 0.13368623436290544, "grad_norm": 0.09594525396823883, "learning_rate": 2.6054469448546898e-05, "loss": 10.0525, "step": 26770 }, { "epoch": 0.1337361731878449, "grad_norm": 0.09645157307386398, "learning_rate": 2.6052967533605345e-05, "loss": 10.0595, "step": 26780 }, { "epoch": 0.13378611201278434, "grad_norm": 0.09152582287788391, "learning_rate": 2.60514656186638e-05, "loss": 10.0652, "step": 26790 }, { "epoch": 0.1338360508377238, "grad_norm": 0.09593851864337921, "learning_rate": 2.6049963703722245e-05, "loss": 10.0514, "step": 26800 }, { "epoch": 0.13388598966266324, "grad_norm": 0.09910731017589569, "learning_rate": 2.60484617887807e-05, "loss": 10.0577, "step": 26810 }, { "epoch": 0.1339359284876027, "grad_norm": 0.09356366842985153, "learning_rate": 2.6046959873839146e-05, "loss": 10.0535, "step": 26820 }, { "epoch": 0.13398586731254214, "grad_norm": 0.09344334155321121, "learning_rate": 2.6045457958897592e-05, "loss": 10.0619, "step": 26830 }, { "epoch": 0.1340358061374816, "grad_norm": 0.09502086043357849, "learning_rate": 2.6043956043956046e-05, "loss": 10.0514, "step": 26840 }, { "epoch": 0.13408574496242104, "grad_norm": 0.0968647301197052, "learning_rate": 2.6042454129014493e-05, "loss": 10.0599, "step": 26850 }, { "epoch": 0.1341356837873605, "grad_norm": 0.09345351159572601, "learning_rate": 2.6040952214072946e-05, "loss": 10.0544, "step": 26860 }, { "epoch": 0.13418562261229994, "grad_norm": 0.09822539240121841, "learning_rate": 2.6039450299131393e-05, "loss": 10.0519, "step": 26870 }, { "epoch": 0.1342355614372394, "grad_norm": 0.09218066930770874, "learning_rate": 2.603794838418984e-05, "loss": 10.0573, "step": 26880 }, { "epoch": 0.13428550026217884, "grad_norm": 0.09610606729984283, "learning_rate": 2.6036446469248293e-05, "loss": 10.0547, "step": 26890 }, { "epoch": 0.13433543908711829, "grad_norm": 0.09326432645320892, "learning_rate": 2.603494455430674e-05, "loss": 10.0527, "step": 26900 }, { "epoch": 0.13438537791205774, "grad_norm": 0.0933103859424591, "learning_rate": 2.6033442639365194e-05, "loss": 10.0503, "step": 26910 }, { "epoch": 0.13443531673699718, "grad_norm": 0.09903916716575623, "learning_rate": 2.603194072442364e-05, "loss": 10.0514, "step": 26920 }, { "epoch": 0.13448525556193663, "grad_norm": 0.10167834907770157, "learning_rate": 2.6030438809482087e-05, "loss": 10.0525, "step": 26930 }, { "epoch": 0.13453519438687608, "grad_norm": 0.09627428650856018, "learning_rate": 2.602893689454054e-05, "loss": 10.0544, "step": 26940 }, { "epoch": 0.13458513321181553, "grad_norm": 0.09547838568687439, "learning_rate": 2.6027434979598988e-05, "loss": 10.0523, "step": 26950 }, { "epoch": 0.13463507203675498, "grad_norm": 0.0934177041053772, "learning_rate": 2.602593306465744e-05, "loss": 10.0577, "step": 26960 }, { "epoch": 0.13468501086169443, "grad_norm": 0.09201879799365997, "learning_rate": 2.6024431149715888e-05, "loss": 10.0561, "step": 26970 }, { "epoch": 0.13473494968663388, "grad_norm": 0.10229359567165375, "learning_rate": 2.6022929234774335e-05, "loss": 10.0537, "step": 26980 }, { "epoch": 0.13478488851157333, "grad_norm": 0.0951157957315445, "learning_rate": 2.602142731983279e-05, "loss": 10.0489, "step": 26990 }, { "epoch": 0.13483482733651278, "grad_norm": 0.09461617469787598, "learning_rate": 2.6019925404891235e-05, "loss": 10.0504, "step": 27000 }, { "epoch": 0.13488476616145223, "grad_norm": 0.09404211491346359, "learning_rate": 2.601842348994969e-05, "loss": 10.05, "step": 27010 }, { "epoch": 0.13493470498639168, "grad_norm": 0.09388327598571777, "learning_rate": 2.6016921575008136e-05, "loss": 10.0507, "step": 27020 }, { "epoch": 0.13498464381133113, "grad_norm": 0.08963081240653992, "learning_rate": 2.6015419660066582e-05, "loss": 10.0509, "step": 27030 }, { "epoch": 0.13503458263627058, "grad_norm": 0.09284106642007828, "learning_rate": 2.6013917745125036e-05, "loss": 10.0477, "step": 27040 }, { "epoch": 0.13508452146121003, "grad_norm": 0.09641976654529572, "learning_rate": 2.6012415830183483e-05, "loss": 10.0484, "step": 27050 }, { "epoch": 0.13513446028614948, "grad_norm": 0.09596869349479675, "learning_rate": 2.6010913915241936e-05, "loss": 10.0532, "step": 27060 }, { "epoch": 0.13518439911108893, "grad_norm": 0.09437578171491623, "learning_rate": 2.6009412000300383e-05, "loss": 10.0535, "step": 27070 }, { "epoch": 0.13523433793602838, "grad_norm": 0.09599731862545013, "learning_rate": 2.6007910085358833e-05, "loss": 10.0469, "step": 27080 }, { "epoch": 0.13528427676096783, "grad_norm": 0.09609924256801605, "learning_rate": 2.6006408170417284e-05, "loss": 10.0484, "step": 27090 }, { "epoch": 0.13533421558590727, "grad_norm": 0.09731438755989075, "learning_rate": 2.600490625547573e-05, "loss": 10.0502, "step": 27100 }, { "epoch": 0.13538415441084672, "grad_norm": 0.09791937470436096, "learning_rate": 2.6003404340534184e-05, "loss": 10.0518, "step": 27110 }, { "epoch": 0.13543409323578617, "grad_norm": 0.10084173083305359, "learning_rate": 2.600190242559263e-05, "loss": 10.0464, "step": 27120 }, { "epoch": 0.13548403206072562, "grad_norm": 0.09352555871009827, "learning_rate": 2.600040051065108e-05, "loss": 10.0546, "step": 27130 }, { "epoch": 0.13553397088566507, "grad_norm": 0.09164499491453171, "learning_rate": 2.599889859570953e-05, "loss": 10.05, "step": 27140 }, { "epoch": 0.13558390971060452, "grad_norm": 0.09976156800985336, "learning_rate": 2.5997396680767978e-05, "loss": 10.0509, "step": 27150 }, { "epoch": 0.13563384853554397, "grad_norm": 0.09886104613542557, "learning_rate": 2.599589476582643e-05, "loss": 10.0528, "step": 27160 }, { "epoch": 0.13568378736048342, "grad_norm": 0.09629525989294052, "learning_rate": 2.5994392850884878e-05, "loss": 10.0424, "step": 27170 }, { "epoch": 0.13573372618542287, "grad_norm": 0.09974990785121918, "learning_rate": 2.599289093594333e-05, "loss": 10.0463, "step": 27180 }, { "epoch": 0.13578366501036232, "grad_norm": 0.09218470752239227, "learning_rate": 2.599138902100178e-05, "loss": 10.0519, "step": 27190 }, { "epoch": 0.13583360383530174, "grad_norm": 0.09512577950954437, "learning_rate": 2.5989887106060225e-05, "loss": 10.0465, "step": 27200 }, { "epoch": 0.1358835426602412, "grad_norm": 0.09620299190282822, "learning_rate": 2.598838519111868e-05, "loss": 10.0529, "step": 27210 }, { "epoch": 0.13593348148518064, "grad_norm": 0.09048976749181747, "learning_rate": 2.5986883276177126e-05, "loss": 10.0434, "step": 27220 }, { "epoch": 0.1359834203101201, "grad_norm": 0.09220554679632187, "learning_rate": 2.5985381361235576e-05, "loss": 10.049, "step": 27230 }, { "epoch": 0.13603335913505954, "grad_norm": 0.0914304107427597, "learning_rate": 2.5983879446294026e-05, "loss": 10.0438, "step": 27240 }, { "epoch": 0.136083297959999, "grad_norm": 0.09436330199241638, "learning_rate": 2.5982377531352473e-05, "loss": 10.047, "step": 27250 }, { "epoch": 0.13613323678493844, "grad_norm": 0.09602109342813492, "learning_rate": 2.5980875616410926e-05, "loss": 10.0436, "step": 27260 }, { "epoch": 0.1361831756098779, "grad_norm": 0.09520605206489563, "learning_rate": 2.5979373701469373e-05, "loss": 10.0481, "step": 27270 }, { "epoch": 0.13623311443481734, "grad_norm": 0.09378485381603241, "learning_rate": 2.5977871786527823e-05, "loss": 10.049, "step": 27280 }, { "epoch": 0.1362830532597568, "grad_norm": 0.08965323120355606, "learning_rate": 2.5976369871586274e-05, "loss": 10.0505, "step": 27290 }, { "epoch": 0.13633299208469624, "grad_norm": 0.09475590288639069, "learning_rate": 2.597486795664472e-05, "loss": 10.0487, "step": 27300 }, { "epoch": 0.13638293090963569, "grad_norm": 0.09472818672657013, "learning_rate": 2.5973366041703174e-05, "loss": 10.0429, "step": 27310 }, { "epoch": 0.13643286973457514, "grad_norm": 0.09863143414258957, "learning_rate": 2.597186412676162e-05, "loss": 10.0474, "step": 27320 }, { "epoch": 0.13648280855951458, "grad_norm": 0.0966014415025711, "learning_rate": 2.597036221182007e-05, "loss": 10.0459, "step": 27330 }, { "epoch": 0.13653274738445403, "grad_norm": 0.09497446566820145, "learning_rate": 2.596886029687852e-05, "loss": 10.0439, "step": 27340 }, { "epoch": 0.13658268620939348, "grad_norm": 0.09334766864776611, "learning_rate": 2.5967358381936968e-05, "loss": 10.0404, "step": 27350 }, { "epoch": 0.13663262503433293, "grad_norm": 0.09661349654197693, "learning_rate": 2.596585646699542e-05, "loss": 10.0469, "step": 27360 }, { "epoch": 0.13668256385927238, "grad_norm": 0.0978384017944336, "learning_rate": 2.5964354552053868e-05, "loss": 10.0472, "step": 27370 }, { "epoch": 0.13673250268421183, "grad_norm": 0.09664426743984222, "learning_rate": 2.596285263711232e-05, "loss": 10.0426, "step": 27380 }, { "epoch": 0.13678244150915128, "grad_norm": 0.09428177773952484, "learning_rate": 2.596135072217077e-05, "loss": 10.0432, "step": 27390 }, { "epoch": 0.13683238033409073, "grad_norm": 0.09939315170049667, "learning_rate": 2.595984880722922e-05, "loss": 10.0388, "step": 27400 }, { "epoch": 0.13688231915903018, "grad_norm": 0.09356003999710083, "learning_rate": 2.595834689228767e-05, "loss": 10.0395, "step": 27410 }, { "epoch": 0.13693225798396963, "grad_norm": 0.09405787289142609, "learning_rate": 2.5956844977346116e-05, "loss": 10.0424, "step": 27420 }, { "epoch": 0.13698219680890908, "grad_norm": 0.09802021831274033, "learning_rate": 2.5955343062404566e-05, "loss": 10.0428, "step": 27430 }, { "epoch": 0.13703213563384853, "grad_norm": 0.10091257095336914, "learning_rate": 2.5953841147463016e-05, "loss": 10.0415, "step": 27440 }, { "epoch": 0.13708207445878798, "grad_norm": 0.09429014474153519, "learning_rate": 2.5952339232521466e-05, "loss": 10.0421, "step": 27450 }, { "epoch": 0.13713201328372743, "grad_norm": 0.09676757454872131, "learning_rate": 2.5950837317579916e-05, "loss": 10.0448, "step": 27460 }, { "epoch": 0.13718195210866688, "grad_norm": 0.09487222135066986, "learning_rate": 2.5949335402638363e-05, "loss": 10.0386, "step": 27470 }, { "epoch": 0.13723189093360633, "grad_norm": 0.09765653312206268, "learning_rate": 2.5947833487696813e-05, "loss": 10.0413, "step": 27480 }, { "epoch": 0.13728182975854578, "grad_norm": 0.09836971014738083, "learning_rate": 2.5946331572755264e-05, "loss": 10.0376, "step": 27490 }, { "epoch": 0.13733176858348523, "grad_norm": 0.09549671411514282, "learning_rate": 2.5944829657813714e-05, "loss": 10.0385, "step": 27500 }, { "epoch": 0.13738170740842467, "grad_norm": 0.08956001698970795, "learning_rate": 2.5943327742872164e-05, "loss": 10.0366, "step": 27510 }, { "epoch": 0.13743164623336412, "grad_norm": 0.09554048627614975, "learning_rate": 2.594182582793061e-05, "loss": 10.0428, "step": 27520 }, { "epoch": 0.13748158505830357, "grad_norm": 0.10129525512456894, "learning_rate": 2.594032391298906e-05, "loss": 10.0432, "step": 27530 }, { "epoch": 0.13753152388324302, "grad_norm": 0.09291373938322067, "learning_rate": 2.593882199804751e-05, "loss": 10.0404, "step": 27540 }, { "epoch": 0.13758146270818247, "grad_norm": 0.09440364688634872, "learning_rate": 2.593732008310596e-05, "loss": 10.0399, "step": 27550 }, { "epoch": 0.13763140153312192, "grad_norm": 0.09612912684679031, "learning_rate": 2.593581816816441e-05, "loss": 10.0394, "step": 27560 }, { "epoch": 0.13768134035806137, "grad_norm": 0.09454838931560516, "learning_rate": 2.5934316253222858e-05, "loss": 10.0397, "step": 27570 }, { "epoch": 0.13773127918300082, "grad_norm": 0.10840394347906113, "learning_rate": 2.593281433828131e-05, "loss": 10.0346, "step": 27580 }, { "epoch": 0.13778121800794027, "grad_norm": 0.09229467809200287, "learning_rate": 2.593131242333976e-05, "loss": 10.0374, "step": 27590 }, { "epoch": 0.13783115683287972, "grad_norm": 0.09308604896068573, "learning_rate": 2.592981050839821e-05, "loss": 10.0424, "step": 27600 }, { "epoch": 0.13788109565781917, "grad_norm": 0.09472574293613434, "learning_rate": 2.592830859345666e-05, "loss": 10.0377, "step": 27610 }, { "epoch": 0.13793103448275862, "grad_norm": 0.09195975959300995, "learning_rate": 2.5926806678515106e-05, "loss": 10.0403, "step": 27620 }, { "epoch": 0.13798097330769807, "grad_norm": 0.09835057705640793, "learning_rate": 2.5925304763573556e-05, "loss": 10.0352, "step": 27630 }, { "epoch": 0.13803091213263752, "grad_norm": 0.10062667727470398, "learning_rate": 2.5923802848632006e-05, "loss": 10.0336, "step": 27640 }, { "epoch": 0.13808085095757697, "grad_norm": 0.08686701953411102, "learning_rate": 2.5922300933690456e-05, "loss": 10.0383, "step": 27650 }, { "epoch": 0.13813078978251642, "grad_norm": 0.09707959741353989, "learning_rate": 2.5920799018748906e-05, "loss": 10.0345, "step": 27660 }, { "epoch": 0.13818072860745587, "grad_norm": 0.09171800315380096, "learning_rate": 2.5919297103807353e-05, "loss": 10.0336, "step": 27670 }, { "epoch": 0.13823066743239532, "grad_norm": 0.0923093855381012, "learning_rate": 2.5917795188865803e-05, "loss": 10.0352, "step": 27680 }, { "epoch": 0.13828060625733476, "grad_norm": 0.0912717804312706, "learning_rate": 2.5916293273924254e-05, "loss": 10.0339, "step": 27690 }, { "epoch": 0.13833054508227421, "grad_norm": 0.09480732679367065, "learning_rate": 2.5914791358982704e-05, "loss": 10.0389, "step": 27700 }, { "epoch": 0.13838048390721366, "grad_norm": 0.09554393589496613, "learning_rate": 2.5913289444041154e-05, "loss": 10.0352, "step": 27710 }, { "epoch": 0.1384304227321531, "grad_norm": 0.09049583226442337, "learning_rate": 2.5911787529099604e-05, "loss": 10.032, "step": 27720 }, { "epoch": 0.13848036155709256, "grad_norm": 0.09413400292396545, "learning_rate": 2.591028561415805e-05, "loss": 10.0393, "step": 27730 }, { "epoch": 0.138530300382032, "grad_norm": 0.09317301958799362, "learning_rate": 2.59087836992165e-05, "loss": 10.0328, "step": 27740 }, { "epoch": 0.13858023920697146, "grad_norm": 0.09751195460557938, "learning_rate": 2.590728178427495e-05, "loss": 10.0299, "step": 27750 }, { "epoch": 0.1386301780319109, "grad_norm": 0.09604477882385254, "learning_rate": 2.59057798693334e-05, "loss": 10.0348, "step": 27760 }, { "epoch": 0.13868011685685036, "grad_norm": 0.09635339677333832, "learning_rate": 2.590427795439185e-05, "loss": 10.0346, "step": 27770 }, { "epoch": 0.1387300556817898, "grad_norm": 0.10057932138442993, "learning_rate": 2.59027760394503e-05, "loss": 10.0319, "step": 27780 }, { "epoch": 0.13877999450672926, "grad_norm": 0.09397371113300323, "learning_rate": 2.590127412450875e-05, "loss": 10.0331, "step": 27790 }, { "epoch": 0.1388299333316687, "grad_norm": 0.09505651146173477, "learning_rate": 2.58997722095672e-05, "loss": 10.0293, "step": 27800 }, { "epoch": 0.13887987215660816, "grad_norm": 0.09163686633110046, "learning_rate": 2.589827029462565e-05, "loss": 10.0265, "step": 27810 }, { "epoch": 0.1389298109815476, "grad_norm": 0.09803500026464462, "learning_rate": 2.58967683796841e-05, "loss": 10.0362, "step": 27820 }, { "epoch": 0.13897974980648706, "grad_norm": 0.1022925078868866, "learning_rate": 2.5895266464742546e-05, "loss": 10.033, "step": 27830 }, { "epoch": 0.1390296886314265, "grad_norm": 0.09815297275781631, "learning_rate": 2.5893764549800996e-05, "loss": 10.0308, "step": 27840 }, { "epoch": 0.13907962745636596, "grad_norm": 0.09278080612421036, "learning_rate": 2.5892262634859446e-05, "loss": 10.0297, "step": 27850 }, { "epoch": 0.1391295662813054, "grad_norm": 0.09531033784151077, "learning_rate": 2.5890760719917896e-05, "loss": 10.0336, "step": 27860 }, { "epoch": 0.13917950510624486, "grad_norm": 0.1017637774348259, "learning_rate": 2.5889258804976347e-05, "loss": 10.0323, "step": 27870 }, { "epoch": 0.1392294439311843, "grad_norm": 0.08980844169855118, "learning_rate": 2.5887756890034793e-05, "loss": 10.0351, "step": 27880 }, { "epoch": 0.13927938275612375, "grad_norm": 0.09667633473873138, "learning_rate": 2.5886254975093244e-05, "loss": 10.0344, "step": 27890 }, { "epoch": 0.1393293215810632, "grad_norm": 0.09221838414669037, "learning_rate": 2.5884753060151694e-05, "loss": 10.0287, "step": 27900 }, { "epoch": 0.13937926040600265, "grad_norm": 0.09094412624835968, "learning_rate": 2.5883251145210144e-05, "loss": 10.0363, "step": 27910 }, { "epoch": 0.1394291992309421, "grad_norm": 0.09816347807645798, "learning_rate": 2.5881749230268594e-05, "loss": 10.0317, "step": 27920 }, { "epoch": 0.13947913805588155, "grad_norm": 0.09702466428279877, "learning_rate": 2.588024731532704e-05, "loss": 10.0289, "step": 27930 }, { "epoch": 0.139529076880821, "grad_norm": 0.09658083319664001, "learning_rate": 2.587874540038549e-05, "loss": 10.0304, "step": 27940 }, { "epoch": 0.13957901570576045, "grad_norm": 0.09620265662670135, "learning_rate": 2.587724348544394e-05, "loss": 10.0229, "step": 27950 }, { "epoch": 0.1396289545306999, "grad_norm": 0.09803494811058044, "learning_rate": 2.587574157050239e-05, "loss": 10.0264, "step": 27960 }, { "epoch": 0.13967889335563935, "grad_norm": 0.09247355163097382, "learning_rate": 2.587423965556084e-05, "loss": 10.0331, "step": 27970 }, { "epoch": 0.1397288321805788, "grad_norm": 0.09678816050291061, "learning_rate": 2.587273774061929e-05, "loss": 10.0242, "step": 27980 }, { "epoch": 0.13977877100551825, "grad_norm": 0.0913391187787056, "learning_rate": 2.587123582567774e-05, "loss": 10.0266, "step": 27990 }, { "epoch": 0.1398287098304577, "grad_norm": 0.0930318757891655, "learning_rate": 2.586973391073619e-05, "loss": 10.0247, "step": 28000 }, { "epoch": 0.13987864865539715, "grad_norm": 0.09507840126752853, "learning_rate": 2.586823199579464e-05, "loss": 10.0282, "step": 28010 }, { "epoch": 0.1399285874803366, "grad_norm": 0.09485821425914764, "learning_rate": 2.586673008085309e-05, "loss": 10.0286, "step": 28020 }, { "epoch": 0.13997852630527605, "grad_norm": 0.09455297142267227, "learning_rate": 2.5865228165911536e-05, "loss": 10.0309, "step": 28030 }, { "epoch": 0.1400284651302155, "grad_norm": 0.0947793647646904, "learning_rate": 2.586372625096999e-05, "loss": 10.027, "step": 28040 }, { "epoch": 0.14007840395515495, "grad_norm": 0.09246467053890228, "learning_rate": 2.5862224336028436e-05, "loss": 10.0264, "step": 28050 }, { "epoch": 0.1401283427800944, "grad_norm": 0.09456957876682281, "learning_rate": 2.5860722421086886e-05, "loss": 10.0264, "step": 28060 }, { "epoch": 0.14017828160503384, "grad_norm": 0.09399653971195221, "learning_rate": 2.5859220506145337e-05, "loss": 10.0288, "step": 28070 }, { "epoch": 0.1402282204299733, "grad_norm": 0.09784771502017975, "learning_rate": 2.5857718591203783e-05, "loss": 10.0218, "step": 28080 }, { "epoch": 0.14027815925491274, "grad_norm": 0.09228380024433136, "learning_rate": 2.5856216676262237e-05, "loss": 10.0276, "step": 28090 }, { "epoch": 0.1403280980798522, "grad_norm": 0.09873951226472855, "learning_rate": 2.5854714761320684e-05, "loss": 10.0257, "step": 28100 }, { "epoch": 0.14037803690479164, "grad_norm": 0.09930938482284546, "learning_rate": 2.5853212846379134e-05, "loss": 10.0304, "step": 28110 }, { "epoch": 0.1404279757297311, "grad_norm": 0.09772654622793198, "learning_rate": 2.5851710931437584e-05, "loss": 10.0231, "step": 28120 }, { "epoch": 0.14047791455467054, "grad_norm": 0.08995553106069565, "learning_rate": 2.585020901649603e-05, "loss": 10.0225, "step": 28130 }, { "epoch": 0.14052785337961, "grad_norm": 0.09744606912136078, "learning_rate": 2.5848707101554484e-05, "loss": 10.0267, "step": 28140 }, { "epoch": 0.14057779220454944, "grad_norm": 0.09355470538139343, "learning_rate": 2.584720518661293e-05, "loss": 10.0261, "step": 28150 }, { "epoch": 0.1406277310294889, "grad_norm": 0.097645103931427, "learning_rate": 2.584570327167138e-05, "loss": 10.0217, "step": 28160 }, { "epoch": 0.14067766985442834, "grad_norm": 0.09685888141393661, "learning_rate": 2.584420135672983e-05, "loss": 10.0217, "step": 28170 }, { "epoch": 0.1407276086793678, "grad_norm": 0.09667977690696716, "learning_rate": 2.584269944178828e-05, "loss": 10.0277, "step": 28180 }, { "epoch": 0.1407775475043072, "grad_norm": 0.09020134806632996, "learning_rate": 2.5841197526846732e-05, "loss": 10.0203, "step": 28190 }, { "epoch": 0.14082748632924666, "grad_norm": 0.09370194375514984, "learning_rate": 2.583969561190518e-05, "loss": 10.0225, "step": 28200 }, { "epoch": 0.1408774251541861, "grad_norm": 0.09775953739881516, "learning_rate": 2.583819369696363e-05, "loss": 10.0206, "step": 28210 }, { "epoch": 0.14092736397912556, "grad_norm": 0.09635622054338455, "learning_rate": 2.583669178202208e-05, "loss": 10.0231, "step": 28220 }, { "epoch": 0.140977302804065, "grad_norm": 0.09073793143033981, "learning_rate": 2.5835189867080526e-05, "loss": 10.0269, "step": 28230 }, { "epoch": 0.14102724162900446, "grad_norm": 0.10052619129419327, "learning_rate": 2.583368795213898e-05, "loss": 10.0258, "step": 28240 }, { "epoch": 0.1410771804539439, "grad_norm": 0.09381050616502762, "learning_rate": 2.5832186037197426e-05, "loss": 10.0208, "step": 28250 }, { "epoch": 0.14112711927888336, "grad_norm": 0.10019423812627792, "learning_rate": 2.5830684122255876e-05, "loss": 10.0195, "step": 28260 }, { "epoch": 0.1411770581038228, "grad_norm": 0.09306546300649643, "learning_rate": 2.5829182207314327e-05, "loss": 10.0191, "step": 28270 }, { "epoch": 0.14122699692876226, "grad_norm": 0.09635606408119202, "learning_rate": 2.5827680292372773e-05, "loss": 10.0268, "step": 28280 }, { "epoch": 0.1412769357537017, "grad_norm": 0.10159686952829361, "learning_rate": 2.5826178377431227e-05, "loss": 10.0214, "step": 28290 }, { "epoch": 0.14132687457864115, "grad_norm": 0.09453240036964417, "learning_rate": 2.5824676462489674e-05, "loss": 10.0215, "step": 28300 }, { "epoch": 0.1413768134035806, "grad_norm": 0.09013237059116364, "learning_rate": 2.5823174547548124e-05, "loss": 10.024, "step": 28310 }, { "epoch": 0.14142675222852005, "grad_norm": 0.0938793197274208, "learning_rate": 2.5821672632606574e-05, "loss": 10.0176, "step": 28320 }, { "epoch": 0.1414766910534595, "grad_norm": 0.09485707432031631, "learning_rate": 2.582017071766502e-05, "loss": 10.0171, "step": 28330 }, { "epoch": 0.14152662987839895, "grad_norm": 0.10150688141584396, "learning_rate": 2.5818668802723474e-05, "loss": 10.019, "step": 28340 }, { "epoch": 0.1415765687033384, "grad_norm": 0.09411424398422241, "learning_rate": 2.581716688778192e-05, "loss": 10.0231, "step": 28350 }, { "epoch": 0.14162650752827785, "grad_norm": 0.09432416409254074, "learning_rate": 2.5815664972840375e-05, "loss": 10.0187, "step": 28360 }, { "epoch": 0.1416764463532173, "grad_norm": 0.09197516739368439, "learning_rate": 2.581416305789882e-05, "loss": 10.0196, "step": 28370 }, { "epoch": 0.14172638517815675, "grad_norm": 0.09398947656154633, "learning_rate": 2.581266114295727e-05, "loss": 10.0159, "step": 28380 }, { "epoch": 0.1417763240030962, "grad_norm": 0.10007443279027939, "learning_rate": 2.5811159228015722e-05, "loss": 10.0165, "step": 28390 }, { "epoch": 0.14182626282803565, "grad_norm": 0.09710991382598877, "learning_rate": 2.580965731307417e-05, "loss": 10.0245, "step": 28400 }, { "epoch": 0.1418762016529751, "grad_norm": 0.09509746730327606, "learning_rate": 2.5808155398132622e-05, "loss": 10.0178, "step": 28410 }, { "epoch": 0.14192614047791455, "grad_norm": 0.09114133566617966, "learning_rate": 2.580665348319107e-05, "loss": 10.0192, "step": 28420 }, { "epoch": 0.141976079302854, "grad_norm": 0.0989239513874054, "learning_rate": 2.5805151568249516e-05, "loss": 10.017, "step": 28430 }, { "epoch": 0.14202601812779345, "grad_norm": 0.0925961434841156, "learning_rate": 2.580364965330797e-05, "loss": 10.0186, "step": 28440 }, { "epoch": 0.1420759569527329, "grad_norm": 0.09893997758626938, "learning_rate": 2.5802147738366416e-05, "loss": 10.0155, "step": 28450 }, { "epoch": 0.14212589577767235, "grad_norm": 0.0943673700094223, "learning_rate": 2.580064582342487e-05, "loss": 10.0159, "step": 28460 }, { "epoch": 0.1421758346026118, "grad_norm": 0.09693938493728638, "learning_rate": 2.5799143908483317e-05, "loss": 10.0115, "step": 28470 }, { "epoch": 0.14222577342755124, "grad_norm": 0.09356331825256348, "learning_rate": 2.5797641993541763e-05, "loss": 10.014, "step": 28480 }, { "epoch": 0.1422757122524907, "grad_norm": 0.09694939106702805, "learning_rate": 2.5796140078600217e-05, "loss": 10.0138, "step": 28490 }, { "epoch": 0.14232565107743014, "grad_norm": 0.09721967577934265, "learning_rate": 2.5794638163658664e-05, "loss": 10.0084, "step": 28500 }, { "epoch": 0.1423755899023696, "grad_norm": 0.09602759033441544, "learning_rate": 2.5793136248717117e-05, "loss": 10.015, "step": 28510 }, { "epoch": 0.14242552872730904, "grad_norm": 0.09390842169523239, "learning_rate": 2.5791634333775564e-05, "loss": 10.0172, "step": 28520 }, { "epoch": 0.1424754675522485, "grad_norm": 0.09624374657869339, "learning_rate": 2.579013241883401e-05, "loss": 10.0161, "step": 28530 }, { "epoch": 0.14252540637718794, "grad_norm": 0.09962889552116394, "learning_rate": 2.5788630503892465e-05, "loss": 10.0149, "step": 28540 }, { "epoch": 0.1425753452021274, "grad_norm": 0.09729303419589996, "learning_rate": 2.578712858895091e-05, "loss": 10.0157, "step": 28550 }, { "epoch": 0.14262528402706684, "grad_norm": 0.09288518875837326, "learning_rate": 2.5785626674009365e-05, "loss": 10.0173, "step": 28560 }, { "epoch": 0.1426752228520063, "grad_norm": 0.09366150200366974, "learning_rate": 2.578412475906781e-05, "loss": 10.0122, "step": 28570 }, { "epoch": 0.14272516167694574, "grad_norm": 0.0944962128996849, "learning_rate": 2.5782622844126262e-05, "loss": 10.0112, "step": 28580 }, { "epoch": 0.1427751005018852, "grad_norm": 0.09178702533245087, "learning_rate": 2.5781120929184712e-05, "loss": 10.0179, "step": 28590 }, { "epoch": 0.14282503932682464, "grad_norm": 0.0905400812625885, "learning_rate": 2.577961901424316e-05, "loss": 10.0134, "step": 28600 }, { "epoch": 0.1428749781517641, "grad_norm": 0.09418443590402603, "learning_rate": 2.5778117099301612e-05, "loss": 10.0168, "step": 28610 }, { "epoch": 0.14292491697670354, "grad_norm": 0.0970693901181221, "learning_rate": 2.577661518436006e-05, "loss": 10.0162, "step": 28620 }, { "epoch": 0.14297485580164299, "grad_norm": 0.09255459159612656, "learning_rate": 2.577511326941851e-05, "loss": 10.0129, "step": 28630 }, { "epoch": 0.14302479462658244, "grad_norm": 0.10080841928720474, "learning_rate": 2.577361135447696e-05, "loss": 10.0144, "step": 28640 }, { "epoch": 0.14307473345152188, "grad_norm": 0.09627335518598557, "learning_rate": 2.5772109439535406e-05, "loss": 10.0095, "step": 28650 }, { "epoch": 0.14312467227646133, "grad_norm": 0.09631509333848953, "learning_rate": 2.577060752459386e-05, "loss": 10.0167, "step": 28660 }, { "epoch": 0.14317461110140078, "grad_norm": 0.09696370363235474, "learning_rate": 2.5769105609652307e-05, "loss": 10.0145, "step": 28670 }, { "epoch": 0.14322454992634023, "grad_norm": 0.08783876895904541, "learning_rate": 2.5767603694710757e-05, "loss": 10.0139, "step": 28680 }, { "epoch": 0.14327448875127968, "grad_norm": 0.09430178999900818, "learning_rate": 2.5766101779769207e-05, "loss": 10.0151, "step": 28690 }, { "epoch": 0.14332442757621913, "grad_norm": 0.09581022709608078, "learning_rate": 2.5764599864827654e-05, "loss": 10.0074, "step": 28700 }, { "epoch": 0.14337436640115858, "grad_norm": 0.10260827839374542, "learning_rate": 2.5763097949886107e-05, "loss": 10.0112, "step": 28710 }, { "epoch": 0.14342430522609803, "grad_norm": 0.09252434223890305, "learning_rate": 2.5761596034944554e-05, "loss": 10.0146, "step": 28720 }, { "epoch": 0.14347424405103748, "grad_norm": 0.08901256322860718, "learning_rate": 2.5760094120003004e-05, "loss": 10.0066, "step": 28730 }, { "epoch": 0.14352418287597693, "grad_norm": 0.09571991860866547, "learning_rate": 2.5758592205061455e-05, "loss": 10.0124, "step": 28740 }, { "epoch": 0.14357412170091638, "grad_norm": 0.09751693159341812, "learning_rate": 2.57570902901199e-05, "loss": 10.0108, "step": 28750 }, { "epoch": 0.14362406052585583, "grad_norm": 0.09803242981433868, "learning_rate": 2.5755588375178355e-05, "loss": 10.0063, "step": 28760 }, { "epoch": 0.14367399935079528, "grad_norm": 0.0984642431139946, "learning_rate": 2.57540864602368e-05, "loss": 10.0058, "step": 28770 }, { "epoch": 0.14372393817573473, "grad_norm": 0.0973120778799057, "learning_rate": 2.5752584545295252e-05, "loss": 10.0062, "step": 28780 }, { "epoch": 0.14377387700067418, "grad_norm": 0.09399261325597763, "learning_rate": 2.5751082630353702e-05, "loss": 10.0063, "step": 28790 }, { "epoch": 0.14382381582561363, "grad_norm": 0.09200508892536163, "learning_rate": 2.574958071541215e-05, "loss": 10.0079, "step": 28800 }, { "epoch": 0.14387375465055308, "grad_norm": 0.09684068709611893, "learning_rate": 2.5748078800470602e-05, "loss": 10.004, "step": 28810 }, { "epoch": 0.14392369347549253, "grad_norm": 0.09978806972503662, "learning_rate": 2.574657688552905e-05, "loss": 10.0058, "step": 28820 }, { "epoch": 0.14397363230043198, "grad_norm": 0.09788274019956589, "learning_rate": 2.57450749705875e-05, "loss": 10.0059, "step": 28830 }, { "epoch": 0.14402357112537142, "grad_norm": 0.09136934578418732, "learning_rate": 2.574357305564595e-05, "loss": 10.0068, "step": 28840 }, { "epoch": 0.14407350995031087, "grad_norm": 0.09080319106578827, "learning_rate": 2.5742071140704396e-05, "loss": 10.0057, "step": 28850 }, { "epoch": 0.14412344877525032, "grad_norm": 0.09714391082525253, "learning_rate": 2.574056922576285e-05, "loss": 10.0084, "step": 28860 }, { "epoch": 0.14417338760018977, "grad_norm": 0.1014237254858017, "learning_rate": 2.5739067310821297e-05, "loss": 10.0073, "step": 28870 }, { "epoch": 0.14422332642512922, "grad_norm": 0.09329380840063095, "learning_rate": 2.573756539587975e-05, "loss": 10.0075, "step": 28880 }, { "epoch": 0.14427326525006867, "grad_norm": 0.09168153256177902, "learning_rate": 2.5736063480938197e-05, "loss": 10.0039, "step": 28890 }, { "epoch": 0.14432320407500812, "grad_norm": 0.09341901540756226, "learning_rate": 2.5734561565996644e-05, "loss": 10.0078, "step": 28900 }, { "epoch": 0.14437314289994757, "grad_norm": 0.08910293877124786, "learning_rate": 2.5733059651055097e-05, "loss": 10.0063, "step": 28910 }, { "epoch": 0.14442308172488702, "grad_norm": 0.09554349631071091, "learning_rate": 2.5731557736113544e-05, "loss": 10.0101, "step": 28920 }, { "epoch": 0.14447302054982647, "grad_norm": 0.09408532083034515, "learning_rate": 2.5730055821171998e-05, "loss": 10.0043, "step": 28930 }, { "epoch": 0.14452295937476592, "grad_norm": 0.09786105901002884, "learning_rate": 2.5728553906230445e-05, "loss": 10.0024, "step": 28940 }, { "epoch": 0.14457289819970537, "grad_norm": 0.09660477936267853, "learning_rate": 2.572705199128889e-05, "loss": 10.0064, "step": 28950 }, { "epoch": 0.14462283702464482, "grad_norm": 0.088541179895401, "learning_rate": 2.5725550076347345e-05, "loss": 10.0054, "step": 28960 }, { "epoch": 0.14467277584958427, "grad_norm": 0.09211627393960953, "learning_rate": 2.572404816140579e-05, "loss": 10.0047, "step": 28970 }, { "epoch": 0.14472271467452372, "grad_norm": 0.10103971511125565, "learning_rate": 2.5722546246464245e-05, "loss": 10.0037, "step": 28980 }, { "epoch": 0.14477265349946317, "grad_norm": 0.09156627953052521, "learning_rate": 2.5721044331522692e-05, "loss": 10.0016, "step": 28990 }, { "epoch": 0.14482259232440262, "grad_norm": 0.08952153474092484, "learning_rate": 2.5719542416581142e-05, "loss": 10.0043, "step": 29000 }, { "epoch": 0.14487253114934207, "grad_norm": 0.09983877092599869, "learning_rate": 2.5718040501639592e-05, "loss": 10.0048, "step": 29010 }, { "epoch": 0.14492246997428151, "grad_norm": 0.09366819262504578, "learning_rate": 2.571653858669804e-05, "loss": 10.0025, "step": 29020 }, { "epoch": 0.14497240879922096, "grad_norm": 0.09434466063976288, "learning_rate": 2.5715036671756493e-05, "loss": 10.0053, "step": 29030 }, { "epoch": 0.1450223476241604, "grad_norm": 0.09528578817844391, "learning_rate": 2.571353475681494e-05, "loss": 10.0025, "step": 29040 }, { "epoch": 0.14507228644909986, "grad_norm": 0.09328389912843704, "learning_rate": 2.571203284187339e-05, "loss": 9.9988, "step": 29050 }, { "epoch": 0.1451222252740393, "grad_norm": 0.09483915567398071, "learning_rate": 2.571053092693184e-05, "loss": 10.0042, "step": 29060 }, { "epoch": 0.14517216409897876, "grad_norm": 0.09249535948038101, "learning_rate": 2.5709029011990287e-05, "loss": 10.0043, "step": 29070 }, { "epoch": 0.1452221029239182, "grad_norm": 0.09545839577913284, "learning_rate": 2.570752709704874e-05, "loss": 10.0014, "step": 29080 }, { "epoch": 0.14527204174885766, "grad_norm": 0.0961858257651329, "learning_rate": 2.5706025182107187e-05, "loss": 10.0063, "step": 29090 }, { "epoch": 0.1453219805737971, "grad_norm": 0.09866221994161606, "learning_rate": 2.5704523267165637e-05, "loss": 9.9957, "step": 29100 }, { "epoch": 0.14537191939873656, "grad_norm": 0.0952538251876831, "learning_rate": 2.5703021352224087e-05, "loss": 9.9998, "step": 29110 }, { "epoch": 0.145421858223676, "grad_norm": 0.09659268707036972, "learning_rate": 2.5701519437282534e-05, "loss": 10.0082, "step": 29120 }, { "epoch": 0.14547179704861546, "grad_norm": 0.09689272195100784, "learning_rate": 2.5700017522340988e-05, "loss": 10.0042, "step": 29130 }, { "epoch": 0.1455217358735549, "grad_norm": 0.09408452361822128, "learning_rate": 2.5698515607399435e-05, "loss": 9.9958, "step": 29140 }, { "epoch": 0.14557167469849436, "grad_norm": 0.09306766837835312, "learning_rate": 2.5697013692457885e-05, "loss": 9.9998, "step": 29150 }, { "epoch": 0.1456216135234338, "grad_norm": 0.10271524637937546, "learning_rate": 2.5695511777516335e-05, "loss": 9.9997, "step": 29160 }, { "epoch": 0.14567155234837326, "grad_norm": 0.09216585755348206, "learning_rate": 2.5694009862574782e-05, "loss": 9.9944, "step": 29170 }, { "epoch": 0.14572149117331268, "grad_norm": 0.09553508460521698, "learning_rate": 2.5692507947633235e-05, "loss": 10.0022, "step": 29180 }, { "epoch": 0.14577142999825213, "grad_norm": 0.09927748143672943, "learning_rate": 2.5691006032691682e-05, "loss": 9.9974, "step": 29190 }, { "epoch": 0.14582136882319158, "grad_norm": 0.09433567523956299, "learning_rate": 2.5689504117750132e-05, "loss": 10.0014, "step": 29200 }, { "epoch": 0.14587130764813103, "grad_norm": 0.0930803120136261, "learning_rate": 2.5688002202808582e-05, "loss": 10.0037, "step": 29210 }, { "epoch": 0.14592124647307048, "grad_norm": 0.09351327270269394, "learning_rate": 2.568650028786703e-05, "loss": 9.9983, "step": 29220 }, { "epoch": 0.14597118529800993, "grad_norm": 0.09411612898111343, "learning_rate": 2.5684998372925483e-05, "loss": 10.002, "step": 29230 }, { "epoch": 0.14602112412294938, "grad_norm": 0.09385941177606583, "learning_rate": 2.568349645798393e-05, "loss": 10.0001, "step": 29240 }, { "epoch": 0.14607106294788882, "grad_norm": 0.09528261423110962, "learning_rate": 2.568199454304238e-05, "loss": 9.9901, "step": 29250 }, { "epoch": 0.14612100177282827, "grad_norm": 0.09263186156749725, "learning_rate": 2.568049262810083e-05, "loss": 10.0004, "step": 29260 }, { "epoch": 0.14617094059776772, "grad_norm": 0.09151321649551392, "learning_rate": 2.5678990713159277e-05, "loss": 10.0003, "step": 29270 }, { "epoch": 0.14622087942270717, "grad_norm": 0.09789709746837616, "learning_rate": 2.567748879821773e-05, "loss": 9.9961, "step": 29280 }, { "epoch": 0.14627081824764662, "grad_norm": 0.0894283652305603, "learning_rate": 2.5675986883276177e-05, "loss": 9.9954, "step": 29290 }, { "epoch": 0.14632075707258607, "grad_norm": 0.10029701143503189, "learning_rate": 2.5674484968334627e-05, "loss": 9.9962, "step": 29300 }, { "epoch": 0.14637069589752552, "grad_norm": 0.09673379361629486, "learning_rate": 2.5672983053393077e-05, "loss": 9.9969, "step": 29310 }, { "epoch": 0.14642063472246497, "grad_norm": 0.09456515312194824, "learning_rate": 2.5671481138451528e-05, "loss": 9.9946, "step": 29320 }, { "epoch": 0.14647057354740442, "grad_norm": 0.09132513403892517, "learning_rate": 2.5669979223509978e-05, "loss": 9.994, "step": 29330 }, { "epoch": 0.14652051237234387, "grad_norm": 0.09338773041963577, "learning_rate": 2.5668477308568425e-05, "loss": 9.9982, "step": 29340 }, { "epoch": 0.14657045119728332, "grad_norm": 0.0984477698802948, "learning_rate": 2.5666975393626875e-05, "loss": 9.9914, "step": 29350 }, { "epoch": 0.14662039002222277, "grad_norm": 0.09212279319763184, "learning_rate": 2.5665473478685325e-05, "loss": 9.9958, "step": 29360 }, { "epoch": 0.14667032884716222, "grad_norm": 0.10177190601825714, "learning_rate": 2.5663971563743775e-05, "loss": 9.9991, "step": 29370 }, { "epoch": 0.14672026767210167, "grad_norm": 0.09410154819488525, "learning_rate": 2.5662469648802225e-05, "loss": 9.9934, "step": 29380 }, { "epoch": 0.14677020649704112, "grad_norm": 0.09799119085073471, "learning_rate": 2.5660967733860672e-05, "loss": 9.9874, "step": 29390 }, { "epoch": 0.14682014532198057, "grad_norm": 0.10114170610904694, "learning_rate": 2.5659465818919122e-05, "loss": 9.9944, "step": 29400 }, { "epoch": 0.14687008414692002, "grad_norm": 0.09417644143104553, "learning_rate": 2.5657963903977572e-05, "loss": 9.9959, "step": 29410 }, { "epoch": 0.14692002297185947, "grad_norm": 0.09454021602869034, "learning_rate": 2.5656461989036023e-05, "loss": 9.9909, "step": 29420 }, { "epoch": 0.14696996179679891, "grad_norm": 0.09609093517065048, "learning_rate": 2.5654960074094473e-05, "loss": 9.9893, "step": 29430 }, { "epoch": 0.14701990062173836, "grad_norm": 0.09451326727867126, "learning_rate": 2.565345815915292e-05, "loss": 9.9925, "step": 29440 }, { "epoch": 0.1470698394466778, "grad_norm": 0.09044196456670761, "learning_rate": 2.565195624421137e-05, "loss": 9.9939, "step": 29450 }, { "epoch": 0.14711977827161726, "grad_norm": 0.10042398422956467, "learning_rate": 2.565045432926982e-05, "loss": 9.9976, "step": 29460 }, { "epoch": 0.1471697170965567, "grad_norm": 743.2418823242188, "learning_rate": 2.564895241432827e-05, "loss": 9.9953, "step": 29470 }, { "epoch": 0.14721965592149616, "grad_norm": 0.10025718063116074, "learning_rate": 2.564745049938672e-05, "loss": 10.5647, "step": 29480 }, { "epoch": 0.1472695947464356, "grad_norm": 0.0930456891655922, "learning_rate": 2.5645948584445167e-05, "loss": 9.9927, "step": 29490 }, { "epoch": 0.14731953357137506, "grad_norm": 0.09810245782136917, "learning_rate": 2.5644446669503617e-05, "loss": 9.9906, "step": 29500 }, { "epoch": 0.1473694723963145, "grad_norm": 0.0949150025844574, "learning_rate": 2.5642944754562067e-05, "loss": 11.4577, "step": 29510 }, { "epoch": 0.14741941122125396, "grad_norm": 0.09857135266065598, "learning_rate": 2.5641442839620518e-05, "loss": 9.9955, "step": 29520 }, { "epoch": 0.1474693500461934, "grad_norm": 0.09233613312244415, "learning_rate": 2.5639940924678968e-05, "loss": 9.9922, "step": 29530 }, { "epoch": 0.14751928887113286, "grad_norm": 0.0993812084197998, "learning_rate": 2.5638439009737415e-05, "loss": 9.9864, "step": 29540 }, { "epoch": 0.1475692276960723, "grad_norm": 0.09710720926523209, "learning_rate": 2.5636937094795865e-05, "loss": 9.9892, "step": 29550 }, { "epoch": 0.14761916652101176, "grad_norm": 0.09210072457790375, "learning_rate": 2.5635435179854315e-05, "loss": 9.9928, "step": 29560 }, { "epoch": 0.1476691053459512, "grad_norm": 0.0917966440320015, "learning_rate": 2.5633933264912765e-05, "loss": 9.9889, "step": 29570 }, { "epoch": 0.14771904417089066, "grad_norm": 0.09141924977302551, "learning_rate": 2.5632431349971215e-05, "loss": 9.9931, "step": 29580 }, { "epoch": 0.1477689829958301, "grad_norm": 0.09177865833044052, "learning_rate": 2.5630929435029662e-05, "loss": 9.9971, "step": 29590 }, { "epoch": 0.14781892182076956, "grad_norm": 0.09628382325172424, "learning_rate": 2.5629427520088112e-05, "loss": 9.991, "step": 29600 }, { "epoch": 0.147868860645709, "grad_norm": 0.10353731364011765, "learning_rate": 2.5627925605146562e-05, "loss": 9.9869, "step": 29610 }, { "epoch": 0.14791879947064845, "grad_norm": 0.08936607092618942, "learning_rate": 2.5626423690205013e-05, "loss": 9.991, "step": 29620 }, { "epoch": 0.1479687382955879, "grad_norm": 0.09186360985040665, "learning_rate": 2.5624921775263463e-05, "loss": 9.9913, "step": 29630 }, { "epoch": 0.14801867712052735, "grad_norm": 0.09984660148620605, "learning_rate": 2.5623419860321913e-05, "loss": 9.9862, "step": 29640 }, { "epoch": 0.1480686159454668, "grad_norm": 0.0906200036406517, "learning_rate": 2.562191794538036e-05, "loss": 9.9897, "step": 29650 }, { "epoch": 0.14811855477040625, "grad_norm": 0.1004503145813942, "learning_rate": 2.562041603043881e-05, "loss": 9.9894, "step": 29660 }, { "epoch": 0.1481684935953457, "grad_norm": 0.09145411103963852, "learning_rate": 2.561891411549726e-05, "loss": 9.9877, "step": 29670 }, { "epoch": 0.14821843242028515, "grad_norm": 0.09083334356546402, "learning_rate": 2.561741220055571e-05, "loss": 9.9825, "step": 29680 }, { "epoch": 0.1482683712452246, "grad_norm": 0.1027732640504837, "learning_rate": 2.561591028561416e-05, "loss": 9.988, "step": 29690 }, { "epoch": 0.14831831007016405, "grad_norm": 0.09426605701446533, "learning_rate": 2.5614408370672607e-05, "loss": 9.986, "step": 29700 }, { "epoch": 0.1483682488951035, "grad_norm": 0.09159605205059052, "learning_rate": 2.5612906455731057e-05, "loss": 9.9925, "step": 29710 }, { "epoch": 0.14841818772004295, "grad_norm": 0.09627106785774231, "learning_rate": 2.5611404540789508e-05, "loss": 9.9841, "step": 29720 }, { "epoch": 0.1484681265449824, "grad_norm": 0.09180369973182678, "learning_rate": 2.5609902625847958e-05, "loss": 9.992, "step": 29730 }, { "epoch": 0.14851806536992185, "grad_norm": 0.093332439661026, "learning_rate": 2.5608400710906408e-05, "loss": 9.9877, "step": 29740 }, { "epoch": 0.1485680041948613, "grad_norm": 0.09420640766620636, "learning_rate": 2.5606898795964855e-05, "loss": 9.9862, "step": 29750 }, { "epoch": 0.14861794301980075, "grad_norm": 0.08834953606128693, "learning_rate": 2.5605396881023305e-05, "loss": 9.9859, "step": 29760 }, { "epoch": 0.1486678818447402, "grad_norm": 0.09357737004756927, "learning_rate": 2.5603894966081755e-05, "loss": 9.9881, "step": 29770 }, { "epoch": 0.14871782066967965, "grad_norm": 0.09611498564481735, "learning_rate": 2.5602393051140205e-05, "loss": 9.9835, "step": 29780 }, { "epoch": 0.1487677594946191, "grad_norm": 0.09604179859161377, "learning_rate": 2.5600891136198655e-05, "loss": 9.9854, "step": 29790 }, { "epoch": 0.14881769831955854, "grad_norm": 0.0894085168838501, "learning_rate": 2.5599389221257102e-05, "loss": 9.9843, "step": 29800 }, { "epoch": 0.148867637144498, "grad_norm": 0.0987863540649414, "learning_rate": 2.5597887306315552e-05, "loss": 9.9861, "step": 29810 }, { "epoch": 0.14891757596943744, "grad_norm": 0.09360215812921524, "learning_rate": 2.5596385391374003e-05, "loss": 9.979, "step": 29820 }, { "epoch": 0.1489675147943769, "grad_norm": 0.09655702859163284, "learning_rate": 2.5594883476432453e-05, "loss": 9.9865, "step": 29830 }, { "epoch": 0.14901745361931634, "grad_norm": 0.09509619325399399, "learning_rate": 2.5593381561490903e-05, "loss": 9.982, "step": 29840 }, { "epoch": 0.1490673924442558, "grad_norm": 0.09575647860765457, "learning_rate": 2.559187964654935e-05, "loss": 9.9806, "step": 29850 }, { "epoch": 0.14911733126919524, "grad_norm": 0.09219710528850555, "learning_rate": 2.55903777316078e-05, "loss": 9.9857, "step": 29860 }, { "epoch": 0.1491672700941347, "grad_norm": 0.09096106886863708, "learning_rate": 2.558887581666625e-05, "loss": 9.9862, "step": 29870 }, { "epoch": 0.14921720891907414, "grad_norm": 0.09332380443811417, "learning_rate": 2.55873739017247e-05, "loss": 9.9837, "step": 29880 }, { "epoch": 0.1492671477440136, "grad_norm": 0.09098540246486664, "learning_rate": 2.558587198678315e-05, "loss": 9.9893, "step": 29890 }, { "epoch": 0.14931708656895304, "grad_norm": 0.09403214603662491, "learning_rate": 2.5584370071841597e-05, "loss": 9.9855, "step": 29900 }, { "epoch": 0.1493670253938925, "grad_norm": 0.10132903605699539, "learning_rate": 2.5582868156900047e-05, "loss": 9.9836, "step": 29910 }, { "epoch": 0.14941696421883194, "grad_norm": 0.09413734078407288, "learning_rate": 2.5581366241958498e-05, "loss": 9.9824, "step": 29920 }, { "epoch": 0.1494669030437714, "grad_norm": 0.09860097616910934, "learning_rate": 2.5579864327016948e-05, "loss": 9.9844, "step": 29930 }, { "epoch": 0.14951684186871084, "grad_norm": 0.09427344053983688, "learning_rate": 2.5578362412075398e-05, "loss": 9.9842, "step": 29940 }, { "epoch": 0.1495667806936503, "grad_norm": 0.09929951280355453, "learning_rate": 2.5576860497133845e-05, "loss": 9.9852, "step": 29950 }, { "epoch": 0.14961671951858974, "grad_norm": 0.08913746476173401, "learning_rate": 2.5575358582192295e-05, "loss": 9.988, "step": 29960 }, { "epoch": 0.14966665834352919, "grad_norm": 0.09487251192331314, "learning_rate": 2.5573856667250745e-05, "loss": 9.9882, "step": 29970 }, { "epoch": 0.14971659716846863, "grad_norm": 0.0945911556482315, "learning_rate": 2.5572354752309195e-05, "loss": 9.9848, "step": 29980 }, { "epoch": 0.14976653599340808, "grad_norm": 0.08827967196702957, "learning_rate": 2.5570852837367645e-05, "loss": 9.9878, "step": 29990 }, { "epoch": 0.14981647481834753, "grad_norm": 0.09094835072755814, "learning_rate": 2.5569350922426092e-05, "loss": 9.9809, "step": 30000 }, { "epoch": 0.14986641364328698, "grad_norm": 0.0922902449965477, "learning_rate": 2.5567849007484546e-05, "loss": 9.9868, "step": 30010 }, { "epoch": 0.14991635246822643, "grad_norm": 0.09045281261205673, "learning_rate": 2.5566347092542993e-05, "loss": 9.9841, "step": 30020 }, { "epoch": 0.14996629129316588, "grad_norm": 0.09216750413179398, "learning_rate": 2.5564845177601443e-05, "loss": 9.9797, "step": 30030 }, { "epoch": 0.15001623011810533, "grad_norm": 0.09659641981124878, "learning_rate": 2.5563343262659893e-05, "loss": 9.9785, "step": 30040 }, { "epoch": 0.15006616894304478, "grad_norm": 0.09455203264951706, "learning_rate": 2.556184134771834e-05, "loss": 9.9815, "step": 30050 }, { "epoch": 0.15011610776798423, "grad_norm": 0.089305080473423, "learning_rate": 2.5560339432776793e-05, "loss": 9.9792, "step": 30060 }, { "epoch": 0.15016604659292368, "grad_norm": 0.09118703007698059, "learning_rate": 2.555883751783524e-05, "loss": 9.9802, "step": 30070 }, { "epoch": 0.15021598541786313, "grad_norm": 0.09706766903400421, "learning_rate": 2.555733560289369e-05, "loss": 9.9764, "step": 30080 }, { "epoch": 0.15026592424280258, "grad_norm": 0.09246896207332611, "learning_rate": 2.555583368795214e-05, "loss": 9.981, "step": 30090 }, { "epoch": 0.15031586306774203, "grad_norm": 0.09420756250619888, "learning_rate": 2.5554331773010587e-05, "loss": 9.9817, "step": 30100 }, { "epoch": 0.15036580189268148, "grad_norm": 0.0959128737449646, "learning_rate": 2.555282985806904e-05, "loss": 9.9834, "step": 30110 }, { "epoch": 0.15041574071762093, "grad_norm": 0.09950920939445496, "learning_rate": 2.5551327943127488e-05, "loss": 9.979, "step": 30120 }, { "epoch": 0.15046567954256038, "grad_norm": 0.09626981616020203, "learning_rate": 2.5549826028185938e-05, "loss": 9.974, "step": 30130 }, { "epoch": 0.15051561836749983, "grad_norm": 0.09594815224409103, "learning_rate": 2.5548324113244388e-05, "loss": 9.9792, "step": 30140 }, { "epoch": 0.15056555719243928, "grad_norm": 0.10035426914691925, "learning_rate": 2.5546822198302835e-05, "loss": 9.976, "step": 30150 }, { "epoch": 0.15061549601737872, "grad_norm": 0.09873773902654648, "learning_rate": 2.554532028336129e-05, "loss": 9.9735, "step": 30160 }, { "epoch": 0.15066543484231815, "grad_norm": 0.09737424552440643, "learning_rate": 2.5543818368419735e-05, "loss": 9.9808, "step": 30170 }, { "epoch": 0.1507153736672576, "grad_norm": 0.09332314133644104, "learning_rate": 2.5542316453478185e-05, "loss": 9.9723, "step": 30180 }, { "epoch": 0.15076531249219705, "grad_norm": 0.09951841086149216, "learning_rate": 2.5540814538536636e-05, "loss": 9.9775, "step": 30190 }, { "epoch": 0.1508152513171365, "grad_norm": 0.09631986916065216, "learning_rate": 2.5539312623595082e-05, "loss": 9.9765, "step": 30200 }, { "epoch": 0.15086519014207594, "grad_norm": 0.08957190811634064, "learning_rate": 2.5537810708653536e-05, "loss": 9.985, "step": 30210 }, { "epoch": 0.1509151289670154, "grad_norm": 0.09298118948936462, "learning_rate": 2.5536308793711983e-05, "loss": 9.9777, "step": 30220 }, { "epoch": 0.15096506779195484, "grad_norm": 0.09950188547372818, "learning_rate": 2.5534806878770433e-05, "loss": 9.9758, "step": 30230 }, { "epoch": 0.1510150066168943, "grad_norm": 0.09786109626293182, "learning_rate": 2.5533304963828883e-05, "loss": 9.9709, "step": 30240 }, { "epoch": 0.15106494544183374, "grad_norm": 0.09600091725587845, "learning_rate": 2.553180304888733e-05, "loss": 9.9772, "step": 30250 }, { "epoch": 0.1511148842667732, "grad_norm": 0.0918637216091156, "learning_rate": 2.5530301133945783e-05, "loss": 9.9781, "step": 30260 }, { "epoch": 0.15116482309171264, "grad_norm": 0.09260197728872299, "learning_rate": 2.552879921900423e-05, "loss": 9.9701, "step": 30270 }, { "epoch": 0.1512147619166521, "grad_norm": 0.0909905806183815, "learning_rate": 2.552729730406268e-05, "loss": 9.9747, "step": 30280 }, { "epoch": 0.15126470074159154, "grad_norm": 0.09207629412412643, "learning_rate": 2.552579538912113e-05, "loss": 9.9713, "step": 30290 }, { "epoch": 0.151314639566531, "grad_norm": 0.09557502716779709, "learning_rate": 2.5524293474179577e-05, "loss": 9.9844, "step": 30300 }, { "epoch": 0.15136457839147044, "grad_norm": 0.0928947702050209, "learning_rate": 2.552279155923803e-05, "loss": 9.9739, "step": 30310 }, { "epoch": 0.1514145172164099, "grad_norm": 0.09521695226430893, "learning_rate": 2.5521289644296478e-05, "loss": 9.9733, "step": 30320 }, { "epoch": 0.15146445604134934, "grad_norm": 0.0963277593255043, "learning_rate": 2.551978772935493e-05, "loss": 9.969, "step": 30330 }, { "epoch": 0.1515143948662888, "grad_norm": 0.10125837475061417, "learning_rate": 2.5518285814413378e-05, "loss": 9.9739, "step": 30340 }, { "epoch": 0.15156433369122824, "grad_norm": 0.09548031538724899, "learning_rate": 2.5516783899471825e-05, "loss": 9.9702, "step": 30350 }, { "epoch": 0.1516142725161677, "grad_norm": 0.09445524960756302, "learning_rate": 2.551528198453028e-05, "loss": 9.9767, "step": 30360 }, { "epoch": 0.15166421134110714, "grad_norm": 0.09280666708946228, "learning_rate": 2.5513780069588725e-05, "loss": 9.9779, "step": 30370 }, { "epoch": 0.15171415016604659, "grad_norm": 0.09591789543628693, "learning_rate": 2.551227815464718e-05, "loss": 9.9722, "step": 30380 }, { "epoch": 0.15176408899098603, "grad_norm": 0.09540422260761261, "learning_rate": 2.5510776239705626e-05, "loss": 9.9732, "step": 30390 }, { "epoch": 0.15181402781592548, "grad_norm": 0.0966714397072792, "learning_rate": 2.5509274324764072e-05, "loss": 9.9744, "step": 30400 }, { "epoch": 0.15186396664086493, "grad_norm": 0.09428321570158005, "learning_rate": 2.5507772409822526e-05, "loss": 9.9751, "step": 30410 }, { "epoch": 0.15191390546580438, "grad_norm": 0.09702390432357788, "learning_rate": 2.5506270494880973e-05, "loss": 9.9724, "step": 30420 }, { "epoch": 0.15196384429074383, "grad_norm": 0.09882203489542007, "learning_rate": 2.5504768579939426e-05, "loss": 9.9698, "step": 30430 }, { "epoch": 0.15201378311568328, "grad_norm": 0.09188614040613174, "learning_rate": 2.5503266664997873e-05, "loss": 9.9745, "step": 30440 }, { "epoch": 0.15206372194062273, "grad_norm": 0.09487085789442062, "learning_rate": 2.550176475005632e-05, "loss": 9.9672, "step": 30450 }, { "epoch": 0.15211366076556218, "grad_norm": 0.09500377625226974, "learning_rate": 2.5500262835114773e-05, "loss": 9.9702, "step": 30460 }, { "epoch": 0.15216359959050163, "grad_norm": 0.09745945781469345, "learning_rate": 2.549876092017322e-05, "loss": 9.9739, "step": 30470 }, { "epoch": 0.15221353841544108, "grad_norm": 0.0949743390083313, "learning_rate": 2.5497259005231674e-05, "loss": 9.9646, "step": 30480 }, { "epoch": 0.15226347724038053, "grad_norm": 0.09280611574649811, "learning_rate": 2.549575709029012e-05, "loss": 9.967, "step": 30490 }, { "epoch": 0.15231341606531998, "grad_norm": 0.09759785979986191, "learning_rate": 2.5494255175348567e-05, "loss": 9.9691, "step": 30500 }, { "epoch": 0.15236335489025943, "grad_norm": 0.10251536220312119, "learning_rate": 2.549275326040702e-05, "loss": 9.968, "step": 30510 }, { "epoch": 0.15241329371519888, "grad_norm": 0.09339610487222672, "learning_rate": 2.5491251345465468e-05, "loss": 9.9709, "step": 30520 }, { "epoch": 0.15246323254013833, "grad_norm": 0.09631470590829849, "learning_rate": 2.548974943052392e-05, "loss": 9.9649, "step": 30530 }, { "epoch": 0.15251317136507778, "grad_norm": 0.09462681412696838, "learning_rate": 2.5488247515582368e-05, "loss": 9.9656, "step": 30540 }, { "epoch": 0.15256311019001723, "grad_norm": 0.09926269203424454, "learning_rate": 2.5486745600640815e-05, "loss": 9.9666, "step": 30550 }, { "epoch": 0.15261304901495668, "grad_norm": 0.09245786815881729, "learning_rate": 2.548524368569927e-05, "loss": 9.9648, "step": 30560 }, { "epoch": 0.15266298783989612, "grad_norm": 0.09163583815097809, "learning_rate": 2.5483741770757715e-05, "loss": 9.9667, "step": 30570 }, { "epoch": 0.15271292666483557, "grad_norm": 0.0959586575627327, "learning_rate": 2.548223985581617e-05, "loss": 9.9713, "step": 30580 }, { "epoch": 0.15276286548977502, "grad_norm": 0.09638317674398422, "learning_rate": 2.5480737940874616e-05, "loss": 9.9625, "step": 30590 }, { "epoch": 0.15281280431471447, "grad_norm": 0.0959702804684639, "learning_rate": 2.5479236025933062e-05, "loss": 9.9676, "step": 30600 }, { "epoch": 0.15286274313965392, "grad_norm": 0.09670331329107285, "learning_rate": 2.5477734110991516e-05, "loss": 9.9673, "step": 30610 }, { "epoch": 0.15291268196459337, "grad_norm": 0.09731889516115189, "learning_rate": 2.5476232196049963e-05, "loss": 9.9669, "step": 30620 }, { "epoch": 0.15296262078953282, "grad_norm": 0.09183140099048615, "learning_rate": 2.5474730281108416e-05, "loss": 9.9672, "step": 30630 }, { "epoch": 0.15301255961447227, "grad_norm": 0.09476214647293091, "learning_rate": 2.5473228366166863e-05, "loss": 9.9662, "step": 30640 }, { "epoch": 0.15306249843941172, "grad_norm": 0.09765252470970154, "learning_rate": 2.5471726451225313e-05, "loss": 9.9693, "step": 30650 }, { "epoch": 0.15311243726435117, "grad_norm": 0.0965784415602684, "learning_rate": 2.5470224536283763e-05, "loss": 9.9612, "step": 30660 }, { "epoch": 0.15316237608929062, "grad_norm": 0.0938754677772522, "learning_rate": 2.546872262134221e-05, "loss": 9.9638, "step": 30670 }, { "epoch": 0.15321231491423007, "grad_norm": 0.09392806887626648, "learning_rate": 2.5467220706400664e-05, "loss": 9.9641, "step": 30680 }, { "epoch": 0.15326225373916952, "grad_norm": 0.09800152480602264, "learning_rate": 2.546571879145911e-05, "loss": 9.9662, "step": 30690 }, { "epoch": 0.15331219256410897, "grad_norm": 0.09477967023849487, "learning_rate": 2.546421687651756e-05, "loss": 9.9667, "step": 30700 }, { "epoch": 0.15336213138904842, "grad_norm": 0.09264533221721649, "learning_rate": 2.546271496157601e-05, "loss": 9.966, "step": 30710 }, { "epoch": 0.15341207021398787, "grad_norm": 0.09792211651802063, "learning_rate": 2.5461213046634458e-05, "loss": 9.9652, "step": 30720 }, { "epoch": 0.15346200903892732, "grad_norm": 0.09196563065052032, "learning_rate": 2.545971113169291e-05, "loss": 9.9634, "step": 30730 }, { "epoch": 0.15351194786386677, "grad_norm": 0.09375791996717453, "learning_rate": 2.5458209216751358e-05, "loss": 9.9615, "step": 30740 }, { "epoch": 0.15356188668880622, "grad_norm": 0.10490723699331284, "learning_rate": 2.5456707301809808e-05, "loss": 9.9654, "step": 30750 }, { "epoch": 0.15361182551374566, "grad_norm": 0.08919631689786911, "learning_rate": 2.545520538686826e-05, "loss": 9.9659, "step": 30760 }, { "epoch": 0.15366176433868511, "grad_norm": 0.09268412739038467, "learning_rate": 2.5453703471926705e-05, "loss": 9.9595, "step": 30770 }, { "epoch": 0.15371170316362456, "grad_norm": 0.09661845862865448, "learning_rate": 2.545220155698516e-05, "loss": 9.9571, "step": 30780 }, { "epoch": 0.153761641988564, "grad_norm": 0.0946783795952797, "learning_rate": 2.5450699642043606e-05, "loss": 9.9624, "step": 30790 }, { "epoch": 0.15381158081350346, "grad_norm": 0.08772801607847214, "learning_rate": 2.5449197727102056e-05, "loss": 9.9651, "step": 30800 }, { "epoch": 0.1538615196384429, "grad_norm": 0.09275208413600922, "learning_rate": 2.5447695812160506e-05, "loss": 9.9595, "step": 30810 }, { "epoch": 0.15391145846338236, "grad_norm": 0.09553563594818115, "learning_rate": 2.5446193897218953e-05, "loss": 9.9569, "step": 30820 }, { "epoch": 0.1539613972883218, "grad_norm": 0.09462607651948929, "learning_rate": 2.5444691982277406e-05, "loss": 9.9615, "step": 30830 }, { "epoch": 0.15401133611326126, "grad_norm": 0.09698895364999771, "learning_rate": 2.5443190067335853e-05, "loss": 9.9661, "step": 30840 }, { "epoch": 0.1540612749382007, "grad_norm": 0.09504923969507217, "learning_rate": 2.5441688152394303e-05, "loss": 9.9681, "step": 30850 }, { "epoch": 0.15411121376314016, "grad_norm": 0.10031065344810486, "learning_rate": 2.5440186237452753e-05, "loss": 9.9588, "step": 30860 }, { "epoch": 0.1541611525880796, "grad_norm": 0.09581837058067322, "learning_rate": 2.54386843225112e-05, "loss": 9.959, "step": 30870 }, { "epoch": 0.15421109141301906, "grad_norm": 0.09290026128292084, "learning_rate": 2.5437182407569654e-05, "loss": 9.9649, "step": 30880 }, { "epoch": 0.1542610302379585, "grad_norm": 0.0961495116353035, "learning_rate": 2.54356804926281e-05, "loss": 9.9566, "step": 30890 }, { "epoch": 0.15431096906289796, "grad_norm": 0.09705748409032822, "learning_rate": 2.543417857768655e-05, "loss": 9.9632, "step": 30900 }, { "epoch": 0.1543609078878374, "grad_norm": 0.09860498458147049, "learning_rate": 2.5432676662745e-05, "loss": 9.957, "step": 30910 }, { "epoch": 0.15441084671277686, "grad_norm": 0.09649617969989777, "learning_rate": 2.5431174747803448e-05, "loss": 9.9609, "step": 30920 }, { "epoch": 0.1544607855377163, "grad_norm": 0.09514536708593369, "learning_rate": 2.54296728328619e-05, "loss": 9.9607, "step": 30930 }, { "epoch": 0.15451072436265575, "grad_norm": 0.09036850929260254, "learning_rate": 2.5428170917920348e-05, "loss": 9.9562, "step": 30940 }, { "epoch": 0.1545606631875952, "grad_norm": 0.09239833056926727, "learning_rate": 2.5426669002978798e-05, "loss": 9.9597, "step": 30950 }, { "epoch": 0.15461060201253465, "grad_norm": 0.09781666100025177, "learning_rate": 2.542516708803725e-05, "loss": 9.9674, "step": 30960 }, { "epoch": 0.1546605408374741, "grad_norm": 0.09337397664785385, "learning_rate": 2.54236651730957e-05, "loss": 9.9614, "step": 30970 }, { "epoch": 0.15471047966241355, "grad_norm": 0.09049335867166519, "learning_rate": 2.542216325815415e-05, "loss": 9.9614, "step": 30980 }, { "epoch": 0.154760418487353, "grad_norm": 0.09918399900197983, "learning_rate": 2.5420661343212596e-05, "loss": 9.9559, "step": 30990 }, { "epoch": 0.15481035731229245, "grad_norm": 0.09200762212276459, "learning_rate": 2.5419159428271046e-05, "loss": 9.9508, "step": 31000 }, { "epoch": 0.1548602961372319, "grad_norm": 0.09439615160226822, "learning_rate": 2.5417657513329496e-05, "loss": 9.9625, "step": 31010 }, { "epoch": 0.15491023496217135, "grad_norm": 0.09249376505613327, "learning_rate": 2.5416155598387946e-05, "loss": 9.9598, "step": 31020 }, { "epoch": 0.1549601737871108, "grad_norm": 0.09385478496551514, "learning_rate": 2.5414653683446396e-05, "loss": 9.9589, "step": 31030 }, { "epoch": 0.15501011261205025, "grad_norm": 0.0966297909617424, "learning_rate": 2.5413151768504843e-05, "loss": 9.9642, "step": 31040 }, { "epoch": 0.1550600514369897, "grad_norm": 0.09643752872943878, "learning_rate": 2.5411649853563293e-05, "loss": 9.9559, "step": 31050 }, { "epoch": 0.15510999026192915, "grad_norm": 0.09509705007076263, "learning_rate": 2.5410147938621743e-05, "loss": 9.9555, "step": 31060 }, { "epoch": 0.1551599290868686, "grad_norm": 0.09453869611024857, "learning_rate": 2.5408646023680194e-05, "loss": 9.9591, "step": 31070 }, { "epoch": 0.15520986791180805, "grad_norm": 0.09096340090036392, "learning_rate": 2.5407144108738644e-05, "loss": 9.9628, "step": 31080 }, { "epoch": 0.1552598067367475, "grad_norm": 0.09323681145906448, "learning_rate": 2.540564219379709e-05, "loss": 9.959, "step": 31090 }, { "epoch": 0.15530974556168695, "grad_norm": 0.09170509874820709, "learning_rate": 2.540414027885554e-05, "loss": 9.9538, "step": 31100 }, { "epoch": 0.1553596843866264, "grad_norm": 0.09655750542879105, "learning_rate": 2.540263836391399e-05, "loss": 9.9561, "step": 31110 }, { "epoch": 0.15540962321156584, "grad_norm": 0.09387839585542679, "learning_rate": 2.540113644897244e-05, "loss": 9.9571, "step": 31120 }, { "epoch": 0.1554595620365053, "grad_norm": 0.09941628575325012, "learning_rate": 2.539963453403089e-05, "loss": 9.9566, "step": 31130 }, { "epoch": 0.15550950086144474, "grad_norm": 0.09646540135145187, "learning_rate": 2.5398132619089338e-05, "loss": 9.9543, "step": 31140 }, { "epoch": 0.15555943968638417, "grad_norm": 0.10054589062929153, "learning_rate": 2.5396630704147788e-05, "loss": 9.9528, "step": 31150 }, { "epoch": 0.15560937851132361, "grad_norm": 0.09326030313968658, "learning_rate": 2.539512878920624e-05, "loss": 9.962, "step": 31160 }, { "epoch": 0.15565931733626306, "grad_norm": 0.08991136401891708, "learning_rate": 2.539362687426469e-05, "loss": 9.9522, "step": 31170 }, { "epoch": 0.15570925616120251, "grad_norm": 0.09380258619785309, "learning_rate": 2.539212495932314e-05, "loss": 9.9584, "step": 31180 }, { "epoch": 0.15575919498614196, "grad_norm": 0.09635492414236069, "learning_rate": 2.5390623044381586e-05, "loss": 9.9486, "step": 31190 }, { "epoch": 0.1558091338110814, "grad_norm": 0.09452281892299652, "learning_rate": 2.5389121129440036e-05, "loss": 9.9505, "step": 31200 }, { "epoch": 0.15585907263602086, "grad_norm": 0.09984589368104935, "learning_rate": 2.5387619214498486e-05, "loss": 9.9507, "step": 31210 }, { "epoch": 0.1559090114609603, "grad_norm": 0.09607294946908951, "learning_rate": 2.5386117299556936e-05, "loss": 9.9526, "step": 31220 }, { "epoch": 0.15595895028589976, "grad_norm": 0.0886487290263176, "learning_rate": 2.5384615384615386e-05, "loss": 9.9552, "step": 31230 }, { "epoch": 0.1560088891108392, "grad_norm": 0.09027554839849472, "learning_rate": 2.5383113469673833e-05, "loss": 9.958, "step": 31240 }, { "epoch": 0.15605882793577866, "grad_norm": 0.09098269045352936, "learning_rate": 2.5381611554732283e-05, "loss": 9.9465, "step": 31250 }, { "epoch": 0.1561087667607181, "grad_norm": 0.10451994836330414, "learning_rate": 2.5380109639790733e-05, "loss": 9.9555, "step": 31260 }, { "epoch": 0.15615870558565756, "grad_norm": 0.09637783467769623, "learning_rate": 2.5378607724849184e-05, "loss": 9.9493, "step": 31270 }, { "epoch": 0.156208644410597, "grad_norm": 0.09026754647493362, "learning_rate": 2.5377105809907634e-05, "loss": 9.9526, "step": 31280 }, { "epoch": 0.15625858323553646, "grad_norm": 0.09922222793102264, "learning_rate": 2.5375603894966084e-05, "loss": 9.9455, "step": 31290 }, { "epoch": 0.1563085220604759, "grad_norm": 0.09385956078767776, "learning_rate": 2.537410198002453e-05, "loss": 9.9496, "step": 31300 }, { "epoch": 0.15635846088541536, "grad_norm": 0.09471774101257324, "learning_rate": 2.537260006508298e-05, "loss": 9.9536, "step": 31310 }, { "epoch": 0.1564083997103548, "grad_norm": 0.09749903529882431, "learning_rate": 2.537109815014143e-05, "loss": 9.9501, "step": 31320 }, { "epoch": 0.15645833853529426, "grad_norm": 0.09855715185403824, "learning_rate": 2.536959623519988e-05, "loss": 9.9486, "step": 31330 }, { "epoch": 0.1565082773602337, "grad_norm": 0.0999755784869194, "learning_rate": 2.536809432025833e-05, "loss": 9.9482, "step": 31340 }, { "epoch": 0.15655821618517315, "grad_norm": 0.10329025983810425, "learning_rate": 2.5366592405316778e-05, "loss": 9.9535, "step": 31350 }, { "epoch": 0.1566081550101126, "grad_norm": 0.09251412004232407, "learning_rate": 2.536509049037523e-05, "loss": 9.9509, "step": 31360 }, { "epoch": 0.15665809383505205, "grad_norm": 0.09849464893341064, "learning_rate": 2.536358857543368e-05, "loss": 9.9476, "step": 31370 }, { "epoch": 0.1567080326599915, "grad_norm": 0.09341175109148026, "learning_rate": 2.536208666049213e-05, "loss": 9.9519, "step": 31380 }, { "epoch": 0.15675797148493095, "grad_norm": 0.09484104812145233, "learning_rate": 2.536058474555058e-05, "loss": 9.9555, "step": 31390 }, { "epoch": 0.1568079103098704, "grad_norm": 0.09805377572774887, "learning_rate": 2.5359082830609026e-05, "loss": 9.948, "step": 31400 }, { "epoch": 0.15685784913480985, "grad_norm": 0.09305404126644135, "learning_rate": 2.5357580915667476e-05, "loss": 9.95, "step": 31410 }, { "epoch": 0.1569077879597493, "grad_norm": 0.09033924341201782, "learning_rate": 2.5356079000725926e-05, "loss": 9.949, "step": 31420 }, { "epoch": 0.15695772678468875, "grad_norm": 0.094557024538517, "learning_rate": 2.5354577085784376e-05, "loss": 9.9521, "step": 31430 }, { "epoch": 0.1570076656096282, "grad_norm": 0.09300687164068222, "learning_rate": 2.5353075170842826e-05, "loss": 9.945, "step": 31440 }, { "epoch": 0.15705760443456765, "grad_norm": 0.09191596508026123, "learning_rate": 2.5351573255901273e-05, "loss": 9.948, "step": 31450 }, { "epoch": 0.1571075432595071, "grad_norm": 0.0944577232003212, "learning_rate": 2.5350071340959723e-05, "loss": 9.9515, "step": 31460 }, { "epoch": 0.15715748208444655, "grad_norm": 0.09393108636140823, "learning_rate": 2.5348569426018174e-05, "loss": 9.9459, "step": 31470 }, { "epoch": 0.157207420909386, "grad_norm": 0.09408476948738098, "learning_rate": 2.5347067511076624e-05, "loss": 9.9547, "step": 31480 }, { "epoch": 0.15725735973432545, "grad_norm": 0.09239377081394196, "learning_rate": 2.5345565596135074e-05, "loss": 9.9479, "step": 31490 }, { "epoch": 0.1573072985592649, "grad_norm": 0.09746348112821579, "learning_rate": 2.534406368119352e-05, "loss": 9.9445, "step": 31500 }, { "epoch": 0.15735723738420435, "grad_norm": 0.09604940563440323, "learning_rate": 2.534256176625197e-05, "loss": 9.9443, "step": 31510 }, { "epoch": 0.1574071762091438, "grad_norm": 0.09521540254354477, "learning_rate": 2.534105985131042e-05, "loss": 9.9499, "step": 31520 }, { "epoch": 0.15745711503408324, "grad_norm": 0.09604847431182861, "learning_rate": 2.533955793636887e-05, "loss": 9.9507, "step": 31530 }, { "epoch": 0.1575070538590227, "grad_norm": 0.09625346958637238, "learning_rate": 2.533805602142732e-05, "loss": 9.9499, "step": 31540 }, { "epoch": 0.15755699268396214, "grad_norm": 0.09137043356895447, "learning_rate": 2.5336554106485768e-05, "loss": 9.9387, "step": 31550 }, { "epoch": 0.1576069315089016, "grad_norm": 0.09646125137805939, "learning_rate": 2.533505219154422e-05, "loss": 9.9431, "step": 31560 }, { "epoch": 0.15765687033384104, "grad_norm": 0.0926939845085144, "learning_rate": 2.533355027660267e-05, "loss": 9.9423, "step": 31570 }, { "epoch": 0.1577068091587805, "grad_norm": 0.09652858227491379, "learning_rate": 2.533204836166112e-05, "loss": 9.9464, "step": 31580 }, { "epoch": 0.15775674798371994, "grad_norm": 0.09520108252763748, "learning_rate": 2.533054644671957e-05, "loss": 9.946, "step": 31590 }, { "epoch": 0.1578066868086594, "grad_norm": 0.0973607674241066, "learning_rate": 2.5329044531778016e-05, "loss": 9.94, "step": 31600 }, { "epoch": 0.15785662563359884, "grad_norm": 0.09493687748908997, "learning_rate": 2.532754261683647e-05, "loss": 9.944, "step": 31610 }, { "epoch": 0.1579065644585383, "grad_norm": 0.09770392626523972, "learning_rate": 2.5326040701894916e-05, "loss": 9.9393, "step": 31620 }, { "epoch": 0.15795650328347774, "grad_norm": 0.09622366726398468, "learning_rate": 2.5324538786953366e-05, "loss": 9.9456, "step": 31630 }, { "epoch": 0.1580064421084172, "grad_norm": 0.09520258009433746, "learning_rate": 2.5323036872011817e-05, "loss": 9.9408, "step": 31640 }, { "epoch": 0.15805638093335664, "grad_norm": 0.09456585347652435, "learning_rate": 2.5321534957070263e-05, "loss": 9.9386, "step": 31650 }, { "epoch": 0.1581063197582961, "grad_norm": 0.09494321048259735, "learning_rate": 2.5320033042128717e-05, "loss": 9.9401, "step": 31660 }, { "epoch": 0.15815625858323554, "grad_norm": 0.09259496629238129, "learning_rate": 2.5318531127187164e-05, "loss": 9.9453, "step": 31670 }, { "epoch": 0.158206197408175, "grad_norm": 0.0940915197134018, "learning_rate": 2.5317029212245614e-05, "loss": 9.95, "step": 31680 }, { "epoch": 0.15825613623311444, "grad_norm": 0.09100774675607681, "learning_rate": 2.5315527297304064e-05, "loss": 9.9416, "step": 31690 }, { "epoch": 0.15830607505805389, "grad_norm": 0.09511730074882507, "learning_rate": 2.531402538236251e-05, "loss": 9.9399, "step": 31700 }, { "epoch": 0.15835601388299334, "grad_norm": 0.0985988974571228, "learning_rate": 2.5312523467420964e-05, "loss": 11.4693, "step": 31710 }, { "epoch": 0.15840595270793278, "grad_norm": 0.09434106200933456, "learning_rate": 2.531102155247941e-05, "loss": 20.0531, "step": 31720 }, { "epoch": 0.15845589153287223, "grad_norm": 0.0996764674782753, "learning_rate": 2.530951963753786e-05, "loss": 9.9376, "step": 31730 }, { "epoch": 0.15850583035781168, "grad_norm": 0.09661601483821869, "learning_rate": 2.530801772259631e-05, "loss": 13.1668, "step": 31740 }, { "epoch": 0.15855576918275113, "grad_norm": 0.09104971587657928, "learning_rate": 2.530651580765476e-05, "loss": 10.18, "step": 31750 }, { "epoch": 0.15860570800769058, "grad_norm": 0.09955008327960968, "learning_rate": 2.5305013892713212e-05, "loss": 9.9397, "step": 31760 }, { "epoch": 0.15865564683263003, "grad_norm": 0.09806697070598602, "learning_rate": 2.530351197777166e-05, "loss": 9.9451, "step": 31770 }, { "epoch": 0.15870558565756948, "grad_norm": 0.09614180773496628, "learning_rate": 2.530201006283011e-05, "loss": 9.9402, "step": 31780 }, { "epoch": 0.15875552448250893, "grad_norm": 0.09188991039991379, "learning_rate": 2.530050814788856e-05, "loss": 9.9389, "step": 31790 }, { "epoch": 0.15880546330744838, "grad_norm": 0.0986422747373581, "learning_rate": 2.5299006232947006e-05, "loss": 9.9408, "step": 31800 }, { "epoch": 0.15885540213238783, "grad_norm": 0.09337294101715088, "learning_rate": 2.529750431800546e-05, "loss": 9.9416, "step": 31810 }, { "epoch": 0.15890534095732728, "grad_norm": 0.09738942235708237, "learning_rate": 2.5296002403063906e-05, "loss": 9.9414, "step": 31820 }, { "epoch": 0.15895527978226673, "grad_norm": 0.09532233327627182, "learning_rate": 2.5294500488122356e-05, "loss": 9.9396, "step": 31830 }, { "epoch": 0.15900521860720618, "grad_norm": 0.09321427345275879, "learning_rate": 2.5292998573180807e-05, "loss": 9.9449, "step": 31840 }, { "epoch": 0.15905515743214563, "grad_norm": 0.09735007584095001, "learning_rate": 2.5291496658239253e-05, "loss": 9.9388, "step": 31850 }, { "epoch": 0.15910509625708508, "grad_norm": 0.09496832638978958, "learning_rate": 2.5289994743297707e-05, "loss": 9.9346, "step": 31860 }, { "epoch": 0.15915503508202453, "grad_norm": 0.09069550782442093, "learning_rate": 2.5288492828356154e-05, "loss": 9.9392, "step": 31870 }, { "epoch": 0.15920497390696398, "grad_norm": 0.09822098165750504, "learning_rate": 2.5286990913414604e-05, "loss": 9.9366, "step": 31880 }, { "epoch": 0.15925491273190343, "grad_norm": 0.09519645571708679, "learning_rate": 2.5285488998473054e-05, "loss": 9.9355, "step": 31890 }, { "epoch": 0.15930485155684287, "grad_norm": 0.09621410071849823, "learning_rate": 2.52839870835315e-05, "loss": 9.9392, "step": 31900 }, { "epoch": 0.15935479038178232, "grad_norm": 0.0956474095582962, "learning_rate": 2.5282485168589954e-05, "loss": 9.9361, "step": 31910 }, { "epoch": 0.15940472920672177, "grad_norm": 0.09148707240819931, "learning_rate": 2.52809832536484e-05, "loss": 9.9364, "step": 31920 }, { "epoch": 0.15945466803166122, "grad_norm": 0.09263239800930023, "learning_rate": 2.5279481338706855e-05, "loss": 9.9372, "step": 31930 }, { "epoch": 0.15950460685660067, "grad_norm": 0.09924571961164474, "learning_rate": 2.52779794237653e-05, "loss": 9.9365, "step": 31940 }, { "epoch": 0.15955454568154012, "grad_norm": 0.09445744752883911, "learning_rate": 2.527647750882375e-05, "loss": 9.9395, "step": 31950 }, { "epoch": 0.15960448450647957, "grad_norm": 0.09523255378007889, "learning_rate": 2.5274975593882202e-05, "loss": 9.9367, "step": 31960 }, { "epoch": 0.15965442333141902, "grad_norm": 0.09308159351348877, "learning_rate": 2.527347367894065e-05, "loss": 9.9337, "step": 31970 }, { "epoch": 0.15970436215635847, "grad_norm": 0.09193790704011917, "learning_rate": 2.5271971763999102e-05, "loss": 9.9315, "step": 31980 }, { "epoch": 0.15975430098129792, "grad_norm": 0.09079098701477051, "learning_rate": 2.527046984905755e-05, "loss": 9.942, "step": 31990 }, { "epoch": 0.15980423980623737, "grad_norm": 0.09207965433597565, "learning_rate": 2.5268967934115996e-05, "loss": 9.9348, "step": 32000 }, { "epoch": 0.15985417863117682, "grad_norm": 0.09450741112232208, "learning_rate": 2.526746601917445e-05, "loss": 9.938, "step": 32010 }, { "epoch": 0.15990411745611627, "grad_norm": 0.08837747573852539, "learning_rate": 2.5265964104232896e-05, "loss": 9.9355, "step": 32020 }, { "epoch": 0.15995405628105572, "grad_norm": 0.09545224159955978, "learning_rate": 2.526446218929135e-05, "loss": 9.9378, "step": 32030 }, { "epoch": 0.16000399510599517, "grad_norm": 0.09526308625936508, "learning_rate": 2.5262960274349797e-05, "loss": 9.9396, "step": 32040 }, { "epoch": 0.16005393393093462, "grad_norm": 0.09556493163108826, "learning_rate": 2.5261458359408243e-05, "loss": 9.929, "step": 32050 }, { "epoch": 0.16010387275587407, "grad_norm": 0.09006509184837341, "learning_rate": 2.5259956444466697e-05, "loss": 9.9293, "step": 32060 }, { "epoch": 0.16015381158081352, "grad_norm": 0.09654060006141663, "learning_rate": 2.5258454529525144e-05, "loss": 9.9321, "step": 32070 }, { "epoch": 0.16020375040575296, "grad_norm": 0.09235110133886337, "learning_rate": 2.5256952614583597e-05, "loss": 9.9315, "step": 32080 }, { "epoch": 0.16025368923069241, "grad_norm": 0.09377003461122513, "learning_rate": 2.5255450699642044e-05, "loss": 9.9327, "step": 32090 }, { "epoch": 0.16030362805563186, "grad_norm": 0.0920942947268486, "learning_rate": 2.525394878470049e-05, "loss": 9.93, "step": 32100 }, { "epoch": 0.1603535668805713, "grad_norm": 0.09704404324293137, "learning_rate": 2.5252446869758944e-05, "loss": 9.9293, "step": 32110 }, { "epoch": 0.16040350570551076, "grad_norm": 0.09537354111671448, "learning_rate": 2.525094495481739e-05, "loss": 9.9361, "step": 32120 }, { "epoch": 0.1604534445304502, "grad_norm": 0.09306449443101883, "learning_rate": 2.5249443039875845e-05, "loss": 9.9365, "step": 32130 }, { "epoch": 0.16050338335538963, "grad_norm": 0.09331690520048141, "learning_rate": 2.524794112493429e-05, "loss": 9.9333, "step": 32140 }, { "epoch": 0.16055332218032908, "grad_norm": 0.09822415560483932, "learning_rate": 2.524643920999274e-05, "loss": 9.9411, "step": 32150 }, { "epoch": 0.16060326100526853, "grad_norm": 0.0978788435459137, "learning_rate": 2.5244937295051192e-05, "loss": 9.9302, "step": 32160 }, { "epoch": 0.16065319983020798, "grad_norm": 0.09318576008081436, "learning_rate": 2.524343538010964e-05, "loss": 9.9322, "step": 32170 }, { "epoch": 0.16070313865514743, "grad_norm": 0.09461916238069534, "learning_rate": 2.5241933465168092e-05, "loss": 9.9289, "step": 32180 }, { "epoch": 0.16075307748008688, "grad_norm": 0.08854075521230698, "learning_rate": 2.524043155022654e-05, "loss": 9.9322, "step": 32190 }, { "epoch": 0.16080301630502633, "grad_norm": 0.09415885806083679, "learning_rate": 2.5238929635284986e-05, "loss": 9.9298, "step": 32200 }, { "epoch": 0.16085295512996578, "grad_norm": 0.09000342339277267, "learning_rate": 2.523742772034344e-05, "loss": 9.9248, "step": 32210 }, { "epoch": 0.16090289395490523, "grad_norm": 0.09261956810951233, "learning_rate": 2.5235925805401886e-05, "loss": 9.9282, "step": 32220 }, { "epoch": 0.16095283277984468, "grad_norm": 0.09217025339603424, "learning_rate": 2.523442389046034e-05, "loss": 9.9289, "step": 32230 }, { "epoch": 0.16100277160478413, "grad_norm": 0.09619983285665512, "learning_rate": 2.5232921975518787e-05, "loss": 9.9366, "step": 32240 }, { "epoch": 0.16105271042972358, "grad_norm": 0.0968618094921112, "learning_rate": 2.5231420060577237e-05, "loss": 9.9347, "step": 32250 }, { "epoch": 0.16110264925466303, "grad_norm": 0.09648020565509796, "learning_rate": 2.5229918145635687e-05, "loss": 9.929, "step": 32260 }, { "epoch": 0.16115258807960248, "grad_norm": 0.09538908302783966, "learning_rate": 2.5228416230694134e-05, "loss": 9.9289, "step": 32270 }, { "epoch": 0.16120252690454193, "grad_norm": 0.09745009243488312, "learning_rate": 2.5226914315752587e-05, "loss": 9.9325, "step": 32280 }, { "epoch": 0.16125246572948138, "grad_norm": 0.09166844934225082, "learning_rate": 2.5225412400811034e-05, "loss": 9.9295, "step": 32290 }, { "epoch": 0.16130240455442083, "grad_norm": 0.09489347785711288, "learning_rate": 2.5223910485869484e-05, "loss": 9.9295, "step": 32300 }, { "epoch": 0.16135234337936027, "grad_norm": 0.09680917114019394, "learning_rate": 2.5222408570927934e-05, "loss": 9.926, "step": 32310 }, { "epoch": 0.16140228220429972, "grad_norm": 0.08917848020792007, "learning_rate": 2.522090665598638e-05, "loss": 9.9332, "step": 32320 }, { "epoch": 0.16145222102923917, "grad_norm": 0.0952332615852356, "learning_rate": 2.5219404741044835e-05, "loss": 9.922, "step": 32330 }, { "epoch": 0.16150215985417862, "grad_norm": 0.095283642411232, "learning_rate": 2.521790282610328e-05, "loss": 9.9313, "step": 32340 }, { "epoch": 0.16155209867911807, "grad_norm": 0.09335513412952423, "learning_rate": 2.5216400911161732e-05, "loss": 9.9285, "step": 32350 }, { "epoch": 0.16160203750405752, "grad_norm": 0.09106207638978958, "learning_rate": 2.5214898996220182e-05, "loss": 9.9263, "step": 32360 }, { "epoch": 0.16165197632899697, "grad_norm": 0.08792679011821747, "learning_rate": 2.521339708127863e-05, "loss": 9.9225, "step": 32370 }, { "epoch": 0.16170191515393642, "grad_norm": 0.08863825350999832, "learning_rate": 2.5211895166337082e-05, "loss": 9.9298, "step": 32380 }, { "epoch": 0.16175185397887587, "grad_norm": 0.08740777522325516, "learning_rate": 2.521039325139553e-05, "loss": 9.9241, "step": 32390 }, { "epoch": 0.16180179280381532, "grad_norm": 0.10181976854801178, "learning_rate": 2.520889133645398e-05, "loss": 9.9275, "step": 32400 }, { "epoch": 0.16185173162875477, "grad_norm": 0.09686804562807083, "learning_rate": 2.520738942151243e-05, "loss": 9.9219, "step": 32410 }, { "epoch": 0.16190167045369422, "grad_norm": 0.09239942580461502, "learning_rate": 2.5205887506570876e-05, "loss": 9.9238, "step": 32420 }, { "epoch": 0.16195160927863367, "grad_norm": 0.09450032562017441, "learning_rate": 2.520438559162933e-05, "loss": 9.9273, "step": 32430 }, { "epoch": 0.16200154810357312, "grad_norm": 0.09564870595932007, "learning_rate": 2.5202883676687777e-05, "loss": 9.919, "step": 32440 }, { "epoch": 0.16205148692851257, "grad_norm": 0.09656788408756256, "learning_rate": 2.5201381761746227e-05, "loss": 9.9207, "step": 32450 }, { "epoch": 0.16210142575345202, "grad_norm": 0.09183277189731598, "learning_rate": 2.5199879846804677e-05, "loss": 9.928, "step": 32460 }, { "epoch": 0.16215136457839147, "grad_norm": 0.09268153458833694, "learning_rate": 2.5198377931863124e-05, "loss": 9.9258, "step": 32470 }, { "epoch": 0.16220130340333092, "grad_norm": 0.09812065958976746, "learning_rate": 2.5196876016921577e-05, "loss": 9.9289, "step": 32480 }, { "epoch": 0.16225124222827036, "grad_norm": 0.09496765583753586, "learning_rate": 2.5195374101980024e-05, "loss": 9.9278, "step": 32490 }, { "epoch": 0.16230118105320981, "grad_norm": 0.09568601101636887, "learning_rate": 2.5193872187038474e-05, "loss": 9.9279, "step": 32500 }, { "epoch": 0.16235111987814926, "grad_norm": 0.0902891531586647, "learning_rate": 2.5192370272096924e-05, "loss": 9.9261, "step": 32510 }, { "epoch": 0.1624010587030887, "grad_norm": 0.09505823254585266, "learning_rate": 2.519086835715537e-05, "loss": 9.921, "step": 32520 }, { "epoch": 0.16245099752802816, "grad_norm": 0.09195969253778458, "learning_rate": 2.5189366442213825e-05, "loss": 9.925, "step": 32530 }, { "epoch": 0.1625009363529676, "grad_norm": 0.09071581065654755, "learning_rate": 2.518786452727227e-05, "loss": 9.9256, "step": 32540 }, { "epoch": 0.16255087517790706, "grad_norm": 0.0933861956000328, "learning_rate": 2.5186362612330722e-05, "loss": 9.9248, "step": 32550 }, { "epoch": 0.1626008140028465, "grad_norm": 0.09781572967767715, "learning_rate": 2.5184860697389172e-05, "loss": 9.9215, "step": 32560 }, { "epoch": 0.16265075282778596, "grad_norm": 0.09224456548690796, "learning_rate": 2.5183358782447622e-05, "loss": 9.925, "step": 32570 }, { "epoch": 0.1627006916527254, "grad_norm": 0.09544172137975693, "learning_rate": 2.5181856867506072e-05, "loss": 9.9167, "step": 32580 }, { "epoch": 0.16275063047766486, "grad_norm": 0.09155009686946869, "learning_rate": 2.518035495256452e-05, "loss": 9.9184, "step": 32590 }, { "epoch": 0.1628005693026043, "grad_norm": 0.10186931490898132, "learning_rate": 2.517885303762297e-05, "loss": 9.9262, "step": 32600 }, { "epoch": 0.16285050812754376, "grad_norm": 0.09243344515562057, "learning_rate": 2.517735112268142e-05, "loss": 9.9216, "step": 32610 }, { "epoch": 0.1629004469524832, "grad_norm": 0.08900917321443558, "learning_rate": 2.517584920773987e-05, "loss": 9.9274, "step": 32620 }, { "epoch": 0.16295038577742266, "grad_norm": 0.09651615470647812, "learning_rate": 2.517434729279832e-05, "loss": 9.9232, "step": 32630 }, { "epoch": 0.1630003246023621, "grad_norm": 0.09579268842935562, "learning_rate": 2.5172845377856767e-05, "loss": 9.9149, "step": 32640 }, { "epoch": 0.16305026342730156, "grad_norm": 0.09731176495552063, "learning_rate": 2.5171343462915217e-05, "loss": 9.9206, "step": 32650 }, { "epoch": 0.163100202252241, "grad_norm": 0.10048079490661621, "learning_rate": 2.5169841547973667e-05, "loss": 9.9182, "step": 32660 }, { "epoch": 0.16315014107718045, "grad_norm": 0.0903536006808281, "learning_rate": 2.5168339633032117e-05, "loss": 9.9161, "step": 32670 }, { "epoch": 0.1632000799021199, "grad_norm": 0.096148781478405, "learning_rate": 2.5166837718090567e-05, "loss": 9.9185, "step": 32680 }, { "epoch": 0.16325001872705935, "grad_norm": 0.09729062020778656, "learning_rate": 2.5165335803149014e-05, "loss": 9.9234, "step": 32690 }, { "epoch": 0.1632999575519988, "grad_norm": 0.09655334800481796, "learning_rate": 2.5163833888207464e-05, "loss": 9.9228, "step": 32700 }, { "epoch": 0.16334989637693825, "grad_norm": 0.09325189888477325, "learning_rate": 2.5162331973265914e-05, "loss": 9.9221, "step": 32710 }, { "epoch": 0.1633998352018777, "grad_norm": 0.09188661724328995, "learning_rate": 2.5160830058324365e-05, "loss": 9.9184, "step": 32720 }, { "epoch": 0.16344977402681715, "grad_norm": 0.08944937586784363, "learning_rate": 2.5159328143382815e-05, "loss": 9.9146, "step": 32730 }, { "epoch": 0.1634997128517566, "grad_norm": 0.09515637904405594, "learning_rate": 2.515782622844126e-05, "loss": 9.9245, "step": 32740 }, { "epoch": 0.16354965167669605, "grad_norm": 0.09636104851961136, "learning_rate": 2.5156324313499712e-05, "loss": 9.9186, "step": 32750 }, { "epoch": 0.1635995905016355, "grad_norm": 0.09423839300870895, "learning_rate": 2.5154822398558162e-05, "loss": 9.9193, "step": 32760 }, { "epoch": 0.16364952932657495, "grad_norm": 0.09561903774738312, "learning_rate": 2.5153320483616612e-05, "loss": 9.9095, "step": 32770 }, { "epoch": 0.1636994681515144, "grad_norm": 0.0979316234588623, "learning_rate": 2.5151818568675062e-05, "loss": 9.9165, "step": 32780 }, { "epoch": 0.16374940697645385, "grad_norm": 0.09158878028392792, "learning_rate": 2.515031665373351e-05, "loss": 9.9188, "step": 32790 }, { "epoch": 0.1637993458013933, "grad_norm": 0.09471914172172546, "learning_rate": 2.514881473879196e-05, "loss": 9.9175, "step": 32800 }, { "epoch": 0.16384928462633275, "grad_norm": 0.08805624395608902, "learning_rate": 2.514731282385041e-05, "loss": 9.9205, "step": 32810 }, { "epoch": 0.1638992234512722, "grad_norm": 0.09481382369995117, "learning_rate": 2.514581090890886e-05, "loss": 9.9162, "step": 32820 }, { "epoch": 0.16394916227621165, "grad_norm": 0.09198730438947678, "learning_rate": 2.514430899396731e-05, "loss": 9.9177, "step": 32830 }, { "epoch": 0.1639991011011511, "grad_norm": 0.09711703658103943, "learning_rate": 2.5142807079025757e-05, "loss": 9.9141, "step": 32840 }, { "epoch": 0.16404903992609055, "grad_norm": 0.09685643017292023, "learning_rate": 2.5141305164084207e-05, "loss": 9.9108, "step": 32850 }, { "epoch": 0.16409897875103, "grad_norm": 0.09421391785144806, "learning_rate": 2.5139803249142657e-05, "loss": 9.9192, "step": 32860 }, { "epoch": 0.16414891757596944, "grad_norm": 0.09753596037626266, "learning_rate": 2.5138301334201107e-05, "loss": 9.9174, "step": 32870 }, { "epoch": 0.1641988564009089, "grad_norm": 0.09626904875040054, "learning_rate": 2.5136799419259557e-05, "loss": 9.9172, "step": 32880 }, { "epoch": 0.16424879522584834, "grad_norm": 0.0984250158071518, "learning_rate": 2.5135297504318004e-05, "loss": 9.9117, "step": 32890 }, { "epoch": 0.1642987340507878, "grad_norm": 0.09708181023597717, "learning_rate": 2.5133795589376454e-05, "loss": 9.9127, "step": 32900 }, { "epoch": 0.16434867287572724, "grad_norm": 0.09856320172548294, "learning_rate": 2.5132293674434904e-05, "loss": 9.9167, "step": 32910 }, { "epoch": 0.1643986117006667, "grad_norm": 0.09314543008804321, "learning_rate": 2.5130791759493355e-05, "loss": 9.9153, "step": 32920 }, { "epoch": 0.16444855052560614, "grad_norm": 0.09600864350795746, "learning_rate": 2.5129289844551805e-05, "loss": 9.9172, "step": 32930 }, { "epoch": 0.1644984893505456, "grad_norm": 0.09382302314043045, "learning_rate": 2.5127787929610255e-05, "loss": 9.9149, "step": 32940 }, { "epoch": 0.16454842817548504, "grad_norm": 0.09613067656755447, "learning_rate": 2.5126286014668702e-05, "loss": 9.9112, "step": 32950 }, { "epoch": 0.1645983670004245, "grad_norm": 0.09294107556343079, "learning_rate": 2.5124784099727152e-05, "loss": 9.9157, "step": 32960 }, { "epoch": 0.16464830582536394, "grad_norm": 0.09056387096643448, "learning_rate": 2.5123282184785602e-05, "loss": 9.9145, "step": 32970 }, { "epoch": 0.1646982446503034, "grad_norm": 0.09541086107492447, "learning_rate": 2.5121780269844052e-05, "loss": 9.9074, "step": 32980 }, { "epoch": 0.16474818347524284, "grad_norm": 0.09487113356590271, "learning_rate": 2.5120278354902502e-05, "loss": 9.9122, "step": 32990 }, { "epoch": 0.1647981223001823, "grad_norm": 0.0954999253153801, "learning_rate": 2.511877643996095e-05, "loss": 9.9143, "step": 33000 }, { "epoch": 0.16484806112512174, "grad_norm": 0.09315845370292664, "learning_rate": 2.51172745250194e-05, "loss": 9.9189, "step": 33010 }, { "epoch": 0.16489799995006119, "grad_norm": 0.09187664091587067, "learning_rate": 2.511577261007785e-05, "loss": 9.9106, "step": 33020 }, { "epoch": 0.16494793877500064, "grad_norm": 0.09694460779428482, "learning_rate": 2.51142706951363e-05, "loss": 9.9149, "step": 33030 }, { "epoch": 0.16499787759994008, "grad_norm": 0.08832021057605743, "learning_rate": 2.511276878019475e-05, "loss": 9.911, "step": 33040 }, { "epoch": 0.16504781642487953, "grad_norm": 0.09793966263532639, "learning_rate": 2.5111266865253197e-05, "loss": 9.9152, "step": 33050 }, { "epoch": 0.16509775524981898, "grad_norm": 0.09660662710666656, "learning_rate": 2.5109764950311647e-05, "loss": 9.9142, "step": 33060 }, { "epoch": 0.16514769407475843, "grad_norm": 0.09192895144224167, "learning_rate": 2.5108263035370097e-05, "loss": 9.9064, "step": 33070 }, { "epoch": 0.16519763289969788, "grad_norm": 0.09510926902294159, "learning_rate": 2.5106761120428547e-05, "loss": 9.9095, "step": 33080 }, { "epoch": 0.16524757172463733, "grad_norm": 0.09554640203714371, "learning_rate": 2.5105259205486998e-05, "loss": 9.9071, "step": 33090 }, { "epoch": 0.16529751054957678, "grad_norm": 0.0876171737909317, "learning_rate": 2.5103757290545448e-05, "loss": 9.9134, "step": 33100 }, { "epoch": 0.16534744937451623, "grad_norm": 0.09115521609783173, "learning_rate": 2.5102255375603894e-05, "loss": 9.9153, "step": 33110 }, { "epoch": 0.16539738819945568, "grad_norm": 0.09953109920024872, "learning_rate": 2.5100753460662345e-05, "loss": 9.9073, "step": 33120 }, { "epoch": 0.1654473270243951, "grad_norm": 0.09405183792114258, "learning_rate": 2.5099251545720795e-05, "loss": 9.906, "step": 33130 }, { "epoch": 0.16549726584933455, "grad_norm": 0.09410317987203598, "learning_rate": 2.5097749630779245e-05, "loss": 9.9176, "step": 33140 }, { "epoch": 0.165547204674274, "grad_norm": 0.09550243616104126, "learning_rate": 2.5096247715837695e-05, "loss": 9.9054, "step": 33150 }, { "epoch": 0.16559714349921345, "grad_norm": 0.09511136263608932, "learning_rate": 2.5094745800896142e-05, "loss": 9.917, "step": 33160 }, { "epoch": 0.1656470823241529, "grad_norm": 0.09608213603496552, "learning_rate": 2.5093243885954592e-05, "loss": 9.9164, "step": 33170 }, { "epoch": 0.16569702114909235, "grad_norm": 0.0949927344918251, "learning_rate": 2.5091741971013042e-05, "loss": 9.9107, "step": 33180 }, { "epoch": 0.1657469599740318, "grad_norm": 0.09686841070652008, "learning_rate": 2.5090240056071493e-05, "loss": 9.9089, "step": 33190 }, { "epoch": 0.16579689879897125, "grad_norm": 0.09343890100717545, "learning_rate": 2.5088738141129943e-05, "loss": 9.9128, "step": 33200 }, { "epoch": 0.1658468376239107, "grad_norm": 0.09721093624830246, "learning_rate": 2.508723622618839e-05, "loss": 9.9105, "step": 33210 }, { "epoch": 0.16589677644885015, "grad_norm": 0.0927630066871643, "learning_rate": 2.508573431124684e-05, "loss": 9.9052, "step": 33220 }, { "epoch": 0.1659467152737896, "grad_norm": 0.09124623984098434, "learning_rate": 2.508423239630529e-05, "loss": 9.9141, "step": 33230 }, { "epoch": 0.16599665409872905, "grad_norm": 0.10382476449012756, "learning_rate": 2.508273048136374e-05, "loss": 9.9072, "step": 33240 }, { "epoch": 0.1660465929236685, "grad_norm": 0.09035416692495346, "learning_rate": 2.508122856642219e-05, "loss": 9.9108, "step": 33250 }, { "epoch": 0.16609653174860795, "grad_norm": 0.0920075997710228, "learning_rate": 2.507972665148064e-05, "loss": 9.9083, "step": 33260 }, { "epoch": 0.1661464705735474, "grad_norm": 0.09175487607717514, "learning_rate": 2.5078224736539087e-05, "loss": 9.9091, "step": 33270 }, { "epoch": 0.16619640939848684, "grad_norm": 0.09441912174224854, "learning_rate": 2.5076722821597537e-05, "loss": 9.9066, "step": 33280 }, { "epoch": 0.1662463482234263, "grad_norm": 0.09477470815181732, "learning_rate": 2.5075220906655988e-05, "loss": 9.9087, "step": 33290 }, { "epoch": 0.16629628704836574, "grad_norm": 0.08985955268144608, "learning_rate": 2.5073718991714438e-05, "loss": 9.9046, "step": 33300 }, { "epoch": 0.1663462258733052, "grad_norm": 0.0965331420302391, "learning_rate": 2.5072217076772888e-05, "loss": 9.9071, "step": 33310 }, { "epoch": 0.16639616469824464, "grad_norm": 0.0963759496808052, "learning_rate": 2.5070715161831335e-05, "loss": 9.9057, "step": 33320 }, { "epoch": 0.1664461035231841, "grad_norm": 0.097702257335186, "learning_rate": 2.5069213246889785e-05, "loss": 9.9115, "step": 33330 }, { "epoch": 0.16649604234812354, "grad_norm": 0.09388408064842224, "learning_rate": 2.5067711331948235e-05, "loss": 9.9045, "step": 33340 }, { "epoch": 0.166545981173063, "grad_norm": 0.09655765444040298, "learning_rate": 2.5066209417006685e-05, "loss": 9.9053, "step": 33350 }, { "epoch": 0.16659591999800244, "grad_norm": 0.09741511940956116, "learning_rate": 2.5064707502065135e-05, "loss": 9.9012, "step": 33360 }, { "epoch": 0.1666458588229419, "grad_norm": 0.09620445966720581, "learning_rate": 2.5063205587123582e-05, "loss": 9.8977, "step": 33370 }, { "epoch": 0.16669579764788134, "grad_norm": 0.09696647524833679, "learning_rate": 2.5061703672182032e-05, "loss": 9.9067, "step": 33380 }, { "epoch": 0.1667457364728208, "grad_norm": 0.09665276110172272, "learning_rate": 2.5060201757240483e-05, "loss": 9.8977, "step": 33390 }, { "epoch": 0.16679567529776024, "grad_norm": 0.09218195825815201, "learning_rate": 2.5058699842298933e-05, "loss": 9.9092, "step": 33400 }, { "epoch": 0.1668456141226997, "grad_norm": 0.09188750386238098, "learning_rate": 2.5057197927357383e-05, "loss": 9.9054, "step": 33410 }, { "epoch": 0.16689555294763914, "grad_norm": 0.09387121349573135, "learning_rate": 2.505569601241583e-05, "loss": 9.9054, "step": 33420 }, { "epoch": 0.16694549177257859, "grad_norm": 0.09394029527902603, "learning_rate": 2.505419409747428e-05, "loss": 9.8994, "step": 33430 }, { "epoch": 0.16699543059751804, "grad_norm": 0.09561529755592346, "learning_rate": 2.505269218253273e-05, "loss": 9.9005, "step": 33440 }, { "epoch": 0.16704536942245748, "grad_norm": 0.09472378343343735, "learning_rate": 2.505119026759118e-05, "loss": 9.8989, "step": 33450 }, { "epoch": 0.16709530824739693, "grad_norm": 0.09782572835683823, "learning_rate": 2.504968835264963e-05, "loss": 9.8989, "step": 33460 }, { "epoch": 0.16714524707233638, "grad_norm": 0.0978260487318039, "learning_rate": 2.5048186437708077e-05, "loss": 9.9044, "step": 33470 }, { "epoch": 0.16719518589727583, "grad_norm": 0.09694740176200867, "learning_rate": 2.5046684522766527e-05, "loss": 9.9096, "step": 33480 }, { "epoch": 0.16724512472221528, "grad_norm": 0.0943945124745369, "learning_rate": 2.5045182607824978e-05, "loss": 9.9032, "step": 33490 }, { "epoch": 0.16729506354715473, "grad_norm": 0.09394306689500809, "learning_rate": 2.5043680692883428e-05, "loss": 9.8996, "step": 33500 }, { "epoch": 0.16734500237209418, "grad_norm": 0.09680566936731339, "learning_rate": 2.5042178777941878e-05, "loss": 9.8961, "step": 33510 }, { "epoch": 0.16739494119703363, "grad_norm": 0.09609083831310272, "learning_rate": 2.5040676863000325e-05, "loss": 9.9013, "step": 33520 }, { "epoch": 0.16744488002197308, "grad_norm": 0.09596184641122818, "learning_rate": 2.5039174948058775e-05, "loss": 9.9, "step": 33530 }, { "epoch": 0.16749481884691253, "grad_norm": 0.09482643753290176, "learning_rate": 2.5037673033117225e-05, "loss": 9.8908, "step": 33540 }, { "epoch": 0.16754475767185198, "grad_norm": 0.09858851134777069, "learning_rate": 2.5036171118175675e-05, "loss": 9.8957, "step": 33550 }, { "epoch": 0.16759469649679143, "grad_norm": 0.09529074281454086, "learning_rate": 2.5034669203234125e-05, "loss": 9.8975, "step": 33560 }, { "epoch": 0.16764463532173088, "grad_norm": 0.09430502355098724, "learning_rate": 2.5033167288292572e-05, "loss": 9.9093, "step": 33570 }, { "epoch": 0.16769457414667033, "grad_norm": 0.09423274546861649, "learning_rate": 2.5031665373351026e-05, "loss": 9.9008, "step": 33580 }, { "epoch": 0.16774451297160978, "grad_norm": 0.09213889390230179, "learning_rate": 2.5030163458409473e-05, "loss": 9.8997, "step": 33590 }, { "epoch": 0.16779445179654923, "grad_norm": 0.10176771134138107, "learning_rate": 2.5028661543467923e-05, "loss": 9.8974, "step": 33600 }, { "epoch": 0.16784439062148868, "grad_norm": 0.0979059562087059, "learning_rate": 2.5027159628526373e-05, "loss": 9.8964, "step": 33610 }, { "epoch": 0.16789432944642813, "grad_norm": 0.09319134056568146, "learning_rate": 2.502565771358482e-05, "loss": 9.8955, "step": 33620 }, { "epoch": 0.16794426827136757, "grad_norm": 0.09434917569160461, "learning_rate": 2.5024155798643273e-05, "loss": 9.9026, "step": 33630 }, { "epoch": 0.16799420709630702, "grad_norm": 0.09023196995258331, "learning_rate": 2.502265388370172e-05, "loss": 9.8935, "step": 33640 }, { "epoch": 0.16804414592124647, "grad_norm": 0.08715837448835373, "learning_rate": 2.502115196876017e-05, "loss": 9.8996, "step": 33650 }, { "epoch": 0.16809408474618592, "grad_norm": 0.09682469815015793, "learning_rate": 2.501965005381862e-05, "loss": 9.9018, "step": 33660 }, { "epoch": 0.16814402357112537, "grad_norm": 0.0928163006901741, "learning_rate": 2.5018148138877067e-05, "loss": 9.8917, "step": 33670 }, { "epoch": 0.16819396239606482, "grad_norm": 0.10032060742378235, "learning_rate": 2.501664622393552e-05, "loss": 9.8959, "step": 33680 }, { "epoch": 0.16824390122100427, "grad_norm": 0.0933447927236557, "learning_rate": 2.5015144308993968e-05, "loss": 9.9003, "step": 33690 }, { "epoch": 0.16829384004594372, "grad_norm": 0.09343995898962021, "learning_rate": 2.5013642394052418e-05, "loss": 9.897, "step": 33700 }, { "epoch": 0.16834377887088317, "grad_norm": 0.09390633553266525, "learning_rate": 2.5012140479110868e-05, "loss": 9.8975, "step": 33710 }, { "epoch": 0.16839371769582262, "grad_norm": 0.10121657699346542, "learning_rate": 2.5010638564169315e-05, "loss": 9.9001, "step": 33720 }, { "epoch": 0.16844365652076207, "grad_norm": 0.09688078612089157, "learning_rate": 2.5009136649227768e-05, "loss": 9.8943, "step": 33730 }, { "epoch": 0.16849359534570152, "grad_norm": 0.09601810574531555, "learning_rate": 2.5007634734286215e-05, "loss": 9.8948, "step": 33740 }, { "epoch": 0.16854353417064097, "grad_norm": 0.09208814799785614, "learning_rate": 2.5006132819344665e-05, "loss": 9.8938, "step": 33750 }, { "epoch": 0.16859347299558042, "grad_norm": 0.10235219448804855, "learning_rate": 2.5004630904403115e-05, "loss": 9.8951, "step": 33760 }, { "epoch": 0.16864341182051987, "grad_norm": 0.09691403061151505, "learning_rate": 2.5003128989461562e-05, "loss": 9.8948, "step": 33770 }, { "epoch": 0.16869335064545932, "grad_norm": 0.0951026976108551, "learning_rate": 2.5001627074520016e-05, "loss": 9.9, "step": 33780 }, { "epoch": 0.16874328947039877, "grad_norm": 0.08896276354789734, "learning_rate": 2.5000125159578463e-05, "loss": 9.8944, "step": 33790 }, { "epoch": 0.16879322829533822, "grad_norm": 0.09217982739210129, "learning_rate": 2.4998623244636913e-05, "loss": 9.8922, "step": 33800 }, { "epoch": 0.16884316712027767, "grad_norm": 0.09872495383024216, "learning_rate": 2.4997121329695363e-05, "loss": 9.8956, "step": 33810 }, { "epoch": 0.16889310594521711, "grad_norm": 0.09138542413711548, "learning_rate": 2.499561941475381e-05, "loss": 9.8934, "step": 33820 }, { "epoch": 0.16894304477015656, "grad_norm": 0.09778387099504471, "learning_rate": 2.4994117499812263e-05, "loss": 9.8991, "step": 33830 }, { "epoch": 0.168992983595096, "grad_norm": 0.0923265740275383, "learning_rate": 2.499261558487071e-05, "loss": 9.8911, "step": 33840 }, { "epoch": 0.16904292242003546, "grad_norm": 0.09650073945522308, "learning_rate": 2.499111366992916e-05, "loss": 9.8927, "step": 33850 }, { "epoch": 0.1690928612449749, "grad_norm": 0.09646689891815186, "learning_rate": 2.498961175498761e-05, "loss": 9.8933, "step": 33860 }, { "epoch": 0.16914280006991436, "grad_norm": 0.09530924260616302, "learning_rate": 2.4988109840046057e-05, "loss": 9.893, "step": 33870 }, { "epoch": 0.1691927388948538, "grad_norm": 0.09329960495233536, "learning_rate": 2.498660792510451e-05, "loss": 9.8912, "step": 33880 }, { "epoch": 0.16924267771979326, "grad_norm": 0.09914696216583252, "learning_rate": 2.4985106010162958e-05, "loss": 9.8876, "step": 33890 }, { "epoch": 0.1692926165447327, "grad_norm": 0.09866105765104294, "learning_rate": 2.498360409522141e-05, "loss": 9.9011, "step": 33900 }, { "epoch": 0.16934255536967216, "grad_norm": 0.09826882928609848, "learning_rate": 2.4982102180279858e-05, "loss": 9.8992, "step": 33910 }, { "epoch": 0.1693924941946116, "grad_norm": 0.09396472573280334, "learning_rate": 2.4980600265338305e-05, "loss": 9.8908, "step": 33920 }, { "epoch": 0.16944243301955106, "grad_norm": 0.08894440531730652, "learning_rate": 2.4979098350396758e-05, "loss": 9.8922, "step": 33930 }, { "epoch": 0.1694923718444905, "grad_norm": 0.09588942676782608, "learning_rate": 2.4977596435455205e-05, "loss": 9.8917, "step": 33940 }, { "epoch": 0.16954231066942996, "grad_norm": 0.09317868947982788, "learning_rate": 2.497609452051366e-05, "loss": 9.8941, "step": 33950 }, { "epoch": 0.1695922494943694, "grad_norm": 0.0976976752281189, "learning_rate": 2.4974592605572105e-05, "loss": 9.8955, "step": 33960 }, { "epoch": 0.16964218831930886, "grad_norm": 0.10638713836669922, "learning_rate": 2.4973090690630552e-05, "loss": 9.8906, "step": 33970 }, { "epoch": 0.1696921271442483, "grad_norm": 0.0989893451333046, "learning_rate": 2.4971588775689006e-05, "loss": 9.8929, "step": 33980 }, { "epoch": 0.16974206596918776, "grad_norm": 0.09523206949234009, "learning_rate": 2.4970086860747453e-05, "loss": 9.8959, "step": 33990 }, { "epoch": 0.1697920047941272, "grad_norm": 0.08967464417219162, "learning_rate": 2.4968584945805906e-05, "loss": 9.8947, "step": 34000 }, { "epoch": 0.16984194361906665, "grad_norm": 0.09731745719909668, "learning_rate": 2.4967083030864353e-05, "loss": 9.8914, "step": 34010 }, { "epoch": 0.1698918824440061, "grad_norm": 0.09333444386720657, "learning_rate": 2.49655811159228e-05, "loss": 9.897, "step": 34020 }, { "epoch": 0.16994182126894555, "grad_norm": 0.09605353325605392, "learning_rate": 2.4964079200981253e-05, "loss": 9.8829, "step": 34030 }, { "epoch": 0.169991760093885, "grad_norm": 0.08907973766326904, "learning_rate": 2.49625772860397e-05, "loss": 9.8886, "step": 34040 }, { "epoch": 0.17004169891882445, "grad_norm": 0.09453407675027847, "learning_rate": 2.4961075371098154e-05, "loss": 9.8884, "step": 34050 }, { "epoch": 0.1700916377437639, "grad_norm": 0.09984616935253143, "learning_rate": 2.49595734561566e-05, "loss": 9.899, "step": 34060 }, { "epoch": 0.17014157656870335, "grad_norm": 0.0900527685880661, "learning_rate": 2.4958071541215047e-05, "loss": 9.8932, "step": 34070 }, { "epoch": 0.1701915153936428, "grad_norm": 0.09352007508277893, "learning_rate": 2.49565696262735e-05, "loss": 9.8887, "step": 34080 }, { "epoch": 0.17024145421858225, "grad_norm": 0.0973382219672203, "learning_rate": 2.4955067711331948e-05, "loss": 9.8908, "step": 34090 }, { "epoch": 0.1702913930435217, "grad_norm": 0.09201547503471375, "learning_rate": 2.49535657963904e-05, "loss": 9.8875, "step": 34100 }, { "epoch": 0.17034133186846115, "grad_norm": 0.09478182345628738, "learning_rate": 2.4952063881448848e-05, "loss": 9.8797, "step": 34110 }, { "epoch": 0.17039127069340057, "grad_norm": 0.09311734884977341, "learning_rate": 2.4950561966507295e-05, "loss": 9.8855, "step": 34120 }, { "epoch": 0.17044120951834002, "grad_norm": 0.09643889218568802, "learning_rate": 2.4949060051565748e-05, "loss": 9.8851, "step": 34130 }, { "epoch": 0.17049114834327947, "grad_norm": 0.09145352989435196, "learning_rate": 2.4947558136624195e-05, "loss": 9.8896, "step": 34140 }, { "epoch": 0.17054108716821892, "grad_norm": 0.0948040559887886, "learning_rate": 2.494605622168265e-05, "loss": 9.8845, "step": 34150 }, { "epoch": 0.17059102599315837, "grad_norm": 0.09217320382595062, "learning_rate": 2.4944554306741095e-05, "loss": 9.8888, "step": 34160 }, { "epoch": 0.17064096481809782, "grad_norm": 0.09132317453622818, "learning_rate": 2.4943052391799542e-05, "loss": 9.8871, "step": 34170 }, { "epoch": 0.17069090364303727, "grad_norm": 0.0995708778500557, "learning_rate": 2.4941550476857996e-05, "loss": 9.8957, "step": 34180 }, { "epoch": 0.17074084246797672, "grad_norm": 0.09466378390789032, "learning_rate": 2.4940048561916443e-05, "loss": 9.8955, "step": 34190 }, { "epoch": 0.17079078129291617, "grad_norm": 0.09082814306020737, "learning_rate": 2.4938546646974896e-05, "loss": 9.8832, "step": 34200 }, { "epoch": 0.17084072011785562, "grad_norm": 0.09607654809951782, "learning_rate": 2.4937044732033343e-05, "loss": 9.887, "step": 34210 }, { "epoch": 0.17089065894279507, "grad_norm": 0.09264031052589417, "learning_rate": 2.4935542817091793e-05, "loss": 9.8864, "step": 34220 }, { "epoch": 0.17094059776773451, "grad_norm": 0.09279338270425797, "learning_rate": 2.4934040902150243e-05, "loss": 9.888, "step": 34230 }, { "epoch": 0.17099053659267396, "grad_norm": 0.09093698859214783, "learning_rate": 2.493253898720869e-05, "loss": 9.8898, "step": 34240 }, { "epoch": 0.1710404754176134, "grad_norm": 0.09663654863834381, "learning_rate": 2.4931037072267144e-05, "loss": 9.8794, "step": 34250 }, { "epoch": 0.17109041424255286, "grad_norm": 0.09918060153722763, "learning_rate": 2.492953515732559e-05, "loss": 9.8779, "step": 34260 }, { "epoch": 0.1711403530674923, "grad_norm": 0.09901434928178787, "learning_rate": 2.492803324238404e-05, "loss": 9.8849, "step": 34270 }, { "epoch": 0.17119029189243176, "grad_norm": 0.09233778715133667, "learning_rate": 2.492653132744249e-05, "loss": 9.8817, "step": 34280 }, { "epoch": 0.1712402307173712, "grad_norm": 0.08926848322153091, "learning_rate": 2.4925029412500938e-05, "loss": 9.8866, "step": 34290 }, { "epoch": 0.17129016954231066, "grad_norm": 0.0978291779756546, "learning_rate": 2.492352749755939e-05, "loss": 9.8874, "step": 34300 }, { "epoch": 0.1713401083672501, "grad_norm": 0.09829317033290863, "learning_rate": 2.4922025582617838e-05, "loss": 9.8843, "step": 34310 }, { "epoch": 0.17139004719218956, "grad_norm": 0.08857450634241104, "learning_rate": 2.4920523667676288e-05, "loss": 9.8929, "step": 34320 }, { "epoch": 0.171439986017129, "grad_norm": 0.10009986907243729, "learning_rate": 2.4919021752734738e-05, "loss": 9.8932, "step": 34330 }, { "epoch": 0.17148992484206846, "grad_norm": 0.09351924806833267, "learning_rate": 2.4917519837793185e-05, "loss": 9.8878, "step": 34340 }, { "epoch": 0.1715398636670079, "grad_norm": 0.09353633224964142, "learning_rate": 2.491601792285164e-05, "loss": 9.8878, "step": 34350 }, { "epoch": 0.17158980249194736, "grad_norm": 0.09166029840707779, "learning_rate": 2.4914516007910085e-05, "loss": 9.8803, "step": 34360 }, { "epoch": 0.1716397413168868, "grad_norm": 0.09764494746923447, "learning_rate": 2.4913014092968536e-05, "loss": 9.8855, "step": 34370 }, { "epoch": 0.17168968014182626, "grad_norm": 0.0947176069021225, "learning_rate": 2.4911512178026986e-05, "loss": 9.8796, "step": 34380 }, { "epoch": 0.1717396189667657, "grad_norm": 0.09326879680156708, "learning_rate": 2.4910010263085433e-05, "loss": 9.8775, "step": 34390 }, { "epoch": 0.17178955779170516, "grad_norm": 0.0948229506611824, "learning_rate": 2.4908508348143886e-05, "loss": 9.8862, "step": 34400 }, { "epoch": 0.1718394966166446, "grad_norm": 0.09219582378864288, "learning_rate": 2.4907006433202333e-05, "loss": 9.8888, "step": 34410 }, { "epoch": 0.17188943544158405, "grad_norm": 0.0936218723654747, "learning_rate": 2.4905504518260783e-05, "loss": 9.8747, "step": 34420 }, { "epoch": 0.1719393742665235, "grad_norm": 0.09321941435337067, "learning_rate": 2.4904002603319233e-05, "loss": 9.891, "step": 34430 }, { "epoch": 0.17198931309146295, "grad_norm": 0.10023363679647446, "learning_rate": 2.490250068837768e-05, "loss": 9.8808, "step": 34440 }, { "epoch": 0.1720392519164024, "grad_norm": 0.09513888508081436, "learning_rate": 2.4900998773436134e-05, "loss": 9.882, "step": 34450 }, { "epoch": 0.17208919074134185, "grad_norm": 0.09558258205652237, "learning_rate": 2.489949685849458e-05, "loss": 9.8793, "step": 34460 }, { "epoch": 0.1721391295662813, "grad_norm": 0.10544897615909576, "learning_rate": 2.489799494355303e-05, "loss": 9.8799, "step": 34470 }, { "epoch": 0.17218906839122075, "grad_norm": 0.09681084007024765, "learning_rate": 2.489649302861148e-05, "loss": 9.879, "step": 34480 }, { "epoch": 0.1722390072161602, "grad_norm": 0.10312927514314651, "learning_rate": 2.4894991113669928e-05, "loss": 9.8859, "step": 34490 }, { "epoch": 0.17228894604109965, "grad_norm": 0.09519031643867493, "learning_rate": 2.489348919872838e-05, "loss": 9.8728, "step": 34500 }, { "epoch": 0.1723388848660391, "grad_norm": 0.09442928433418274, "learning_rate": 2.4891987283786828e-05, "loss": 9.8753, "step": 34510 }, { "epoch": 0.17238882369097855, "grad_norm": 0.09661301970481873, "learning_rate": 2.4890485368845278e-05, "loss": 9.8788, "step": 34520 }, { "epoch": 0.172438762515918, "grad_norm": 0.0956299677491188, "learning_rate": 2.488898345390373e-05, "loss": 9.8807, "step": 34530 }, { "epoch": 0.17248870134085745, "grad_norm": 0.097622811794281, "learning_rate": 2.488748153896218e-05, "loss": 9.8781, "step": 34540 }, { "epoch": 0.1725386401657969, "grad_norm": 0.0975886806845665, "learning_rate": 2.488597962402063e-05, "loss": 9.8766, "step": 34550 }, { "epoch": 0.17258857899073635, "grad_norm": 0.09032001346349716, "learning_rate": 2.4884477709079075e-05, "loss": 9.8834, "step": 34560 }, { "epoch": 0.1726385178156758, "grad_norm": 0.09692685306072235, "learning_rate": 2.4882975794137526e-05, "loss": 9.8768, "step": 34570 }, { "epoch": 0.17268845664061525, "grad_norm": 0.09569180756807327, "learning_rate": 2.4881473879195976e-05, "loss": 9.8756, "step": 34580 }, { "epoch": 0.1727383954655547, "grad_norm": 0.0921887531876564, "learning_rate": 2.4879971964254426e-05, "loss": 9.8815, "step": 34590 }, { "epoch": 0.17278833429049414, "grad_norm": 0.09386293590068817, "learning_rate": 2.4878470049312876e-05, "loss": 9.8763, "step": 34600 }, { "epoch": 0.1728382731154336, "grad_norm": 0.09377525746822357, "learning_rate": 2.4876968134371323e-05, "loss": 9.8792, "step": 34610 }, { "epoch": 0.17288821194037304, "grad_norm": 0.09777195751667023, "learning_rate": 2.4875466219429773e-05, "loss": 9.8779, "step": 34620 }, { "epoch": 0.1729381507653125, "grad_norm": 0.08953432738780975, "learning_rate": 2.4873964304488223e-05, "loss": 9.8801, "step": 34630 }, { "epoch": 0.17298808959025194, "grad_norm": 0.09429240971803665, "learning_rate": 2.4872462389546674e-05, "loss": 9.8756, "step": 34640 }, { "epoch": 0.1730380284151914, "grad_norm": 0.09474718570709229, "learning_rate": 2.4870960474605124e-05, "loss": 9.8751, "step": 34650 }, { "epoch": 0.17308796724013084, "grad_norm": 0.09288160502910614, "learning_rate": 2.486945855966357e-05, "loss": 9.8807, "step": 34660 }, { "epoch": 0.1731379060650703, "grad_norm": 0.09404736012220383, "learning_rate": 2.486795664472202e-05, "loss": 9.8768, "step": 34670 }, { "epoch": 0.17318784489000974, "grad_norm": 0.0975610539317131, "learning_rate": 2.486645472978047e-05, "loss": 9.8753, "step": 34680 }, { "epoch": 0.1732377837149492, "grad_norm": 0.0982472151517868, "learning_rate": 2.486495281483892e-05, "loss": 9.8792, "step": 34690 }, { "epoch": 0.17328772253988864, "grad_norm": 0.09330155700445175, "learning_rate": 2.486345089989737e-05, "loss": 9.8726, "step": 34700 }, { "epoch": 0.1733376613648281, "grad_norm": 0.09651447087526321, "learning_rate": 2.4861948984955818e-05, "loss": 9.8716, "step": 34710 }, { "epoch": 0.17338760018976754, "grad_norm": 0.09619554132223129, "learning_rate": 2.4860447070014268e-05, "loss": 9.8736, "step": 34720 }, { "epoch": 0.173437539014707, "grad_norm": 0.09537581354379654, "learning_rate": 2.485894515507272e-05, "loss": 9.8743, "step": 34730 }, { "epoch": 0.17348747783964644, "grad_norm": 0.09322098642587662, "learning_rate": 2.485744324013117e-05, "loss": 9.8832, "step": 34740 }, { "epoch": 0.1735374166645859, "grad_norm": 0.09934062510728836, "learning_rate": 2.485594132518962e-05, "loss": 9.8735, "step": 34750 }, { "epoch": 0.17358735548952534, "grad_norm": 0.09587859362363815, "learning_rate": 2.4854439410248065e-05, "loss": 9.8625, "step": 34760 }, { "epoch": 0.17363729431446479, "grad_norm": 0.09331879019737244, "learning_rate": 2.4852937495306516e-05, "loss": 9.8709, "step": 34770 }, { "epoch": 0.17368723313940423, "grad_norm": 0.0957440435886383, "learning_rate": 2.4851435580364966e-05, "loss": 9.8665, "step": 34780 }, { "epoch": 0.17373717196434368, "grad_norm": 0.10028447955846786, "learning_rate": 2.4849933665423416e-05, "loss": 9.8696, "step": 34790 }, { "epoch": 0.17378711078928313, "grad_norm": 0.09930728375911713, "learning_rate": 2.4848431750481866e-05, "loss": 9.8731, "step": 34800 }, { "epoch": 0.17383704961422258, "grad_norm": 0.09186220914125443, "learning_rate": 2.4846929835540313e-05, "loss": 9.8791, "step": 34810 }, { "epoch": 0.17388698843916203, "grad_norm": 0.09665971249341965, "learning_rate": 2.4845427920598763e-05, "loss": 9.8828, "step": 34820 }, { "epoch": 0.17393692726410148, "grad_norm": 0.09465493261814117, "learning_rate": 2.4843926005657213e-05, "loss": 9.8729, "step": 34830 }, { "epoch": 0.17398686608904093, "grad_norm": 0.09141222387552261, "learning_rate": 2.4842424090715664e-05, "loss": 9.8718, "step": 34840 }, { "epoch": 0.17403680491398038, "grad_norm": 0.09826017171144485, "learning_rate": 2.4840922175774114e-05, "loss": 9.8765, "step": 34850 }, { "epoch": 0.17408674373891983, "grad_norm": 0.10129930078983307, "learning_rate": 2.4839420260832564e-05, "loss": 9.8716, "step": 34860 }, { "epoch": 0.17413668256385928, "grad_norm": 0.09397407621145248, "learning_rate": 2.483791834589101e-05, "loss": 9.875, "step": 34870 }, { "epoch": 0.17418662138879873, "grad_norm": 0.09884011745452881, "learning_rate": 2.483641643094946e-05, "loss": 9.8708, "step": 34880 }, { "epoch": 0.17423656021373818, "grad_norm": 0.09467148035764694, "learning_rate": 2.483491451600791e-05, "loss": 9.8709, "step": 34890 }, { "epoch": 0.17428649903867763, "grad_norm": 0.0914902612566948, "learning_rate": 2.483341260106636e-05, "loss": 9.8711, "step": 34900 }, { "epoch": 0.17433643786361708, "grad_norm": 0.0950956791639328, "learning_rate": 2.483191068612481e-05, "loss": 9.8675, "step": 34910 }, { "epoch": 0.17438637668855653, "grad_norm": 0.09284093976020813, "learning_rate": 2.4830408771183258e-05, "loss": 9.8682, "step": 34920 }, { "epoch": 0.17443631551349598, "grad_norm": 0.09593179076910019, "learning_rate": 2.482890685624171e-05, "loss": 9.8714, "step": 34930 }, { "epoch": 0.17448625433843543, "grad_norm": 0.09419502317905426, "learning_rate": 2.482740494130016e-05, "loss": 9.8694, "step": 34940 }, { "epoch": 0.17453619316337488, "grad_norm": 0.09543459862470627, "learning_rate": 2.482590302635861e-05, "loss": 9.8715, "step": 34950 }, { "epoch": 0.17458613198831432, "grad_norm": 0.09066810458898544, "learning_rate": 2.482440111141706e-05, "loss": 9.8734, "step": 34960 }, { "epoch": 0.17463607081325377, "grad_norm": 0.09701773524284363, "learning_rate": 2.4822899196475506e-05, "loss": 9.8711, "step": 34970 }, { "epoch": 0.17468600963819322, "grad_norm": 0.09097738564014435, "learning_rate": 2.4821397281533956e-05, "loss": 9.875, "step": 34980 }, { "epoch": 0.17473594846313267, "grad_norm": 0.0955294743180275, "learning_rate": 2.4819895366592406e-05, "loss": 9.8632, "step": 34990 }, { "epoch": 0.17478588728807212, "grad_norm": 0.09581121802330017, "learning_rate": 2.4818393451650856e-05, "loss": 9.8698, "step": 35000 }, { "epoch": 0.17483582611301157, "grad_norm": 0.09599103778600693, "learning_rate": 2.4816891536709306e-05, "loss": 9.867, "step": 35010 }, { "epoch": 0.17488576493795102, "grad_norm": 0.1030481606721878, "learning_rate": 2.4815389621767753e-05, "loss": 9.8662, "step": 35020 }, { "epoch": 0.17493570376289047, "grad_norm": 0.09832312166690826, "learning_rate": 2.4813887706826203e-05, "loss": 9.8675, "step": 35030 }, { "epoch": 0.17498564258782992, "grad_norm": 0.09342101961374283, "learning_rate": 2.4812385791884654e-05, "loss": 9.8692, "step": 35040 }, { "epoch": 0.17503558141276937, "grad_norm": 0.09640762209892273, "learning_rate": 2.4810883876943104e-05, "loss": 9.8705, "step": 35050 }, { "epoch": 0.17508552023770882, "grad_norm": 0.09944464266300201, "learning_rate": 2.4809381962001554e-05, "loss": 9.8669, "step": 35060 }, { "epoch": 0.17513545906264827, "grad_norm": 0.09033916890621185, "learning_rate": 2.480788004706e-05, "loss": 9.8649, "step": 35070 }, { "epoch": 0.17518539788758772, "grad_norm": 0.09908484667539597, "learning_rate": 2.480637813211845e-05, "loss": 9.8719, "step": 35080 }, { "epoch": 0.17523533671252717, "grad_norm": 0.09301396459341049, "learning_rate": 2.48048762171769e-05, "loss": 9.8616, "step": 35090 }, { "epoch": 0.17528527553746662, "grad_norm": 0.09278962016105652, "learning_rate": 2.480337430223535e-05, "loss": 9.8608, "step": 35100 }, { "epoch": 0.17533521436240604, "grad_norm": 0.09971888363361359, "learning_rate": 2.48018723872938e-05, "loss": 9.8672, "step": 35110 }, { "epoch": 0.1753851531873455, "grad_norm": 0.09398912638425827, "learning_rate": 2.4800370472352248e-05, "loss": 9.8683, "step": 35120 }, { "epoch": 0.17543509201228494, "grad_norm": 0.09367619454860687, "learning_rate": 2.47988685574107e-05, "loss": 9.8646, "step": 35130 }, { "epoch": 0.1754850308372244, "grad_norm": 0.08890117704868317, "learning_rate": 2.479736664246915e-05, "loss": 9.8611, "step": 35140 }, { "epoch": 0.17553496966216384, "grad_norm": 0.09400789439678192, "learning_rate": 2.47958647275276e-05, "loss": 9.8675, "step": 35150 }, { "epoch": 0.1755849084871033, "grad_norm": 0.0941472202539444, "learning_rate": 2.479436281258605e-05, "loss": 9.8662, "step": 35160 }, { "epoch": 0.17563484731204274, "grad_norm": 0.0907822698354721, "learning_rate": 2.4792860897644496e-05, "loss": 9.8724, "step": 35170 }, { "epoch": 0.17568478613698219, "grad_norm": 0.09430518746376038, "learning_rate": 2.479135898270295e-05, "loss": 9.8677, "step": 35180 }, { "epoch": 0.17573472496192163, "grad_norm": 0.09625846147537231, "learning_rate": 2.4789857067761396e-05, "loss": 9.86, "step": 35190 }, { "epoch": 0.17578466378686108, "grad_norm": 0.09888491779565811, "learning_rate": 2.4788355152819846e-05, "loss": 9.864, "step": 35200 }, { "epoch": 0.17583460261180053, "grad_norm": 0.10357237607240677, "learning_rate": 2.4786853237878296e-05, "loss": 9.8568, "step": 35210 }, { "epoch": 0.17588454143673998, "grad_norm": 0.09656186401844025, "learning_rate": 2.4785351322936743e-05, "loss": 9.8683, "step": 35220 }, { "epoch": 0.17593448026167943, "grad_norm": 0.09330909699201584, "learning_rate": 2.4783849407995197e-05, "loss": 9.8613, "step": 35230 }, { "epoch": 0.17598441908661888, "grad_norm": 0.09761014580726624, "learning_rate": 2.4782347493053644e-05, "loss": 9.864, "step": 35240 }, { "epoch": 0.17603435791155833, "grad_norm": 0.09538024663925171, "learning_rate": 2.4780845578112094e-05, "loss": 9.8589, "step": 35250 }, { "epoch": 0.17608429673649778, "grad_norm": 0.09242220222949982, "learning_rate": 2.4779343663170544e-05, "loss": 9.8593, "step": 35260 }, { "epoch": 0.17613423556143723, "grad_norm": 0.09523601830005646, "learning_rate": 2.477784174822899e-05, "loss": 9.8591, "step": 35270 }, { "epoch": 0.17618417438637668, "grad_norm": 0.09717884659767151, "learning_rate": 2.4776339833287444e-05, "loss": 9.8658, "step": 35280 }, { "epoch": 0.17623411321131613, "grad_norm": 0.11009235680103302, "learning_rate": 2.477483791834589e-05, "loss": 9.8577, "step": 35290 }, { "epoch": 0.17628405203625558, "grad_norm": 0.09665638953447342, "learning_rate": 2.477333600340434e-05, "loss": 9.8682, "step": 35300 }, { "epoch": 0.17633399086119503, "grad_norm": 0.09212061762809753, "learning_rate": 2.477183408846279e-05, "loss": 9.8629, "step": 35310 }, { "epoch": 0.17638392968613448, "grad_norm": 0.09347554296255112, "learning_rate": 2.4770332173521238e-05, "loss": 9.8615, "step": 35320 }, { "epoch": 0.17643386851107393, "grad_norm": 0.09957797080278397, "learning_rate": 2.4768830258579692e-05, "loss": 9.8613, "step": 35330 }, { "epoch": 0.17648380733601338, "grad_norm": 0.0971374586224556, "learning_rate": 2.476732834363814e-05, "loss": 9.861, "step": 35340 }, { "epoch": 0.17653374616095283, "grad_norm": 0.09239890426397324, "learning_rate": 2.476582642869659e-05, "loss": 9.865, "step": 35350 }, { "epoch": 0.17658368498589228, "grad_norm": 0.09785747528076172, "learning_rate": 2.476432451375504e-05, "loss": 9.8611, "step": 35360 }, { "epoch": 0.17663362381083172, "grad_norm": 0.09622731059789658, "learning_rate": 2.4762822598813486e-05, "loss": 9.8552, "step": 35370 }, { "epoch": 0.17668356263577117, "grad_norm": 0.09199772030115128, "learning_rate": 2.476132068387194e-05, "loss": 9.8599, "step": 35380 }, { "epoch": 0.17673350146071062, "grad_norm": 0.09622587263584137, "learning_rate": 2.4759818768930386e-05, "loss": 9.8641, "step": 35390 }, { "epoch": 0.17678344028565007, "grad_norm": 0.09253069758415222, "learning_rate": 2.4758316853988836e-05, "loss": 9.8557, "step": 35400 }, { "epoch": 0.17683337911058952, "grad_norm": 0.09692025184631348, "learning_rate": 2.4756814939047286e-05, "loss": 9.8591, "step": 35410 }, { "epoch": 0.17688331793552897, "grad_norm": 0.0917033851146698, "learning_rate": 2.4755313024105733e-05, "loss": 9.8558, "step": 35420 }, { "epoch": 0.17693325676046842, "grad_norm": 0.09357017278671265, "learning_rate": 2.4753811109164187e-05, "loss": 9.8624, "step": 35430 }, { "epoch": 0.17698319558540787, "grad_norm": 0.09529455006122589, "learning_rate": 2.4752309194222634e-05, "loss": 9.8557, "step": 35440 }, { "epoch": 0.17703313441034732, "grad_norm": 0.0990082174539566, "learning_rate": 2.4750807279281084e-05, "loss": 9.8607, "step": 35450 }, { "epoch": 0.17708307323528677, "grad_norm": 0.09450554102659225, "learning_rate": 2.4749305364339534e-05, "loss": 9.8506, "step": 35460 }, { "epoch": 0.17713301206022622, "grad_norm": 0.09906260669231415, "learning_rate": 2.474780344939798e-05, "loss": 9.8575, "step": 35470 }, { "epoch": 0.17718295088516567, "grad_norm": 0.09663223475217819, "learning_rate": 2.4746301534456434e-05, "loss": 9.8683, "step": 35480 }, { "epoch": 0.17723288971010512, "grad_norm": 0.0887630507349968, "learning_rate": 2.474479961951488e-05, "loss": 9.8639, "step": 35490 }, { "epoch": 0.17728282853504457, "grad_norm": 0.09145206212997437, "learning_rate": 2.474329770457333e-05, "loss": 9.8622, "step": 35500 }, { "epoch": 0.17733276735998402, "grad_norm": 0.09705221652984619, "learning_rate": 2.474179578963178e-05, "loss": 9.8569, "step": 35510 }, { "epoch": 0.17738270618492347, "grad_norm": 0.09884809702634811, "learning_rate": 2.4740293874690228e-05, "loss": 9.8575, "step": 35520 }, { "epoch": 0.17743264500986292, "grad_norm": 0.09495537728071213, "learning_rate": 2.4738791959748682e-05, "loss": 9.8571, "step": 35530 }, { "epoch": 0.17748258383480237, "grad_norm": 0.0902133509516716, "learning_rate": 2.473729004480713e-05, "loss": 9.8606, "step": 35540 }, { "epoch": 0.17753252265974181, "grad_norm": 0.09612247347831726, "learning_rate": 2.4735788129865582e-05, "loss": 9.8519, "step": 35550 }, { "epoch": 0.17758246148468126, "grad_norm": 0.09739767014980316, "learning_rate": 2.473428621492403e-05, "loss": 9.8538, "step": 35560 }, { "epoch": 0.1776324003096207, "grad_norm": 0.10079686343669891, "learning_rate": 2.4732784299982476e-05, "loss": 9.8571, "step": 35570 }, { "epoch": 0.17768233913456016, "grad_norm": 0.0968751311302185, "learning_rate": 2.473128238504093e-05, "loss": 9.8485, "step": 35580 }, { "epoch": 0.1777322779594996, "grad_norm": 0.10343873500823975, "learning_rate": 2.4729780470099376e-05, "loss": 9.8578, "step": 35590 }, { "epoch": 0.17778221678443906, "grad_norm": 0.09628947824239731, "learning_rate": 2.472827855515783e-05, "loss": 9.8457, "step": 35600 }, { "epoch": 0.1778321556093785, "grad_norm": 0.08937991410493851, "learning_rate": 2.4726776640216276e-05, "loss": 9.8513, "step": 35610 }, { "epoch": 0.17788209443431796, "grad_norm": 0.09612028300762177, "learning_rate": 2.4725274725274723e-05, "loss": 9.8552, "step": 35620 }, { "epoch": 0.1779320332592574, "grad_norm": 0.09348214417695999, "learning_rate": 2.4723772810333177e-05, "loss": 9.8598, "step": 35630 }, { "epoch": 0.17798197208419686, "grad_norm": 0.09429159015417099, "learning_rate": 2.4722270895391624e-05, "loss": 9.8548, "step": 35640 }, { "epoch": 0.1780319109091363, "grad_norm": 0.09209638088941574, "learning_rate": 2.4720768980450077e-05, "loss": 9.8631, "step": 35650 }, { "epoch": 0.17808184973407576, "grad_norm": 0.1002785861492157, "learning_rate": 2.4719267065508524e-05, "loss": 9.845, "step": 35660 }, { "epoch": 0.1781317885590152, "grad_norm": 0.09212984144687653, "learning_rate": 2.471776515056697e-05, "loss": 9.8511, "step": 35670 }, { "epoch": 0.17818172738395466, "grad_norm": 0.09603580087423325, "learning_rate": 2.4716263235625424e-05, "loss": 9.8495, "step": 35680 }, { "epoch": 0.1782316662088941, "grad_norm": 0.0989808738231659, "learning_rate": 2.471476132068387e-05, "loss": 9.8624, "step": 35690 }, { "epoch": 0.17828160503383356, "grad_norm": 0.09753132611513138, "learning_rate": 2.4713259405742325e-05, "loss": 9.8588, "step": 35700 }, { "epoch": 0.178331543858773, "grad_norm": 0.09459420293569565, "learning_rate": 2.471175749080077e-05, "loss": 9.8551, "step": 35710 }, { "epoch": 0.17838148268371246, "grad_norm": 0.09288576990365982, "learning_rate": 2.4710255575859218e-05, "loss": 9.852, "step": 35720 }, { "epoch": 0.1784314215086519, "grad_norm": 0.09371695667505264, "learning_rate": 2.4708753660917672e-05, "loss": 9.8552, "step": 35730 }, { "epoch": 0.17848136033359135, "grad_norm": 0.09233945608139038, "learning_rate": 2.470725174597612e-05, "loss": 9.8561, "step": 35740 }, { "epoch": 0.1785312991585308, "grad_norm": 0.09840219467878342, "learning_rate": 2.4705749831034572e-05, "loss": 9.8552, "step": 35750 }, { "epoch": 0.17858123798347025, "grad_norm": 0.09583665430545807, "learning_rate": 2.470424791609302e-05, "loss": 9.8496, "step": 35760 }, { "epoch": 0.1786311768084097, "grad_norm": 0.09573546051979065, "learning_rate": 2.4702746001151466e-05, "loss": 9.8575, "step": 35770 }, { "epoch": 0.17868111563334915, "grad_norm": 0.09594116359949112, "learning_rate": 2.470124408620992e-05, "loss": 9.8538, "step": 35780 }, { "epoch": 0.1787310544582886, "grad_norm": 0.09792909026145935, "learning_rate": 2.4699742171268366e-05, "loss": 9.853, "step": 35790 }, { "epoch": 0.17878099328322805, "grad_norm": 0.09887704998254776, "learning_rate": 2.469824025632682e-05, "loss": 9.8519, "step": 35800 }, { "epoch": 0.1788309321081675, "grad_norm": 0.09814807772636414, "learning_rate": 2.4696738341385266e-05, "loss": 9.8464, "step": 35810 }, { "epoch": 0.17888087093310695, "grad_norm": 0.09986128658056259, "learning_rate": 2.4695236426443713e-05, "loss": 9.8488, "step": 35820 }, { "epoch": 0.1789308097580464, "grad_norm": 0.0871347114443779, "learning_rate": 2.4693734511502167e-05, "loss": 9.8522, "step": 35830 }, { "epoch": 0.17898074858298585, "grad_norm": 0.09611291438341141, "learning_rate": 2.4692232596560614e-05, "loss": 9.8551, "step": 35840 }, { "epoch": 0.1790306874079253, "grad_norm": 0.1038837879896164, "learning_rate": 2.4690730681619067e-05, "loss": 9.8544, "step": 35850 }, { "epoch": 0.17908062623286475, "grad_norm": 0.09933873265981674, "learning_rate": 2.4689228766677514e-05, "loss": 9.8551, "step": 35860 }, { "epoch": 0.1791305650578042, "grad_norm": 0.0895809531211853, "learning_rate": 2.4687726851735964e-05, "loss": 9.8486, "step": 35870 }, { "epoch": 0.17918050388274365, "grad_norm": 0.09082268923521042, "learning_rate": 2.4686224936794414e-05, "loss": 9.8563, "step": 35880 }, { "epoch": 0.1792304427076831, "grad_norm": 0.09877155721187592, "learning_rate": 2.468472302185286e-05, "loss": 9.8526, "step": 35890 }, { "epoch": 0.17928038153262255, "grad_norm": 0.09575866162776947, "learning_rate": 2.4683221106911315e-05, "loss": 9.857, "step": 35900 }, { "epoch": 0.179330320357562, "grad_norm": 0.0871095061302185, "learning_rate": 2.468171919196976e-05, "loss": 9.8527, "step": 35910 }, { "epoch": 0.17938025918250144, "grad_norm": 0.09860642999410629, "learning_rate": 2.468021727702821e-05, "loss": 9.8475, "step": 35920 }, { "epoch": 0.1794301980074409, "grad_norm": 0.09469640254974365, "learning_rate": 2.4678715362086662e-05, "loss": 9.8499, "step": 35930 }, { "epoch": 0.17948013683238034, "grad_norm": 0.09654894471168518, "learning_rate": 2.467721344714511e-05, "loss": 9.8446, "step": 35940 }, { "epoch": 0.1795300756573198, "grad_norm": 0.09052994847297668, "learning_rate": 2.4675711532203562e-05, "loss": 9.8469, "step": 35950 }, { "epoch": 0.17958001448225924, "grad_norm": 0.09217459708452225, "learning_rate": 2.467420961726201e-05, "loss": 9.8483, "step": 35960 }, { "epoch": 0.1796299533071987, "grad_norm": 0.09349222481250763, "learning_rate": 2.467270770232046e-05, "loss": 9.8498, "step": 35970 }, { "epoch": 0.17967989213213814, "grad_norm": 0.09795039892196655, "learning_rate": 2.467120578737891e-05, "loss": 9.848, "step": 35980 }, { "epoch": 0.1797298309570776, "grad_norm": 0.09592344611883163, "learning_rate": 2.4669703872437356e-05, "loss": 9.8407, "step": 35990 }, { "epoch": 0.17977976978201704, "grad_norm": 0.09404613822698593, "learning_rate": 2.466820195749581e-05, "loss": 9.8429, "step": 36000 }, { "epoch": 0.1798297086069565, "grad_norm": 0.09326544404029846, "learning_rate": 2.4666700042554256e-05, "loss": 9.8567, "step": 36010 }, { "epoch": 0.17987964743189594, "grad_norm": 0.09783726930618286, "learning_rate": 2.4665198127612707e-05, "loss": 9.8545, "step": 36020 }, { "epoch": 0.1799295862568354, "grad_norm": 0.09461402893066406, "learning_rate": 2.4663696212671157e-05, "loss": 9.8438, "step": 36030 }, { "epoch": 0.17997952508177484, "grad_norm": 0.09616295248270035, "learning_rate": 2.4662194297729604e-05, "loss": 9.8417, "step": 36040 }, { "epoch": 0.1800294639067143, "grad_norm": 0.09772490710020065, "learning_rate": 2.4660692382788057e-05, "loss": 9.8518, "step": 36050 }, { "epoch": 0.18007940273165374, "grad_norm": 0.09420907497406006, "learning_rate": 2.4659190467846504e-05, "loss": 9.8473, "step": 36060 }, { "epoch": 0.1801293415565932, "grad_norm": 0.09083731472492218, "learning_rate": 2.4657688552904954e-05, "loss": 9.8436, "step": 36070 }, { "epoch": 0.18017928038153264, "grad_norm": 0.09854559600353241, "learning_rate": 2.4656186637963404e-05, "loss": 9.8468, "step": 36080 }, { "epoch": 0.18022921920647209, "grad_norm": 0.09707348048686981, "learning_rate": 2.465468472302185e-05, "loss": 9.8503, "step": 36090 }, { "epoch": 0.1802791580314115, "grad_norm": 0.09600484371185303, "learning_rate": 2.4653182808080305e-05, "loss": 9.8457, "step": 36100 }, { "epoch": 0.18032909685635096, "grad_norm": 0.0931532233953476, "learning_rate": 2.465168089313875e-05, "loss": 9.8469, "step": 36110 }, { "epoch": 0.1803790356812904, "grad_norm": 0.09178706258535385, "learning_rate": 2.46501789781972e-05, "loss": 9.8432, "step": 36120 }, { "epoch": 0.18042897450622986, "grad_norm": 0.09376762807369232, "learning_rate": 2.4648677063255652e-05, "loss": 9.8451, "step": 36130 }, { "epoch": 0.1804789133311693, "grad_norm": 0.09681384265422821, "learning_rate": 2.46471751483141e-05, "loss": 9.8427, "step": 36140 }, { "epoch": 0.18052885215610875, "grad_norm": 0.09608221054077148, "learning_rate": 2.4645673233372552e-05, "loss": 9.8498, "step": 36150 }, { "epoch": 0.1805787909810482, "grad_norm": 0.0937625989317894, "learning_rate": 2.4644171318431e-05, "loss": 9.8502, "step": 36160 }, { "epoch": 0.18062872980598765, "grad_norm": 0.09719646722078323, "learning_rate": 2.464266940348945e-05, "loss": 9.849, "step": 36170 }, { "epoch": 0.1806786686309271, "grad_norm": 0.09256922453641891, "learning_rate": 2.46411674885479e-05, "loss": 9.8471, "step": 36180 }, { "epoch": 0.18072860745586655, "grad_norm": 0.09239103645086288, "learning_rate": 2.463966557360635e-05, "loss": 9.8389, "step": 36190 }, { "epoch": 0.180778546280806, "grad_norm": 0.09770379960536957, "learning_rate": 2.46381636586648e-05, "loss": 9.8456, "step": 36200 }, { "epoch": 0.18082848510574545, "grad_norm": 0.09287633001804352, "learning_rate": 2.4636661743723246e-05, "loss": 9.8502, "step": 36210 }, { "epoch": 0.1808784239306849, "grad_norm": 0.0940169245004654, "learning_rate": 2.4635159828781697e-05, "loss": 9.848, "step": 36220 }, { "epoch": 0.18092836275562435, "grad_norm": 0.0958079993724823, "learning_rate": 2.4633657913840147e-05, "loss": 9.8366, "step": 36230 }, { "epoch": 0.1809783015805638, "grad_norm": 0.09197930246591568, "learning_rate": 2.4632155998898597e-05, "loss": 12.8969, "step": 36240 }, { "epoch": 0.18102824040550325, "grad_norm": 0.08773362636566162, "learning_rate": 2.4630654083957047e-05, "loss": 14.5353, "step": 36250 }, { "epoch": 0.1810781792304427, "grad_norm": 0.10025262087583542, "learning_rate": 2.4629152169015494e-05, "loss": 9.8401, "step": 36260 }, { "epoch": 0.18112811805538215, "grad_norm": 0.0945044681429863, "learning_rate": 2.4627650254073944e-05, "loss": 9.8424, "step": 36270 }, { "epoch": 0.1811780568803216, "grad_norm": 0.0924852043390274, "learning_rate": 2.4626148339132394e-05, "loss": 9.8448, "step": 36280 }, { "epoch": 0.18122799570526105, "grad_norm": 0.09346658736467361, "learning_rate": 2.4624646424190845e-05, "loss": 9.8408, "step": 36290 }, { "epoch": 0.1812779345302005, "grad_norm": 0.10162097215652466, "learning_rate": 2.4623144509249295e-05, "loss": 9.8382, "step": 36300 }, { "epoch": 0.18132787335513995, "grad_norm": 0.093153215944767, "learning_rate": 2.462164259430774e-05, "loss": 9.8415, "step": 36310 }, { "epoch": 0.1813778121800794, "grad_norm": 0.09772991389036179, "learning_rate": 2.462014067936619e-05, "loss": 9.8463, "step": 36320 }, { "epoch": 0.18142775100501884, "grad_norm": 0.09410078823566437, "learning_rate": 2.4618638764424642e-05, "loss": 9.8374, "step": 36330 }, { "epoch": 0.1814776898299583, "grad_norm": 0.09498627483844757, "learning_rate": 2.4617136849483092e-05, "loss": 9.8382, "step": 36340 }, { "epoch": 0.18152762865489774, "grad_norm": 0.09718122333288193, "learning_rate": 2.4615634934541542e-05, "loss": 9.839, "step": 36350 }, { "epoch": 0.1815775674798372, "grad_norm": 0.09762446582317352, "learning_rate": 2.461413301959999e-05, "loss": 9.8365, "step": 36360 }, { "epoch": 0.18162750630477664, "grad_norm": 0.09202492982149124, "learning_rate": 2.461263110465844e-05, "loss": 9.8351, "step": 36370 }, { "epoch": 0.1816774451297161, "grad_norm": 0.09190283715724945, "learning_rate": 2.461112918971689e-05, "loss": 9.8375, "step": 36380 }, { "epoch": 0.18172738395465554, "grad_norm": 0.09559813141822815, "learning_rate": 2.460962727477534e-05, "loss": 9.8424, "step": 36390 }, { "epoch": 0.181777322779595, "grad_norm": 0.09660519659519196, "learning_rate": 2.460812535983379e-05, "loss": 9.8377, "step": 36400 }, { "epoch": 0.18182726160453444, "grad_norm": 0.0976775512099266, "learning_rate": 2.4606623444892236e-05, "loss": 9.841, "step": 36410 }, { "epoch": 0.1818772004294739, "grad_norm": 0.10077891498804092, "learning_rate": 2.4605121529950687e-05, "loss": 9.8422, "step": 36420 }, { "epoch": 0.18192713925441334, "grad_norm": 0.09487045556306839, "learning_rate": 2.4603619615009137e-05, "loss": 9.8388, "step": 36430 }, { "epoch": 0.1819770780793528, "grad_norm": 0.08921613544225693, "learning_rate": 2.4602117700067587e-05, "loss": 9.8387, "step": 36440 }, { "epoch": 0.18202701690429224, "grad_norm": 0.09209630638360977, "learning_rate": 2.4600615785126037e-05, "loss": 9.8364, "step": 36450 }, { "epoch": 0.1820769557292317, "grad_norm": 0.09444855153560638, "learning_rate": 2.4599113870184484e-05, "loss": 9.8348, "step": 36460 }, { "epoch": 0.18212689455417114, "grad_norm": 0.10311925411224365, "learning_rate": 2.4597611955242934e-05, "loss": 9.8378, "step": 36470 }, { "epoch": 0.1821768333791106, "grad_norm": 0.09053095430135727, "learning_rate": 2.4596110040301384e-05, "loss": 9.8406, "step": 36480 }, { "epoch": 0.18222677220405004, "grad_norm": 0.09320933371782303, "learning_rate": 2.4594608125359835e-05, "loss": 9.8384, "step": 36490 }, { "epoch": 0.18227671102898949, "grad_norm": 0.09924657642841339, "learning_rate": 2.4593106210418285e-05, "loss": 9.8366, "step": 36500 }, { "epoch": 0.18232664985392893, "grad_norm": 0.09330344945192337, "learning_rate": 2.4591604295476735e-05, "loss": 9.829, "step": 36510 }, { "epoch": 0.18237658867886838, "grad_norm": 0.0905805230140686, "learning_rate": 2.459010238053518e-05, "loss": 9.8449, "step": 36520 }, { "epoch": 0.18242652750380783, "grad_norm": 0.09339253604412079, "learning_rate": 2.4588600465593632e-05, "loss": 9.8361, "step": 36530 }, { "epoch": 0.18247646632874728, "grad_norm": 0.09230770170688629, "learning_rate": 2.4587098550652082e-05, "loss": 9.8377, "step": 36540 }, { "epoch": 0.18252640515368673, "grad_norm": 0.09087874740362167, "learning_rate": 2.4585596635710532e-05, "loss": 9.8389, "step": 36550 }, { "epoch": 0.18257634397862618, "grad_norm": 0.09459523111581802, "learning_rate": 2.4584094720768982e-05, "loss": 9.8339, "step": 36560 }, { "epoch": 0.18262628280356563, "grad_norm": 0.09835857897996902, "learning_rate": 2.458259280582743e-05, "loss": 9.8379, "step": 36570 }, { "epoch": 0.18267622162850508, "grad_norm": 0.09462913870811462, "learning_rate": 2.458109089088588e-05, "loss": 9.8366, "step": 36580 }, { "epoch": 0.18272616045344453, "grad_norm": 0.09966031461954117, "learning_rate": 2.457958897594433e-05, "loss": 9.8301, "step": 36590 }, { "epoch": 0.18277609927838398, "grad_norm": 0.09340931475162506, "learning_rate": 2.457808706100278e-05, "loss": 9.8389, "step": 36600 }, { "epoch": 0.18282603810332343, "grad_norm": 0.09110315144062042, "learning_rate": 2.457658514606123e-05, "loss": 9.8342, "step": 36610 }, { "epoch": 0.18287597692826288, "grad_norm": 0.09554065018892288, "learning_rate": 2.4575083231119677e-05, "loss": 9.8408, "step": 36620 }, { "epoch": 0.18292591575320233, "grad_norm": 0.09898494929075241, "learning_rate": 2.4573581316178127e-05, "loss": 9.8364, "step": 36630 }, { "epoch": 0.18297585457814178, "grad_norm": 0.09261037409305573, "learning_rate": 2.4572079401236577e-05, "loss": 9.8335, "step": 36640 }, { "epoch": 0.18302579340308123, "grad_norm": 0.09171412140130997, "learning_rate": 2.4570577486295027e-05, "loss": 9.8286, "step": 36650 }, { "epoch": 0.18307573222802068, "grad_norm": 0.09472862631082535, "learning_rate": 2.4569075571353477e-05, "loss": 9.8358, "step": 36660 }, { "epoch": 0.18312567105296013, "grad_norm": 0.09369245171546936, "learning_rate": 2.4567573656411924e-05, "loss": 9.8337, "step": 36670 }, { "epoch": 0.18317560987789958, "grad_norm": 0.09198103100061417, "learning_rate": 2.4566071741470374e-05, "loss": 9.8384, "step": 36680 }, { "epoch": 0.18322554870283903, "grad_norm": 0.09761819988489151, "learning_rate": 2.4564569826528825e-05, "loss": 9.833, "step": 36690 }, { "epoch": 0.18327548752777847, "grad_norm": 0.09576039016246796, "learning_rate": 2.4563067911587275e-05, "loss": 9.8346, "step": 36700 }, { "epoch": 0.18332542635271792, "grad_norm": 0.10029350966215134, "learning_rate": 2.4561565996645725e-05, "loss": 9.8383, "step": 36710 }, { "epoch": 0.18337536517765737, "grad_norm": 0.09115588665008545, "learning_rate": 2.456006408170417e-05, "loss": 9.8311, "step": 36720 }, { "epoch": 0.18342530400259682, "grad_norm": 0.09495572000741959, "learning_rate": 2.4558562166762622e-05, "loss": 9.8296, "step": 36730 }, { "epoch": 0.18347524282753627, "grad_norm": 0.0938972532749176, "learning_rate": 2.4557060251821072e-05, "loss": 9.8303, "step": 36740 }, { "epoch": 0.18352518165247572, "grad_norm": 0.0916903167963028, "learning_rate": 2.4555558336879522e-05, "loss": 9.8256, "step": 36750 }, { "epoch": 0.18357512047741517, "grad_norm": 0.09134495258331299, "learning_rate": 2.4554056421937972e-05, "loss": 9.8269, "step": 36760 }, { "epoch": 0.18362505930235462, "grad_norm": 0.09667996317148209, "learning_rate": 2.455255450699642e-05, "loss": 9.8308, "step": 36770 }, { "epoch": 0.18367499812729407, "grad_norm": 0.09833179414272308, "learning_rate": 2.455105259205487e-05, "loss": 9.8352, "step": 36780 }, { "epoch": 0.18372493695223352, "grad_norm": 0.09507912397384644, "learning_rate": 2.454955067711332e-05, "loss": 9.8326, "step": 36790 }, { "epoch": 0.18377487577717297, "grad_norm": 0.09790566563606262, "learning_rate": 2.454804876217177e-05, "loss": 9.831, "step": 36800 }, { "epoch": 0.18382481460211242, "grad_norm": 0.1005285233259201, "learning_rate": 2.454654684723022e-05, "loss": 9.8316, "step": 36810 }, { "epoch": 0.18387475342705187, "grad_norm": 0.09769100695848465, "learning_rate": 2.4545044932288667e-05, "loss": 9.8273, "step": 36820 }, { "epoch": 0.18392469225199132, "grad_norm": 0.09412973374128342, "learning_rate": 2.454354301734712e-05, "loss": 9.826, "step": 36830 }, { "epoch": 0.18397463107693077, "grad_norm": 0.09262508153915405, "learning_rate": 2.4542041102405567e-05, "loss": 9.8291, "step": 36840 }, { "epoch": 0.18402456990187022, "grad_norm": 0.09512876719236374, "learning_rate": 2.4540539187464017e-05, "loss": 9.8278, "step": 36850 }, { "epoch": 0.18407450872680967, "grad_norm": 0.0943412259221077, "learning_rate": 2.4539037272522467e-05, "loss": 9.8363, "step": 36860 }, { "epoch": 0.18412444755174912, "grad_norm": 0.09464114904403687, "learning_rate": 2.4537535357580914e-05, "loss": 9.8294, "step": 36870 }, { "epoch": 0.18417438637668856, "grad_norm": 0.09377477318048477, "learning_rate": 2.4536033442639368e-05, "loss": 9.8332, "step": 36880 }, { "epoch": 0.18422432520162801, "grad_norm": 0.09622970223426819, "learning_rate": 2.4534531527697815e-05, "loss": 9.8243, "step": 36890 }, { "epoch": 0.18427426402656746, "grad_norm": 0.09202242642641068, "learning_rate": 2.4533029612756265e-05, "loss": 9.8243, "step": 36900 }, { "epoch": 0.1843242028515069, "grad_norm": 0.09235205501317978, "learning_rate": 2.4531527697814715e-05, "loss": 9.8343, "step": 36910 }, { "epoch": 0.18437414167644636, "grad_norm": 0.09455617517232895, "learning_rate": 2.4530025782873162e-05, "loss": 9.8255, "step": 36920 }, { "epoch": 0.1844240805013858, "grad_norm": 0.09160789847373962, "learning_rate": 2.4528523867931615e-05, "loss": 9.8267, "step": 36930 }, { "epoch": 0.18447401932632526, "grad_norm": 0.09022436290979385, "learning_rate": 2.4527021952990062e-05, "loss": 9.8328, "step": 36940 }, { "epoch": 0.1845239581512647, "grad_norm": 0.09537603706121445, "learning_rate": 2.4525520038048512e-05, "loss": 9.8258, "step": 36950 }, { "epoch": 0.18457389697620416, "grad_norm": 0.09323366731405258, "learning_rate": 2.4524018123106962e-05, "loss": 9.8279, "step": 36960 }, { "epoch": 0.1846238358011436, "grad_norm": 0.0941099300980568, "learning_rate": 2.452251620816541e-05, "loss": 9.8292, "step": 36970 }, { "epoch": 0.18467377462608306, "grad_norm": 0.0980052724480629, "learning_rate": 2.4521014293223863e-05, "loss": 9.8218, "step": 36980 }, { "epoch": 0.1847237134510225, "grad_norm": 0.09246113896369934, "learning_rate": 2.451951237828231e-05, "loss": 9.8246, "step": 36990 }, { "epoch": 0.18477365227596196, "grad_norm": 0.09564212709665298, "learning_rate": 2.451801046334076e-05, "loss": 9.8209, "step": 37000 }, { "epoch": 0.1848235911009014, "grad_norm": 0.09970725327730179, "learning_rate": 2.451650854839921e-05, "loss": 9.8251, "step": 37010 }, { "epoch": 0.18487352992584086, "grad_norm": 0.09713415801525116, "learning_rate": 2.4515006633457657e-05, "loss": 9.8269, "step": 37020 }, { "epoch": 0.1849234687507803, "grad_norm": 0.09196287393569946, "learning_rate": 2.451350471851611e-05, "loss": 9.827, "step": 37030 }, { "epoch": 0.18497340757571976, "grad_norm": 0.09455425292253494, "learning_rate": 2.4512002803574557e-05, "loss": 9.8193, "step": 37040 }, { "epoch": 0.1850233464006592, "grad_norm": 0.09092764556407928, "learning_rate": 2.4510500888633007e-05, "loss": 9.8283, "step": 37050 }, { "epoch": 0.18507328522559865, "grad_norm": 0.0928642749786377, "learning_rate": 2.4508998973691457e-05, "loss": 9.8273, "step": 37060 }, { "epoch": 0.1851232240505381, "grad_norm": 0.09685216099023819, "learning_rate": 2.4507497058749904e-05, "loss": 9.8225, "step": 37070 }, { "epoch": 0.18517316287547755, "grad_norm": 0.0929524376988411, "learning_rate": 2.4505995143808358e-05, "loss": 9.8339, "step": 37080 }, { "epoch": 0.18522310170041698, "grad_norm": 0.09492181241512299, "learning_rate": 2.4504493228866805e-05, "loss": 9.8241, "step": 37090 }, { "epoch": 0.18527304052535642, "grad_norm": 0.09493514150381088, "learning_rate": 2.4502991313925255e-05, "loss": 9.8202, "step": 37100 }, { "epoch": 0.18532297935029587, "grad_norm": 0.0974758043885231, "learning_rate": 2.4501489398983705e-05, "loss": 9.8215, "step": 37110 }, { "epoch": 0.18537291817523532, "grad_norm": 0.09460314363241196, "learning_rate": 2.4499987484042152e-05, "loss": 9.8209, "step": 37120 }, { "epoch": 0.18542285700017477, "grad_norm": 0.10088241845369339, "learning_rate": 2.4498485569100605e-05, "loss": 9.8161, "step": 37130 }, { "epoch": 0.18547279582511422, "grad_norm": 0.09594068676233292, "learning_rate": 2.4496983654159052e-05, "loss": 9.8158, "step": 37140 }, { "epoch": 0.18552273465005367, "grad_norm": 0.09308291226625443, "learning_rate": 2.4495481739217506e-05, "loss": 9.8229, "step": 37150 }, { "epoch": 0.18557267347499312, "grad_norm": 0.09384151548147202, "learning_rate": 2.4493979824275952e-05, "loss": 9.8258, "step": 37160 }, { "epoch": 0.18562261229993257, "grad_norm": 0.09089542925357819, "learning_rate": 2.44924779093344e-05, "loss": 9.8214, "step": 37170 }, { "epoch": 0.18567255112487202, "grad_norm": 0.10226479917764664, "learning_rate": 2.4490975994392853e-05, "loss": 9.8194, "step": 37180 }, { "epoch": 0.18572248994981147, "grad_norm": 0.09530473500490189, "learning_rate": 2.44894740794513e-05, "loss": 9.8214, "step": 37190 }, { "epoch": 0.18577242877475092, "grad_norm": 0.09461791068315506, "learning_rate": 2.4487972164509753e-05, "loss": 9.8234, "step": 37200 }, { "epoch": 0.18582236759969037, "grad_norm": 0.09858336299657822, "learning_rate": 2.44864702495682e-05, "loss": 9.8273, "step": 37210 }, { "epoch": 0.18587230642462982, "grad_norm": 0.0900527834892273, "learning_rate": 2.4484968334626647e-05, "loss": 9.8225, "step": 37220 }, { "epoch": 0.18592224524956927, "grad_norm": 0.09260288625955582, "learning_rate": 2.44834664196851e-05, "loss": 9.8215, "step": 37230 }, { "epoch": 0.18597218407450872, "grad_norm": 0.0938689112663269, "learning_rate": 2.4481964504743547e-05, "loss": 9.8189, "step": 37240 }, { "epoch": 0.18602212289944817, "grad_norm": 0.09121273458003998, "learning_rate": 2.4480462589802e-05, "loss": 9.819, "step": 37250 }, { "epoch": 0.18607206172438762, "grad_norm": 0.09315800666809082, "learning_rate": 2.4478960674860447e-05, "loss": 9.8215, "step": 37260 }, { "epoch": 0.18612200054932707, "grad_norm": 0.09155386686325073, "learning_rate": 2.4477458759918898e-05, "loss": 9.8212, "step": 37270 }, { "epoch": 0.18617193937426652, "grad_norm": 0.09750566631555557, "learning_rate": 2.4475956844977348e-05, "loss": 9.8185, "step": 37280 }, { "epoch": 0.18622187819920596, "grad_norm": 0.09988453984260559, "learning_rate": 2.4474454930035795e-05, "loss": 9.8174, "step": 37290 }, { "epoch": 0.18627181702414541, "grad_norm": 0.09393157064914703, "learning_rate": 2.4472953015094248e-05, "loss": 9.8232, "step": 37300 }, { "epoch": 0.18632175584908486, "grad_norm": 0.09441966563463211, "learning_rate": 2.4471451100152695e-05, "loss": 9.8228, "step": 37310 }, { "epoch": 0.1863716946740243, "grad_norm": 0.091115802526474, "learning_rate": 2.4469949185211145e-05, "loss": 9.8182, "step": 37320 }, { "epoch": 0.18642163349896376, "grad_norm": 0.09511063247919083, "learning_rate": 2.4468447270269595e-05, "loss": 9.8202, "step": 37330 }, { "epoch": 0.1864715723239032, "grad_norm": 0.10219678282737732, "learning_rate": 2.4466945355328042e-05, "loss": 9.8191, "step": 37340 }, { "epoch": 0.18652151114884266, "grad_norm": 0.09869439899921417, "learning_rate": 2.4465443440386496e-05, "loss": 9.8132, "step": 37350 }, { "epoch": 0.1865714499737821, "grad_norm": 0.09464725852012634, "learning_rate": 2.4463941525444942e-05, "loss": 9.8219, "step": 37360 }, { "epoch": 0.18662138879872156, "grad_norm": 0.08857977390289307, "learning_rate": 2.4462439610503393e-05, "loss": 9.8222, "step": 37370 }, { "epoch": 0.186671327623661, "grad_norm": 0.09524288773536682, "learning_rate": 2.4460937695561843e-05, "loss": 9.8197, "step": 37380 }, { "epoch": 0.18672126644860046, "grad_norm": 0.09553853422403336, "learning_rate": 2.445943578062029e-05, "loss": 9.8198, "step": 37390 }, { "epoch": 0.1867712052735399, "grad_norm": 0.10078942030668259, "learning_rate": 2.4457933865678743e-05, "loss": 9.8121, "step": 37400 }, { "epoch": 0.18682114409847936, "grad_norm": 0.09711633622646332, "learning_rate": 2.445643195073719e-05, "loss": 9.8176, "step": 37410 }, { "epoch": 0.1868710829234188, "grad_norm": 0.0985088124871254, "learning_rate": 2.445493003579564e-05, "loss": 9.8125, "step": 37420 }, { "epoch": 0.18692102174835826, "grad_norm": 0.10270990431308746, "learning_rate": 2.445342812085409e-05, "loss": 9.824, "step": 37430 }, { "epoch": 0.1869709605732977, "grad_norm": 0.09112194925546646, "learning_rate": 2.4451926205912537e-05, "loss": 9.8115, "step": 37440 }, { "epoch": 0.18702089939823716, "grad_norm": 0.09221579879522324, "learning_rate": 2.445042429097099e-05, "loss": 9.8166, "step": 37450 }, { "epoch": 0.1870708382231766, "grad_norm": 0.09492717683315277, "learning_rate": 2.4448922376029437e-05, "loss": 9.816, "step": 37460 }, { "epoch": 0.18712077704811605, "grad_norm": 0.09594427794218063, "learning_rate": 2.4447420461087888e-05, "loss": 9.8106, "step": 37470 }, { "epoch": 0.1871707158730555, "grad_norm": 0.10151471197605133, "learning_rate": 2.4445918546146338e-05, "loss": 9.8237, "step": 37480 }, { "epoch": 0.18722065469799495, "grad_norm": 0.09668134897947311, "learning_rate": 2.4444416631204785e-05, "loss": 9.8231, "step": 37490 }, { "epoch": 0.1872705935229344, "grad_norm": 0.09197778254747391, "learning_rate": 2.4442914716263238e-05, "loss": 9.817, "step": 37500 }, { "epoch": 0.18732053234787385, "grad_norm": 0.09514784067869186, "learning_rate": 2.4441412801321685e-05, "loss": 9.817, "step": 37510 }, { "epoch": 0.1873704711728133, "grad_norm": 0.09452585130929947, "learning_rate": 2.4439910886380135e-05, "loss": 9.8115, "step": 37520 }, { "epoch": 0.18742040999775275, "grad_norm": 0.09140285104513168, "learning_rate": 2.4438408971438585e-05, "loss": 9.8115, "step": 37530 }, { "epoch": 0.1874703488226922, "grad_norm": 0.10165819525718689, "learning_rate": 2.4436907056497032e-05, "loss": 9.8183, "step": 37540 }, { "epoch": 0.18752028764763165, "grad_norm": 0.09387430548667908, "learning_rate": 2.4435405141555486e-05, "loss": 9.8187, "step": 37550 }, { "epoch": 0.1875702264725711, "grad_norm": 0.09394615888595581, "learning_rate": 2.4433903226613932e-05, "loss": 9.8146, "step": 37560 }, { "epoch": 0.18762016529751055, "grad_norm": 0.09397023171186447, "learning_rate": 2.4432401311672386e-05, "loss": 9.8182, "step": 37570 }, { "epoch": 0.18767010412245, "grad_norm": 0.09779095649719238, "learning_rate": 2.4430899396730833e-05, "loss": 9.81, "step": 37580 }, { "epoch": 0.18772004294738945, "grad_norm": 0.09826376289129257, "learning_rate": 2.442939748178928e-05, "loss": 9.8129, "step": 37590 }, { "epoch": 0.1877699817723289, "grad_norm": 0.09404350817203522, "learning_rate": 2.4427895566847733e-05, "loss": 9.813, "step": 37600 }, { "epoch": 0.18781992059726835, "grad_norm": 0.09514794498682022, "learning_rate": 2.442639365190618e-05, "loss": 9.8125, "step": 37610 }, { "epoch": 0.1878698594222078, "grad_norm": 0.09750908613204956, "learning_rate": 2.4424891736964634e-05, "loss": 9.8146, "step": 37620 }, { "epoch": 0.18791979824714725, "grad_norm": 0.09314355999231339, "learning_rate": 2.442338982202308e-05, "loss": 9.8149, "step": 37630 }, { "epoch": 0.1879697370720867, "grad_norm": 0.08955375105142593, "learning_rate": 2.4421887907081527e-05, "loss": 9.8092, "step": 37640 }, { "epoch": 0.18801967589702615, "grad_norm": 0.09085706621408463, "learning_rate": 2.442038599213998e-05, "loss": 9.8129, "step": 37650 }, { "epoch": 0.1880696147219656, "grad_norm": 0.09261947870254517, "learning_rate": 2.4418884077198427e-05, "loss": 9.8183, "step": 37660 }, { "epoch": 0.18811955354690504, "grad_norm": 0.09396150708198547, "learning_rate": 2.441738216225688e-05, "loss": 9.8103, "step": 37670 }, { "epoch": 0.1881694923718445, "grad_norm": 0.10040120780467987, "learning_rate": 2.4415880247315328e-05, "loss": 9.815, "step": 37680 }, { "epoch": 0.18821943119678394, "grad_norm": 0.09295698255300522, "learning_rate": 2.4414378332373775e-05, "loss": 9.8175, "step": 37690 }, { "epoch": 0.1882693700217234, "grad_norm": 0.09030512720346451, "learning_rate": 2.4412876417432228e-05, "loss": 9.8094, "step": 37700 }, { "epoch": 0.18831930884666284, "grad_norm": 0.08929255604743958, "learning_rate": 2.4411374502490675e-05, "loss": 9.8134, "step": 37710 }, { "epoch": 0.1883692476716023, "grad_norm": 0.09024254232645035, "learning_rate": 2.440987258754913e-05, "loss": 9.816, "step": 37720 }, { "epoch": 0.18841918649654174, "grad_norm": 0.09843594580888748, "learning_rate": 2.4408370672607575e-05, "loss": 9.8173, "step": 37730 }, { "epoch": 0.1884691253214812, "grad_norm": 0.09375924617052078, "learning_rate": 2.4406868757666022e-05, "loss": 9.8139, "step": 37740 }, { "epoch": 0.18851906414642064, "grad_norm": 0.09652825444936752, "learning_rate": 2.4405366842724476e-05, "loss": 9.8105, "step": 37750 }, { "epoch": 0.1885690029713601, "grad_norm": 0.09221908450126648, "learning_rate": 2.4403864927782922e-05, "loss": 9.8123, "step": 37760 }, { "epoch": 0.18861894179629954, "grad_norm": 0.0924220085144043, "learning_rate": 2.4402363012841376e-05, "loss": 9.8106, "step": 37770 }, { "epoch": 0.188668880621239, "grad_norm": 0.09092016518115997, "learning_rate": 2.4400861097899823e-05, "loss": 9.8147, "step": 37780 }, { "epoch": 0.18871881944617844, "grad_norm": 0.09375354647636414, "learning_rate": 2.4399359182958273e-05, "loss": 9.8052, "step": 37790 }, { "epoch": 0.1887687582711179, "grad_norm": 0.09391508996486664, "learning_rate": 2.4397857268016723e-05, "loss": 9.8157, "step": 37800 }, { "epoch": 0.18881869709605734, "grad_norm": 0.0947243720293045, "learning_rate": 2.439635535307517e-05, "loss": 9.808, "step": 37810 }, { "epoch": 0.18886863592099679, "grad_norm": 0.09377534687519073, "learning_rate": 2.4394853438133624e-05, "loss": 9.8004, "step": 37820 }, { "epoch": 0.18891857474593624, "grad_norm": 0.09614383429288864, "learning_rate": 2.439335152319207e-05, "loss": 9.8076, "step": 37830 }, { "epoch": 0.18896851357087568, "grad_norm": 0.0933867022395134, "learning_rate": 2.439184960825052e-05, "loss": 9.8132, "step": 37840 }, { "epoch": 0.18901845239581513, "grad_norm": 0.0957120954990387, "learning_rate": 2.439034769330897e-05, "loss": 9.8068, "step": 37850 }, { "epoch": 0.18906839122075458, "grad_norm": 0.09493666887283325, "learning_rate": 2.4388845778367417e-05, "loss": 9.8015, "step": 37860 }, { "epoch": 0.18911833004569403, "grad_norm": 0.09380702674388885, "learning_rate": 2.438734386342587e-05, "loss": 9.8049, "step": 37870 }, { "epoch": 0.18916826887063348, "grad_norm": 0.09432664513587952, "learning_rate": 2.4385841948484318e-05, "loss": 9.8008, "step": 37880 }, { "epoch": 0.18921820769557293, "grad_norm": 0.09340386092662811, "learning_rate": 2.4384340033542768e-05, "loss": 9.8072, "step": 37890 }, { "epoch": 0.18926814652051238, "grad_norm": 0.09343277662992477, "learning_rate": 2.4382838118601218e-05, "loss": 9.8092, "step": 37900 }, { "epoch": 0.18931808534545183, "grad_norm": 0.09355462342500687, "learning_rate": 2.4381336203659665e-05, "loss": 9.8062, "step": 37910 }, { "epoch": 0.18936802417039128, "grad_norm": 0.09270920604467392, "learning_rate": 2.437983428871812e-05, "loss": 9.8113, "step": 37920 }, { "epoch": 0.18941796299533073, "grad_norm": 0.09088815748691559, "learning_rate": 2.4378332373776565e-05, "loss": 9.8089, "step": 37930 }, { "epoch": 0.18946790182027018, "grad_norm": 0.09358256310224533, "learning_rate": 2.4376830458835016e-05, "loss": 9.8005, "step": 37940 }, { "epoch": 0.18951784064520963, "grad_norm": 0.09258565306663513, "learning_rate": 2.4375328543893466e-05, "loss": 9.8029, "step": 37950 }, { "epoch": 0.18956777947014908, "grad_norm": 0.09664854407310486, "learning_rate": 2.4373826628951912e-05, "loss": 9.8015, "step": 37960 }, { "epoch": 0.18961771829508853, "grad_norm": 0.09192045032978058, "learning_rate": 2.4372324714010366e-05, "loss": 9.8067, "step": 37970 }, { "epoch": 0.18966765712002798, "grad_norm": 0.09485509991645813, "learning_rate": 2.4370822799068813e-05, "loss": 9.8016, "step": 37980 }, { "epoch": 0.18971759594496743, "grad_norm": 0.09736743569374084, "learning_rate": 2.4369320884127263e-05, "loss": 9.8028, "step": 37990 }, { "epoch": 0.18976753476990688, "grad_norm": 0.09561127424240112, "learning_rate": 2.4367818969185713e-05, "loss": 9.803, "step": 38000 }, { "epoch": 0.18981747359484633, "grad_norm": 0.0922846645116806, "learning_rate": 2.436631705424416e-05, "loss": 9.8083, "step": 38010 }, { "epoch": 0.18986741241978577, "grad_norm": 0.09271165728569031, "learning_rate": 2.4364815139302614e-05, "loss": 9.8141, "step": 38020 }, { "epoch": 0.18991735124472522, "grad_norm": 0.09106654673814774, "learning_rate": 2.436331322436106e-05, "loss": 9.8048, "step": 38030 }, { "epoch": 0.18996729006966467, "grad_norm": 0.09738942235708237, "learning_rate": 2.436181130941951e-05, "loss": 9.8077, "step": 38040 }, { "epoch": 0.19001722889460412, "grad_norm": 0.09800420701503754, "learning_rate": 2.436030939447796e-05, "loss": 9.8041, "step": 38050 }, { "epoch": 0.19006716771954357, "grad_norm": 0.09482547640800476, "learning_rate": 2.4358807479536407e-05, "loss": 9.804, "step": 38060 }, { "epoch": 0.19011710654448302, "grad_norm": 0.09160934388637543, "learning_rate": 2.435730556459486e-05, "loss": 9.8012, "step": 38070 }, { "epoch": 0.19016704536942244, "grad_norm": 0.09235961735248566, "learning_rate": 2.4355803649653308e-05, "loss": 9.8039, "step": 38080 }, { "epoch": 0.1902169841943619, "grad_norm": 0.0927954837679863, "learning_rate": 2.4354301734711758e-05, "loss": 9.805, "step": 38090 }, { "epoch": 0.19026692301930134, "grad_norm": 0.09588232636451721, "learning_rate": 2.4352799819770208e-05, "loss": 9.8001, "step": 38100 }, { "epoch": 0.1903168618442408, "grad_norm": 0.09209771454334259, "learning_rate": 2.435129790482866e-05, "loss": 9.7988, "step": 38110 }, { "epoch": 0.19036680066918024, "grad_norm": 0.09557371586561203, "learning_rate": 2.434979598988711e-05, "loss": 9.7982, "step": 38120 }, { "epoch": 0.1904167394941197, "grad_norm": 0.08873193711042404, "learning_rate": 2.4348294074945555e-05, "loss": 9.8029, "step": 38130 }, { "epoch": 0.19046667831905914, "grad_norm": 0.09229250252246857, "learning_rate": 2.4346792160004006e-05, "loss": 9.8071, "step": 38140 }, { "epoch": 0.1905166171439986, "grad_norm": 0.09603758901357651, "learning_rate": 2.4345290245062456e-05, "loss": 9.8052, "step": 38150 }, { "epoch": 0.19056655596893804, "grad_norm": 0.09332188963890076, "learning_rate": 2.4343788330120906e-05, "loss": 9.7968, "step": 38160 }, { "epoch": 0.1906164947938775, "grad_norm": 0.09354902803897858, "learning_rate": 2.4342286415179356e-05, "loss": 9.7974, "step": 38170 }, { "epoch": 0.19066643361881694, "grad_norm": 0.0928342416882515, "learning_rate": 2.4340784500237803e-05, "loss": 9.7945, "step": 38180 }, { "epoch": 0.1907163724437564, "grad_norm": 0.0948493480682373, "learning_rate": 2.4339282585296253e-05, "loss": 9.7977, "step": 38190 }, { "epoch": 0.19076631126869584, "grad_norm": 0.09960921853780746, "learning_rate": 2.4337780670354703e-05, "loss": 9.8052, "step": 38200 }, { "epoch": 0.1908162500936353, "grad_norm": 0.09617515653371811, "learning_rate": 2.4336278755413153e-05, "loss": 9.7999, "step": 38210 }, { "epoch": 0.19086618891857474, "grad_norm": 0.09649699926376343, "learning_rate": 2.4334776840471604e-05, "loss": 9.8018, "step": 38220 }, { "epoch": 0.19091612774351419, "grad_norm": 0.09706174582242966, "learning_rate": 2.433327492553005e-05, "loss": 9.8035, "step": 38230 }, { "epoch": 0.19096606656845364, "grad_norm": 0.09482137858867645, "learning_rate": 2.43317730105885e-05, "loss": 9.8009, "step": 38240 }, { "epoch": 0.19101600539339308, "grad_norm": 0.0948929712176323, "learning_rate": 2.433027109564695e-05, "loss": 9.796, "step": 38250 }, { "epoch": 0.19106594421833253, "grad_norm": 0.09649039804935455, "learning_rate": 2.43287691807054e-05, "loss": 9.8034, "step": 38260 }, { "epoch": 0.19111588304327198, "grad_norm": 0.09573374688625336, "learning_rate": 2.432726726576385e-05, "loss": 9.7922, "step": 38270 }, { "epoch": 0.19116582186821143, "grad_norm": 0.10029622167348862, "learning_rate": 2.4325765350822298e-05, "loss": 9.7982, "step": 38280 }, { "epoch": 0.19121576069315088, "grad_norm": 0.09856070578098297, "learning_rate": 2.4324263435880748e-05, "loss": 9.8087, "step": 38290 }, { "epoch": 0.19126569951809033, "grad_norm": 0.08724341541528702, "learning_rate": 2.4322761520939198e-05, "loss": 9.7989, "step": 38300 }, { "epoch": 0.19131563834302978, "grad_norm": 0.09495957940816879, "learning_rate": 2.432125960599765e-05, "loss": 9.7892, "step": 38310 }, { "epoch": 0.19136557716796923, "grad_norm": 0.09710295498371124, "learning_rate": 2.43197576910561e-05, "loss": 9.8049, "step": 38320 }, { "epoch": 0.19141551599290868, "grad_norm": 0.0903317853808403, "learning_rate": 2.4318255776114545e-05, "loss": 9.8063, "step": 38330 }, { "epoch": 0.19146545481784813, "grad_norm": 0.09293761849403381, "learning_rate": 2.4316753861172996e-05, "loss": 9.7994, "step": 38340 }, { "epoch": 0.19151539364278758, "grad_norm": 0.09713525325059891, "learning_rate": 2.4315251946231446e-05, "loss": 9.7968, "step": 38350 }, { "epoch": 0.19156533246772703, "grad_norm": 0.09488287568092346, "learning_rate": 2.4313750031289896e-05, "loss": 9.8005, "step": 38360 }, { "epoch": 0.19161527129266648, "grad_norm": 0.0962325856089592, "learning_rate": 2.4312248116348346e-05, "loss": 9.7941, "step": 38370 }, { "epoch": 0.19166521011760593, "grad_norm": 0.09315495938062668, "learning_rate": 2.4310746201406793e-05, "loss": 9.7957, "step": 38380 }, { "epoch": 0.19171514894254538, "grad_norm": 0.09810058772563934, "learning_rate": 2.4309244286465243e-05, "loss": 9.8038, "step": 38390 }, { "epoch": 0.19176508776748483, "grad_norm": 0.09418797492980957, "learning_rate": 2.4307742371523693e-05, "loss": 9.7966, "step": 38400 }, { "epoch": 0.19181502659242428, "grad_norm": 0.0971032902598381, "learning_rate": 2.4306240456582143e-05, "loss": 9.7953, "step": 38410 }, { "epoch": 0.19186496541736373, "grad_norm": 0.09385687112808228, "learning_rate": 2.4304738541640594e-05, "loss": 9.7957, "step": 38420 }, { "epoch": 0.19191490424230317, "grad_norm": 0.09179292619228363, "learning_rate": 2.430323662669904e-05, "loss": 9.7907, "step": 38430 }, { "epoch": 0.19196484306724262, "grad_norm": 0.09452084451913834, "learning_rate": 2.430173471175749e-05, "loss": 9.8014, "step": 38440 }, { "epoch": 0.19201478189218207, "grad_norm": 0.09643220156431198, "learning_rate": 2.430023279681594e-05, "loss": 9.7915, "step": 38450 }, { "epoch": 0.19206472071712152, "grad_norm": 0.0984674021601677, "learning_rate": 2.429873088187439e-05, "loss": 9.7976, "step": 38460 }, { "epoch": 0.19211465954206097, "grad_norm": 0.09368205070495605, "learning_rate": 2.429722896693284e-05, "loss": 9.7938, "step": 38470 }, { "epoch": 0.19216459836700042, "grad_norm": 0.09682975709438324, "learning_rate": 2.429572705199129e-05, "loss": 9.7942, "step": 38480 }, { "epoch": 0.19221453719193987, "grad_norm": 0.09321840852499008, "learning_rate": 2.4294225137049738e-05, "loss": 9.7997, "step": 38490 }, { "epoch": 0.19226447601687932, "grad_norm": 0.09486759454011917, "learning_rate": 2.4292723222108188e-05, "loss": 9.7955, "step": 38500 }, { "epoch": 0.19231441484181877, "grad_norm": 0.09923727810382843, "learning_rate": 2.429122130716664e-05, "loss": 9.7936, "step": 38510 }, { "epoch": 0.19236435366675822, "grad_norm": 0.09178563952445984, "learning_rate": 2.428971939222509e-05, "loss": 9.7945, "step": 38520 }, { "epoch": 0.19241429249169767, "grad_norm": 0.09080483019351959, "learning_rate": 2.428821747728354e-05, "loss": 9.7983, "step": 38530 }, { "epoch": 0.19246423131663712, "grad_norm": 0.09831558167934418, "learning_rate": 2.4286715562341986e-05, "loss": 9.7918, "step": 38540 }, { "epoch": 0.19251417014157657, "grad_norm": 0.095986008644104, "learning_rate": 2.4285213647400436e-05, "loss": 9.792, "step": 38550 }, { "epoch": 0.19256410896651602, "grad_norm": 0.10043915361166, "learning_rate": 2.4283711732458886e-05, "loss": 9.7913, "step": 38560 }, { "epoch": 0.19261404779145547, "grad_norm": 0.09741128236055374, "learning_rate": 2.4282209817517336e-05, "loss": 9.7836, "step": 38570 }, { "epoch": 0.19266398661639492, "grad_norm": 0.09496723860502243, "learning_rate": 2.4280707902575786e-05, "loss": 9.7869, "step": 38580 }, { "epoch": 0.19271392544133437, "grad_norm": 0.09604073315858841, "learning_rate": 2.4279205987634233e-05, "loss": 9.7975, "step": 38590 }, { "epoch": 0.19276386426627382, "grad_norm": 0.09631392359733582, "learning_rate": 2.4277704072692683e-05, "loss": 9.7917, "step": 38600 }, { "epoch": 0.19281380309121326, "grad_norm": 0.09735754877328873, "learning_rate": 2.4276202157751133e-05, "loss": 9.7898, "step": 38610 }, { "epoch": 0.19286374191615271, "grad_norm": 0.09502192586660385, "learning_rate": 2.4274700242809584e-05, "loss": 9.7959, "step": 38620 }, { "epoch": 0.19291368074109216, "grad_norm": 0.09539380669593811, "learning_rate": 2.4273198327868034e-05, "loss": 9.7939, "step": 38630 }, { "epoch": 0.1929636195660316, "grad_norm": 0.09433183073997498, "learning_rate": 2.427169641292648e-05, "loss": 9.7888, "step": 38640 }, { "epoch": 0.19301355839097106, "grad_norm": 0.09847880899906158, "learning_rate": 2.427019449798493e-05, "loss": 9.7895, "step": 38650 }, { "epoch": 0.1930634972159105, "grad_norm": 0.09963309019804001, "learning_rate": 2.426869258304338e-05, "loss": 9.7924, "step": 38660 }, { "epoch": 0.19311343604084996, "grad_norm": 0.09441536664962769, "learning_rate": 2.426719066810183e-05, "loss": 9.7913, "step": 38670 }, { "epoch": 0.1931633748657894, "grad_norm": 0.09207624942064285, "learning_rate": 2.426568875316028e-05, "loss": 9.7925, "step": 38680 }, { "epoch": 0.19321331369072886, "grad_norm": 0.09765110164880753, "learning_rate": 2.4264186838218728e-05, "loss": 9.7963, "step": 38690 }, { "epoch": 0.1932632525156683, "grad_norm": 0.09711901098489761, "learning_rate": 2.4262684923277178e-05, "loss": 9.788, "step": 38700 }, { "epoch": 0.19331319134060776, "grad_norm": 0.09210279583930969, "learning_rate": 2.426118300833563e-05, "loss": 9.7852, "step": 38710 }, { "epoch": 0.1933631301655472, "grad_norm": 0.09786038845777512, "learning_rate": 2.425968109339408e-05, "loss": 9.7915, "step": 38720 }, { "epoch": 0.19341306899048666, "grad_norm": 0.09457087516784668, "learning_rate": 2.425817917845253e-05, "loss": 9.7967, "step": 38730 }, { "epoch": 0.1934630078154261, "grad_norm": 0.09489721804857254, "learning_rate": 2.4256677263510976e-05, "loss": 9.7965, "step": 38740 }, { "epoch": 0.19351294664036556, "grad_norm": 0.09461694210767746, "learning_rate": 2.4255175348569426e-05, "loss": 9.7884, "step": 38750 }, { "epoch": 0.193562885465305, "grad_norm": 0.10121167451143265, "learning_rate": 2.4253673433627876e-05, "loss": 9.7838, "step": 38760 }, { "epoch": 0.19361282429024446, "grad_norm": 0.09524286538362503, "learning_rate": 2.4252171518686326e-05, "loss": 9.7871, "step": 38770 }, { "epoch": 0.1936627631151839, "grad_norm": 0.09742001444101334, "learning_rate": 2.4250669603744776e-05, "loss": 9.7947, "step": 38780 }, { "epoch": 0.19371270194012336, "grad_norm": 0.09000395238399506, "learning_rate": 2.4249167688803223e-05, "loss": 9.7865, "step": 38790 }, { "epoch": 0.1937626407650628, "grad_norm": 0.09661939740180969, "learning_rate": 2.4247665773861677e-05, "loss": 9.7887, "step": 38800 }, { "epoch": 0.19381257959000225, "grad_norm": 0.09397201985120773, "learning_rate": 2.4246163858920123e-05, "loss": 9.7851, "step": 38810 }, { "epoch": 0.1938625184149417, "grad_norm": 0.0975542664527893, "learning_rate": 2.4244661943978574e-05, "loss": 9.786, "step": 38820 }, { "epoch": 0.19391245723988115, "grad_norm": 0.0956290140748024, "learning_rate": 2.4243160029037024e-05, "loss": 9.783, "step": 38830 }, { "epoch": 0.1939623960648206, "grad_norm": 0.09244171530008316, "learning_rate": 2.424165811409547e-05, "loss": 9.7812, "step": 38840 }, { "epoch": 0.19401233488976005, "grad_norm": 0.10087534040212631, "learning_rate": 2.4240156199153924e-05, "loss": 9.7849, "step": 38850 }, { "epoch": 0.1940622737146995, "grad_norm": 0.09302230924367905, "learning_rate": 2.423865428421237e-05, "loss": 9.7873, "step": 38860 }, { "epoch": 0.19411221253963895, "grad_norm": 0.09214049577713013, "learning_rate": 2.423715236927082e-05, "loss": 9.7873, "step": 38870 }, { "epoch": 0.1941621513645784, "grad_norm": 0.09628362953662872, "learning_rate": 2.423565045432927e-05, "loss": 9.7885, "step": 38880 }, { "epoch": 0.19421209018951785, "grad_norm": 0.09119916707277298, "learning_rate": 2.4234148539387718e-05, "loss": 9.7847, "step": 38890 }, { "epoch": 0.1942620290144573, "grad_norm": 0.09237845242023468, "learning_rate": 2.423264662444617e-05, "loss": 9.7874, "step": 38900 }, { "epoch": 0.19431196783939675, "grad_norm": 0.08875925093889236, "learning_rate": 2.423114470950462e-05, "loss": 9.7925, "step": 38910 }, { "epoch": 0.1943619066643362, "grad_norm": 0.0932464450597763, "learning_rate": 2.422964279456307e-05, "loss": 9.7909, "step": 38920 }, { "epoch": 0.19441184548927565, "grad_norm": 0.09021969884634018, "learning_rate": 2.422814087962152e-05, "loss": 9.7863, "step": 38930 }, { "epoch": 0.1944617843142151, "grad_norm": 0.09389606863260269, "learning_rate": 2.4226638964679966e-05, "loss": 9.7808, "step": 38940 }, { "epoch": 0.19451172313915455, "grad_norm": 0.09377970546483994, "learning_rate": 2.422513704973842e-05, "loss": 9.7729, "step": 38950 }, { "epoch": 0.194561661964094, "grad_norm": 0.09550289064645767, "learning_rate": 2.4223635134796866e-05, "loss": 9.7801, "step": 38960 }, { "epoch": 0.19461160078903345, "grad_norm": 0.0985620841383934, "learning_rate": 2.4222133219855316e-05, "loss": 9.7896, "step": 38970 }, { "epoch": 0.1946615396139729, "grad_norm": 0.09124714881181717, "learning_rate": 2.4220631304913766e-05, "loss": 9.7848, "step": 38980 }, { "epoch": 0.19471147843891234, "grad_norm": 0.09538350999355316, "learning_rate": 2.4219129389972213e-05, "loss": 9.7863, "step": 38990 }, { "epoch": 0.1947614172638518, "grad_norm": 0.09843659400939941, "learning_rate": 2.4217627475030667e-05, "loss": 9.7772, "step": 39000 }, { "epoch": 0.19481135608879124, "grad_norm": 0.09667788445949554, "learning_rate": 2.4216125560089113e-05, "loss": 9.7902, "step": 39010 }, { "epoch": 0.1948612949137307, "grad_norm": 0.09687555581331253, "learning_rate": 2.4214623645147564e-05, "loss": 9.7835, "step": 39020 }, { "epoch": 0.19491123373867014, "grad_norm": 0.09759131073951721, "learning_rate": 2.4213121730206014e-05, "loss": 9.7849, "step": 39030 }, { "epoch": 0.1949611725636096, "grad_norm": 0.09392527490854263, "learning_rate": 2.421161981526446e-05, "loss": 9.784, "step": 39040 }, { "epoch": 0.19501111138854904, "grad_norm": 0.09155687689781189, "learning_rate": 2.4210117900322914e-05, "loss": 9.7849, "step": 39050 }, { "epoch": 0.1950610502134885, "grad_norm": 0.09576915949583054, "learning_rate": 2.420861598538136e-05, "loss": 9.7815, "step": 39060 }, { "epoch": 0.1951109890384279, "grad_norm": 0.09315671026706696, "learning_rate": 2.420711407043981e-05, "loss": 9.7802, "step": 39070 }, { "epoch": 0.19516092786336736, "grad_norm": 0.09243351221084595, "learning_rate": 2.420561215549826e-05, "loss": 9.7802, "step": 39080 }, { "epoch": 0.1952108666883068, "grad_norm": 0.09323740005493164, "learning_rate": 2.4204110240556708e-05, "loss": 9.7835, "step": 39090 }, { "epoch": 0.19526080551324626, "grad_norm": 0.09786425530910492, "learning_rate": 2.420260832561516e-05, "loss": 9.78, "step": 39100 }, { "epoch": 0.1953107443381857, "grad_norm": 0.09116844832897186, "learning_rate": 2.420110641067361e-05, "loss": 9.7841, "step": 39110 }, { "epoch": 0.19536068316312516, "grad_norm": 0.09829219430685043, "learning_rate": 2.4199604495732062e-05, "loss": 9.7839, "step": 39120 }, { "epoch": 0.1954106219880646, "grad_norm": 0.0964910238981247, "learning_rate": 2.419810258079051e-05, "loss": 9.7754, "step": 39130 }, { "epoch": 0.19546056081300406, "grad_norm": 0.09510182589292526, "learning_rate": 2.4196600665848956e-05, "loss": 9.779, "step": 39140 }, { "epoch": 0.1955104996379435, "grad_norm": 0.09461788833141327, "learning_rate": 2.419509875090741e-05, "loss": 9.7828, "step": 39150 }, { "epoch": 0.19556043846288296, "grad_norm": 0.09442439675331116, "learning_rate": 2.4193596835965856e-05, "loss": 45.9869, "step": 39160 }, { "epoch": 0.1956103772878224, "grad_norm": 0.09443948417901993, "learning_rate": 2.419209492102431e-05, "loss": 9.7777, "step": 39170 }, { "epoch": 0.19566031611276186, "grad_norm": 0.08871215581893921, "learning_rate": 2.4190593006082756e-05, "loss": 9.7808, "step": 39180 }, { "epoch": 0.1957102549377013, "grad_norm": 0.09522920846939087, "learning_rate": 2.4189091091141203e-05, "loss": 9.7887, "step": 39190 }, { "epoch": 0.19576019376264076, "grad_norm": 0.0978919193148613, "learning_rate": 2.4187589176199657e-05, "loss": 9.7724, "step": 39200 }, { "epoch": 0.1958101325875802, "grad_norm": 0.09242391586303711, "learning_rate": 2.4186087261258103e-05, "loss": 9.7893, "step": 39210 }, { "epoch": 0.19586007141251965, "grad_norm": 0.09181750565767288, "learning_rate": 2.4184585346316557e-05, "loss": 9.7748, "step": 39220 }, { "epoch": 0.1959100102374591, "grad_norm": 0.0938679501414299, "learning_rate": 2.4183083431375004e-05, "loss": 9.7745, "step": 39230 }, { "epoch": 0.19595994906239855, "grad_norm": 0.09096869826316833, "learning_rate": 2.418158151643345e-05, "loss": 9.7836, "step": 39240 }, { "epoch": 0.196009887887338, "grad_norm": 0.09524725377559662, "learning_rate": 2.4180079601491904e-05, "loss": 9.787, "step": 39250 }, { "epoch": 0.19605982671227745, "grad_norm": 0.09219633787870407, "learning_rate": 2.417857768655035e-05, "loss": 9.7777, "step": 39260 }, { "epoch": 0.1961097655372169, "grad_norm": 0.09788725525140762, "learning_rate": 2.4177075771608805e-05, "loss": 9.7787, "step": 39270 }, { "epoch": 0.19615970436215635, "grad_norm": 0.09663385152816772, "learning_rate": 2.417557385666725e-05, "loss": 9.7779, "step": 39280 }, { "epoch": 0.1962096431870958, "grad_norm": 0.10418345034122467, "learning_rate": 2.4174071941725698e-05, "loss": 9.7713, "step": 39290 }, { "epoch": 0.19625958201203525, "grad_norm": 0.09116250276565552, "learning_rate": 2.417257002678415e-05, "loss": 9.7772, "step": 39300 }, { "epoch": 0.1963095208369747, "grad_norm": 0.09256359934806824, "learning_rate": 2.41710681118426e-05, "loss": 9.7782, "step": 39310 }, { "epoch": 0.19635945966191415, "grad_norm": 0.10003110766410828, "learning_rate": 2.4169566196901052e-05, "loss": 9.7717, "step": 39320 }, { "epoch": 0.1964093984868536, "grad_norm": 0.09774117916822433, "learning_rate": 2.41680642819595e-05, "loss": 9.777, "step": 39330 }, { "epoch": 0.19645933731179305, "grad_norm": 0.09338162839412689, "learning_rate": 2.4166562367017946e-05, "loss": 9.7811, "step": 39340 }, { "epoch": 0.1965092761367325, "grad_norm": 0.09204205125570297, "learning_rate": 2.41650604520764e-05, "loss": 9.7827, "step": 39350 }, { "epoch": 0.19655921496167195, "grad_norm": 0.0937473401427269, "learning_rate": 2.4163558537134846e-05, "loss": 9.7759, "step": 39360 }, { "epoch": 0.1966091537866114, "grad_norm": 0.09584249556064606, "learning_rate": 2.41620566221933e-05, "loss": 9.7817, "step": 39370 }, { "epoch": 0.19665909261155085, "grad_norm": 0.09530872106552124, "learning_rate": 2.4160554707251746e-05, "loss": 9.7736, "step": 39380 }, { "epoch": 0.1967090314364903, "grad_norm": 0.09426391869783401, "learning_rate": 2.4159052792310193e-05, "loss": 9.7796, "step": 39390 }, { "epoch": 0.19675897026142974, "grad_norm": 0.09266220033168793, "learning_rate": 2.4157550877368647e-05, "loss": 9.7739, "step": 39400 }, { "epoch": 0.1968089090863692, "grad_norm": 0.0935848280787468, "learning_rate": 2.4156048962427093e-05, "loss": 9.7755, "step": 39410 }, { "epoch": 0.19685884791130864, "grad_norm": 0.0957341194152832, "learning_rate": 2.4154547047485547e-05, "loss": 9.7699, "step": 39420 }, { "epoch": 0.1969087867362481, "grad_norm": 0.09587478637695312, "learning_rate": 2.4153045132543994e-05, "loss": 9.7725, "step": 39430 }, { "epoch": 0.19695872556118754, "grad_norm": 0.09046075493097305, "learning_rate": 2.4151543217602444e-05, "loss": 9.776, "step": 39440 }, { "epoch": 0.197008664386127, "grad_norm": 0.09086257219314575, "learning_rate": 2.4150041302660894e-05, "loss": 9.7735, "step": 39450 }, { "epoch": 0.19705860321106644, "grad_norm": 0.10431148111820221, "learning_rate": 2.414853938771934e-05, "loss": 9.773, "step": 39460 }, { "epoch": 0.1971085420360059, "grad_norm": 0.09601059556007385, "learning_rate": 2.4147037472777795e-05, "loss": 9.779, "step": 39470 }, { "epoch": 0.19715848086094534, "grad_norm": 0.09748616814613342, "learning_rate": 2.414553555783624e-05, "loss": 9.7766, "step": 39480 }, { "epoch": 0.1972084196858848, "grad_norm": 0.10166972130537033, "learning_rate": 2.414403364289469e-05, "loss": 9.7738, "step": 39490 }, { "epoch": 0.19725835851082424, "grad_norm": 0.09647940844297409, "learning_rate": 2.4142531727953142e-05, "loss": 9.7646, "step": 39500 }, { "epoch": 0.1973082973357637, "grad_norm": 0.09661668539047241, "learning_rate": 2.414102981301159e-05, "loss": 9.7793, "step": 39510 }, { "epoch": 0.19735823616070314, "grad_norm": 0.09515753388404846, "learning_rate": 2.4139527898070042e-05, "loss": 9.7797, "step": 39520 }, { "epoch": 0.1974081749856426, "grad_norm": 0.0913529172539711, "learning_rate": 2.413802598312849e-05, "loss": 9.7787, "step": 39530 }, { "epoch": 0.19745811381058204, "grad_norm": 0.09632540494203568, "learning_rate": 2.413652406818694e-05, "loss": 9.7703, "step": 39540 }, { "epoch": 0.19750805263552149, "grad_norm": 0.09521202743053436, "learning_rate": 2.413502215324539e-05, "loss": 9.7713, "step": 39550 }, { "epoch": 0.19755799146046094, "grad_norm": 0.09425238519906998, "learning_rate": 2.4133520238303836e-05, "loss": 9.7705, "step": 39560 }, { "epoch": 0.19760793028540038, "grad_norm": 0.09330090880393982, "learning_rate": 2.413201832336229e-05, "loss": 9.7747, "step": 39570 }, { "epoch": 0.19765786911033983, "grad_norm": 0.09047570824623108, "learning_rate": 2.4130516408420736e-05, "loss": 9.7726, "step": 39580 }, { "epoch": 0.19770780793527928, "grad_norm": 0.09425200521945953, "learning_rate": 2.4129014493479187e-05, "loss": 9.7723, "step": 39590 }, { "epoch": 0.19775774676021873, "grad_norm": 0.09698259085416794, "learning_rate": 2.4127512578537637e-05, "loss": 9.7848, "step": 39600 }, { "epoch": 0.19780768558515818, "grad_norm": 0.09237400442361832, "learning_rate": 2.4126010663596083e-05, "loss": 9.774, "step": 39610 }, { "epoch": 0.19785762441009763, "grad_norm": 0.09415727108716965, "learning_rate": 2.4124508748654537e-05, "loss": 9.7657, "step": 39620 }, { "epoch": 0.19790756323503708, "grad_norm": 0.09116394817829132, "learning_rate": 2.4123006833712984e-05, "loss": 9.7767, "step": 39630 }, { "epoch": 0.19795750205997653, "grad_norm": 0.09226060658693314, "learning_rate": 2.4121504918771434e-05, "loss": 9.7691, "step": 39640 }, { "epoch": 0.19800744088491598, "grad_norm": 0.09217618405818939, "learning_rate": 2.4120003003829884e-05, "loss": 9.7718, "step": 39650 }, { "epoch": 0.19805737970985543, "grad_norm": 0.09849265962839127, "learning_rate": 2.411850108888833e-05, "loss": 9.7769, "step": 39660 }, { "epoch": 0.19810731853479488, "grad_norm": 0.09423698484897614, "learning_rate": 2.4116999173946785e-05, "loss": 9.7673, "step": 39670 }, { "epoch": 0.19815725735973433, "grad_norm": 0.09352479875087738, "learning_rate": 2.411549725900523e-05, "loss": 9.764, "step": 39680 }, { "epoch": 0.19820719618467378, "grad_norm": 0.09337835013866425, "learning_rate": 2.411399534406368e-05, "loss": 9.7659, "step": 39690 }, { "epoch": 0.19825713500961323, "grad_norm": 0.08959398418664932, "learning_rate": 2.4112493429122132e-05, "loss": 9.7724, "step": 39700 }, { "epoch": 0.19830707383455268, "grad_norm": 0.09081783145666122, "learning_rate": 2.411099151418058e-05, "loss": 9.7665, "step": 39710 }, { "epoch": 0.19835701265949213, "grad_norm": 0.09610147774219513, "learning_rate": 2.4109489599239032e-05, "loss": 9.759, "step": 39720 }, { "epoch": 0.19840695148443158, "grad_norm": 0.09528447687625885, "learning_rate": 2.410798768429748e-05, "loss": 9.7684, "step": 39730 }, { "epoch": 0.19845689030937103, "grad_norm": 0.0971222072839737, "learning_rate": 2.410648576935593e-05, "loss": 9.7688, "step": 39740 }, { "epoch": 0.19850682913431048, "grad_norm": 0.09684096276760101, "learning_rate": 2.410498385441438e-05, "loss": 9.7715, "step": 39750 }, { "epoch": 0.19855676795924992, "grad_norm": 0.0940563753247261, "learning_rate": 2.410348193947283e-05, "loss": 9.7577, "step": 39760 }, { "epoch": 0.19860670678418937, "grad_norm": 0.09706791490316391, "learning_rate": 2.410198002453128e-05, "loss": 9.7708, "step": 39770 }, { "epoch": 0.19865664560912882, "grad_norm": 0.10084111243486404, "learning_rate": 2.4100478109589726e-05, "loss": 9.7658, "step": 39780 }, { "epoch": 0.19870658443406827, "grad_norm": 0.09000294655561447, "learning_rate": 2.4098976194648177e-05, "loss": 9.7698, "step": 39790 }, { "epoch": 0.19875652325900772, "grad_norm": 0.09340784698724747, "learning_rate": 2.4097474279706627e-05, "loss": 9.7694, "step": 39800 }, { "epoch": 0.19880646208394717, "grad_norm": 0.09671244770288467, "learning_rate": 2.4095972364765077e-05, "loss": 9.7608, "step": 39810 }, { "epoch": 0.19885640090888662, "grad_norm": 0.09407161921262741, "learning_rate": 2.4094470449823527e-05, "loss": 9.7632, "step": 39820 }, { "epoch": 0.19890633973382607, "grad_norm": 0.09390780329704285, "learning_rate": 2.4092968534881974e-05, "loss": 9.7639, "step": 39830 }, { "epoch": 0.19895627855876552, "grad_norm": 0.10034262388944626, "learning_rate": 2.4091466619940424e-05, "loss": 9.7688, "step": 39840 }, { "epoch": 0.19900621738370497, "grad_norm": 0.09268135577440262, "learning_rate": 2.4089964704998874e-05, "loss": 9.7654, "step": 39850 }, { "epoch": 0.19905615620864442, "grad_norm": 0.09061171859502792, "learning_rate": 2.4088462790057324e-05, "loss": 9.7667, "step": 39860 }, { "epoch": 0.19910609503358387, "grad_norm": 0.09159281104803085, "learning_rate": 2.4086960875115775e-05, "loss": 9.7727, "step": 39870 }, { "epoch": 0.19915603385852332, "grad_norm": 0.09168130904436111, "learning_rate": 2.408545896017422e-05, "loss": 9.7752, "step": 39880 }, { "epoch": 0.19920597268346277, "grad_norm": 0.09341683238744736, "learning_rate": 2.408395704523267e-05, "loss": 9.7705, "step": 39890 }, { "epoch": 0.19925591150840222, "grad_norm": 0.10817967355251312, "learning_rate": 2.4082455130291122e-05, "loss": 9.7616, "step": 39900 }, { "epoch": 0.19930585033334167, "grad_norm": 0.09794482588768005, "learning_rate": 2.4080953215349572e-05, "loss": 9.7589, "step": 39910 }, { "epoch": 0.19935578915828112, "grad_norm": 0.09640230238437653, "learning_rate": 2.4079451300408022e-05, "loss": 9.7614, "step": 39920 }, { "epoch": 0.19940572798322057, "grad_norm": 0.09363255649805069, "learning_rate": 2.407794938546647e-05, "loss": 9.7684, "step": 39930 }, { "epoch": 0.19945566680816001, "grad_norm": 0.09409525245428085, "learning_rate": 2.407644747052492e-05, "loss": 9.7558, "step": 39940 }, { "epoch": 0.19950560563309946, "grad_norm": 0.0943169966340065, "learning_rate": 2.407494555558337e-05, "loss": 9.7663, "step": 39950 }, { "epoch": 0.1995555444580389, "grad_norm": 0.09789732098579407, "learning_rate": 2.407344364064182e-05, "loss": 9.7623, "step": 39960 }, { "epoch": 0.19960548328297836, "grad_norm": 0.09631295502185822, "learning_rate": 2.407194172570027e-05, "loss": 9.7692, "step": 39970 }, { "epoch": 0.1996554221079178, "grad_norm": 0.09948205947875977, "learning_rate": 2.4070439810758716e-05, "loss": 9.7623, "step": 39980 }, { "epoch": 0.19970536093285726, "grad_norm": 0.0964735895395279, "learning_rate": 2.4068937895817167e-05, "loss": 9.7621, "step": 39990 }, { "epoch": 0.1997552997577967, "grad_norm": 0.09828507900238037, "learning_rate": 2.4067435980875617e-05, "loss": 9.7603, "step": 40000 }, { "epoch": 0.19980523858273616, "grad_norm": 0.09244830161333084, "learning_rate": 2.4065934065934067e-05, "loss": 9.7644, "step": 40010 }, { "epoch": 0.1998551774076756, "grad_norm": 0.09815704822540283, "learning_rate": 2.4064432150992517e-05, "loss": 9.7665, "step": 40020 }, { "epoch": 0.19990511623261506, "grad_norm": 0.0952480360865593, "learning_rate": 2.4062930236050964e-05, "loss": 9.7668, "step": 40030 }, { "epoch": 0.1999550550575545, "grad_norm": 0.09597064554691315, "learning_rate": 2.4061428321109414e-05, "loss": 9.7613, "step": 40040 }, { "epoch": 0.20000499388249393, "grad_norm": 0.09825017303228378, "learning_rate": 2.4059926406167864e-05, "loss": 9.76, "step": 40050 }, { "epoch": 0.20005493270743338, "grad_norm": 0.09238851070404053, "learning_rate": 2.4058424491226314e-05, "loss": 9.765, "step": 40060 }, { "epoch": 0.20010487153237283, "grad_norm": 0.09596622735261917, "learning_rate": 2.4056922576284765e-05, "loss": 9.7618, "step": 40070 }, { "epoch": 0.20015481035731228, "grad_norm": 0.09516778588294983, "learning_rate": 2.4055420661343215e-05, "loss": 9.766, "step": 40080 }, { "epoch": 0.20020474918225173, "grad_norm": 0.10546738654375076, "learning_rate": 2.405391874640166e-05, "loss": 9.7556, "step": 40090 }, { "epoch": 0.20025468800719118, "grad_norm": 0.09358225017786026, "learning_rate": 2.4052416831460112e-05, "loss": 9.759, "step": 40100 }, { "epoch": 0.20030462683213063, "grad_norm": 0.08909553289413452, "learning_rate": 2.4050914916518562e-05, "loss": 9.7602, "step": 40110 }, { "epoch": 0.20035456565707008, "grad_norm": 0.1016073226928711, "learning_rate": 2.4049413001577012e-05, "loss": 9.7603, "step": 40120 }, { "epoch": 0.20040450448200953, "grad_norm": 0.09752840548753738, "learning_rate": 2.4047911086635462e-05, "loss": 9.7584, "step": 40130 }, { "epoch": 0.20045444330694898, "grad_norm": 0.09266415238380432, "learning_rate": 2.404640917169391e-05, "loss": 9.7625, "step": 40140 }, { "epoch": 0.20050438213188843, "grad_norm": 0.09927397966384888, "learning_rate": 2.404490725675236e-05, "loss": 9.7559, "step": 40150 }, { "epoch": 0.20055432095682788, "grad_norm": 0.09626518934965134, "learning_rate": 2.404340534181081e-05, "loss": 9.7664, "step": 40160 }, { "epoch": 0.20060425978176732, "grad_norm": 0.09638325124979019, "learning_rate": 2.404190342686926e-05, "loss": 9.7583, "step": 40170 }, { "epoch": 0.20065419860670677, "grad_norm": 0.09331472218036652, "learning_rate": 2.404040151192771e-05, "loss": 9.752, "step": 40180 }, { "epoch": 0.20070413743164622, "grad_norm": 0.09364567697048187, "learning_rate": 2.4038899596986157e-05, "loss": 9.7648, "step": 40190 }, { "epoch": 0.20075407625658567, "grad_norm": 0.09309536218643188, "learning_rate": 2.4037397682044607e-05, "loss": 9.7596, "step": 40200 }, { "epoch": 0.20080401508152512, "grad_norm": 0.091143399477005, "learning_rate": 2.4035895767103057e-05, "loss": 9.7572, "step": 40210 }, { "epoch": 0.20085395390646457, "grad_norm": 0.09167341887950897, "learning_rate": 2.4034393852161507e-05, "loss": 9.7644, "step": 40220 }, { "epoch": 0.20090389273140402, "grad_norm": 0.09573984146118164, "learning_rate": 2.4032891937219957e-05, "loss": 9.7607, "step": 40230 }, { "epoch": 0.20095383155634347, "grad_norm": 0.0927746593952179, "learning_rate": 2.4031390022278404e-05, "loss": 9.7661, "step": 40240 }, { "epoch": 0.20100377038128292, "grad_norm": 0.09519543498754501, "learning_rate": 2.4029888107336854e-05, "loss": 9.758, "step": 40250 }, { "epoch": 0.20105370920622237, "grad_norm": 0.09138039499521255, "learning_rate": 2.4028386192395304e-05, "loss": 9.7658, "step": 40260 }, { "epoch": 0.20110364803116182, "grad_norm": 0.0913807824254036, "learning_rate": 2.4026884277453755e-05, "loss": 9.7553, "step": 40270 }, { "epoch": 0.20115358685610127, "grad_norm": 0.09113917499780655, "learning_rate": 2.4025382362512205e-05, "loss": 9.7586, "step": 40280 }, { "epoch": 0.20120352568104072, "grad_norm": 0.09379607439041138, "learning_rate": 2.402388044757065e-05, "loss": 9.7621, "step": 40290 }, { "epoch": 0.20125346450598017, "grad_norm": 0.09333807975053787, "learning_rate": 2.4022378532629102e-05, "loss": 9.7623, "step": 40300 }, { "epoch": 0.20130340333091962, "grad_norm": 0.09286303073167801, "learning_rate": 2.4020876617687552e-05, "loss": 9.7592, "step": 40310 }, { "epoch": 0.20135334215585907, "grad_norm": 0.09583623707294464, "learning_rate": 2.4019374702746002e-05, "loss": 9.7556, "step": 40320 }, { "epoch": 0.20140328098079852, "grad_norm": 0.09831235557794571, "learning_rate": 2.4017872787804452e-05, "loss": 9.7499, "step": 40330 }, { "epoch": 0.20145321980573797, "grad_norm": 0.09651533514261246, "learning_rate": 2.40163708728629e-05, "loss": 9.7571, "step": 40340 }, { "epoch": 0.20150315863067741, "grad_norm": 0.0948728695511818, "learning_rate": 2.401486895792135e-05, "loss": 9.7519, "step": 40350 }, { "epoch": 0.20155309745561686, "grad_norm": 0.09901313483715057, "learning_rate": 2.40133670429798e-05, "loss": 9.7589, "step": 40360 }, { "epoch": 0.2016030362805563, "grad_norm": 0.09512390196323395, "learning_rate": 2.401186512803825e-05, "loss": 9.7564, "step": 40370 }, { "epoch": 0.20165297510549576, "grad_norm": 0.08876697719097137, "learning_rate": 2.40103632130967e-05, "loss": 9.7596, "step": 40380 }, { "epoch": 0.2017029139304352, "grad_norm": 0.09231546521186829, "learning_rate": 2.4008861298155147e-05, "loss": 9.7513, "step": 40390 }, { "epoch": 0.20175285275537466, "grad_norm": 0.08933798968791962, "learning_rate": 2.40073593832136e-05, "loss": 9.7478, "step": 40400 }, { "epoch": 0.2018027915803141, "grad_norm": 0.09653082489967346, "learning_rate": 2.4005857468272047e-05, "loss": 9.751, "step": 40410 }, { "epoch": 0.20185273040525356, "grad_norm": 0.09329882264137268, "learning_rate": 2.4004355553330497e-05, "loss": 9.7608, "step": 40420 }, { "epoch": 0.201902669230193, "grad_norm": 0.09065824002027512, "learning_rate": 2.4002853638388947e-05, "loss": 9.7584, "step": 40430 }, { "epoch": 0.20195260805513246, "grad_norm": 0.09478091448545456, "learning_rate": 2.4001351723447394e-05, "loss": 9.7633, "step": 40440 }, { "epoch": 0.2020025468800719, "grad_norm": 0.09942101687192917, "learning_rate": 2.3999849808505848e-05, "loss": 9.7596, "step": 40450 }, { "epoch": 0.20205248570501136, "grad_norm": 0.0919698178768158, "learning_rate": 2.3998347893564294e-05, "loss": 9.7657, "step": 40460 }, { "epoch": 0.2021024245299508, "grad_norm": 0.08957584947347641, "learning_rate": 2.3996845978622745e-05, "loss": 9.7507, "step": 40470 }, { "epoch": 0.20215236335489026, "grad_norm": 0.09440477192401886, "learning_rate": 2.3995344063681195e-05, "loss": 9.7558, "step": 40480 }, { "epoch": 0.2022023021798297, "grad_norm": 0.0963861346244812, "learning_rate": 2.399384214873964e-05, "loss": 9.7496, "step": 40490 }, { "epoch": 0.20225224100476916, "grad_norm": 0.09470155835151672, "learning_rate": 2.3992340233798095e-05, "loss": 9.7483, "step": 40500 }, { "epoch": 0.2023021798297086, "grad_norm": 0.09395986050367355, "learning_rate": 2.3990838318856542e-05, "loss": 9.7426, "step": 40510 }, { "epoch": 0.20235211865464806, "grad_norm": 0.09907002747058868, "learning_rate": 2.3989336403914992e-05, "loss": 9.7482, "step": 40520 }, { "epoch": 0.2024020574795875, "grad_norm": 0.09406791627407074, "learning_rate": 2.3987834488973442e-05, "loss": 9.7551, "step": 40530 }, { "epoch": 0.20245199630452695, "grad_norm": 0.09641828387975693, "learning_rate": 2.398633257403189e-05, "loss": 9.756, "step": 40540 }, { "epoch": 0.2025019351294664, "grad_norm": 0.08803495764732361, "learning_rate": 2.3984830659090343e-05, "loss": 9.7517, "step": 40550 }, { "epoch": 0.20255187395440585, "grad_norm": 0.089680016040802, "learning_rate": 2.398332874414879e-05, "loss": 9.7548, "step": 40560 }, { "epoch": 0.2026018127793453, "grad_norm": 0.09483745694160461, "learning_rate": 2.398182682920724e-05, "loss": 9.7505, "step": 40570 }, { "epoch": 0.20265175160428475, "grad_norm": 0.09230205416679382, "learning_rate": 2.398032491426569e-05, "loss": 9.7526, "step": 40580 }, { "epoch": 0.2027016904292242, "grad_norm": 0.0983130931854248, "learning_rate": 2.3978822999324137e-05, "loss": 9.7463, "step": 40590 }, { "epoch": 0.20275162925416365, "grad_norm": 0.0941585898399353, "learning_rate": 2.397732108438259e-05, "loss": 9.7575, "step": 40600 }, { "epoch": 0.2028015680791031, "grad_norm": 0.09447518736124039, "learning_rate": 2.3975819169441037e-05, "loss": 9.7447, "step": 40610 }, { "epoch": 0.20285150690404255, "grad_norm": 0.09139948338270187, "learning_rate": 2.3974317254499487e-05, "loss": 9.7499, "step": 40620 }, { "epoch": 0.202901445728982, "grad_norm": 0.09922108799219131, "learning_rate": 2.3972815339557937e-05, "loss": 9.7463, "step": 40630 }, { "epoch": 0.20295138455392145, "grad_norm": 0.09205441176891327, "learning_rate": 2.3971313424616384e-05, "loss": 9.7516, "step": 40640 }, { "epoch": 0.2030013233788609, "grad_norm": 0.097032330930233, "learning_rate": 2.3969811509674838e-05, "loss": 9.7507, "step": 40650 }, { "epoch": 0.20305126220380035, "grad_norm": 0.09273160994052887, "learning_rate": 2.3968309594733284e-05, "loss": 9.7485, "step": 40660 }, { "epoch": 0.2031012010287398, "grad_norm": 0.09222143143415451, "learning_rate": 2.3966807679791735e-05, "loss": 9.7494, "step": 40670 }, { "epoch": 0.20315113985367925, "grad_norm": 0.10249687731266022, "learning_rate": 2.3965305764850185e-05, "loss": 9.749, "step": 40680 }, { "epoch": 0.2032010786786187, "grad_norm": 0.09170138835906982, "learning_rate": 2.396380384990863e-05, "loss": 9.7538, "step": 40690 }, { "epoch": 0.20325101750355815, "grad_norm": 0.09489130973815918, "learning_rate": 2.3962301934967085e-05, "loss": 9.7539, "step": 40700 }, { "epoch": 0.2033009563284976, "grad_norm": 0.09625539183616638, "learning_rate": 2.3960800020025532e-05, "loss": 9.7509, "step": 40710 }, { "epoch": 0.20335089515343704, "grad_norm": 0.09620354324579239, "learning_rate": 2.3959298105083986e-05, "loss": 9.7504, "step": 40720 }, { "epoch": 0.2034008339783765, "grad_norm": 0.10117631405591965, "learning_rate": 2.3957796190142432e-05, "loss": 9.7386, "step": 40730 }, { "epoch": 0.20345077280331594, "grad_norm": 0.09158354997634888, "learning_rate": 2.395629427520088e-05, "loss": 9.7471, "step": 40740 }, { "epoch": 0.2035007116282554, "grad_norm": 0.09191151708364487, "learning_rate": 2.3954792360259333e-05, "loss": 9.7529, "step": 40750 }, { "epoch": 0.20355065045319484, "grad_norm": 0.09399069845676422, "learning_rate": 2.395329044531778e-05, "loss": 9.745, "step": 40760 }, { "epoch": 0.2036005892781343, "grad_norm": 0.09574571996927261, "learning_rate": 2.3951788530376233e-05, "loss": 9.7506, "step": 40770 }, { "epoch": 0.20365052810307374, "grad_norm": 0.09355718642473221, "learning_rate": 2.395028661543468e-05, "loss": 9.7426, "step": 40780 }, { "epoch": 0.2037004669280132, "grad_norm": 0.09288044273853302, "learning_rate": 2.3948784700493127e-05, "loss": 9.752, "step": 40790 }, { "epoch": 0.20375040575295264, "grad_norm": 0.09448196738958359, "learning_rate": 2.394728278555158e-05, "loss": 9.7449, "step": 40800 }, { "epoch": 0.2038003445778921, "grad_norm": 0.09567010402679443, "learning_rate": 2.3945780870610027e-05, "loss": 9.7417, "step": 40810 }, { "epoch": 0.20385028340283154, "grad_norm": 0.08981973677873611, "learning_rate": 2.394427895566848e-05, "loss": 9.7473, "step": 40820 }, { "epoch": 0.203900222227771, "grad_norm": 0.09741527587175369, "learning_rate": 2.3942777040726927e-05, "loss": 9.7482, "step": 40830 }, { "epoch": 0.20395016105271044, "grad_norm": 0.09928309917449951, "learning_rate": 2.3941275125785374e-05, "loss": 9.7452, "step": 40840 }, { "epoch": 0.2040000998776499, "grad_norm": 0.09600803256034851, "learning_rate": 2.3939773210843828e-05, "loss": 9.7513, "step": 40850 }, { "epoch": 0.20405003870258934, "grad_norm": 0.09777117520570755, "learning_rate": 2.3938271295902274e-05, "loss": 9.752, "step": 40860 }, { "epoch": 0.2040999775275288, "grad_norm": 0.09423068910837173, "learning_rate": 2.3936769380960728e-05, "loss": 9.7462, "step": 40870 }, { "epoch": 0.20414991635246824, "grad_norm": 0.09674641489982605, "learning_rate": 2.3935267466019175e-05, "loss": 9.745, "step": 40880 }, { "epoch": 0.20419985517740769, "grad_norm": 0.095578134059906, "learning_rate": 2.393376555107762e-05, "loss": 9.7457, "step": 40890 }, { "epoch": 0.20424979400234713, "grad_norm": 0.09196728467941284, "learning_rate": 2.3932263636136075e-05, "loss": 9.7413, "step": 40900 }, { "epoch": 0.20429973282728658, "grad_norm": 0.09680663794279099, "learning_rate": 2.3930761721194522e-05, "loss": 9.7394, "step": 40910 }, { "epoch": 0.20434967165222603, "grad_norm": 0.10243181139230728, "learning_rate": 2.3929259806252976e-05, "loss": 9.7504, "step": 40920 }, { "epoch": 0.20439961047716548, "grad_norm": 0.09534817934036255, "learning_rate": 2.3927757891311422e-05, "loss": 9.7527, "step": 40930 }, { "epoch": 0.20444954930210493, "grad_norm": 0.10091328620910645, "learning_rate": 2.392625597636987e-05, "loss": 9.7457, "step": 40940 }, { "epoch": 0.20449948812704438, "grad_norm": 0.09642726182937622, "learning_rate": 2.3924754061428323e-05, "loss": 9.7483, "step": 40950 }, { "epoch": 0.20454942695198383, "grad_norm": 0.09273668378591537, "learning_rate": 2.392325214648677e-05, "loss": 9.7487, "step": 40960 }, { "epoch": 0.20459936577692328, "grad_norm": 0.09491418302059174, "learning_rate": 2.3921750231545223e-05, "loss": 9.7384, "step": 40970 }, { "epoch": 0.20464930460186273, "grad_norm": 0.09379623830318451, "learning_rate": 2.392024831660367e-05, "loss": 9.7397, "step": 40980 }, { "epoch": 0.20469924342680218, "grad_norm": 0.09489590674638748, "learning_rate": 2.3918746401662117e-05, "loss": 9.738, "step": 40990 }, { "epoch": 0.20474918225174163, "grad_norm": 0.09257600456476212, "learning_rate": 2.391724448672057e-05, "loss": 9.743, "step": 41000 }, { "epoch": 0.20479912107668108, "grad_norm": 0.09949919581413269, "learning_rate": 2.3915742571779017e-05, "loss": 9.7409, "step": 41010 }, { "epoch": 0.20484905990162053, "grad_norm": 0.09563277661800385, "learning_rate": 2.391424065683747e-05, "loss": 9.738, "step": 41020 }, { "epoch": 0.20489899872655998, "grad_norm": 0.09452064335346222, "learning_rate": 2.3912738741895917e-05, "loss": 9.736, "step": 41030 }, { "epoch": 0.2049489375514994, "grad_norm": 0.09839780628681183, "learning_rate": 2.3911236826954368e-05, "loss": 9.7439, "step": 41040 }, { "epoch": 0.20499887637643885, "grad_norm": 0.0967588871717453, "learning_rate": 2.3909734912012818e-05, "loss": 9.7426, "step": 41050 }, { "epoch": 0.2050488152013783, "grad_norm": 0.09972970187664032, "learning_rate": 2.3908232997071264e-05, "loss": 9.7462, "step": 41060 }, { "epoch": 0.20509875402631775, "grad_norm": 0.09793765842914581, "learning_rate": 2.3906731082129718e-05, "loss": 9.7425, "step": 41070 }, { "epoch": 0.2051486928512572, "grad_norm": 0.09164223074913025, "learning_rate": 2.3905229167188165e-05, "loss": 9.7346, "step": 41080 }, { "epoch": 0.20519863167619665, "grad_norm": 0.09696740657091141, "learning_rate": 2.3903727252246615e-05, "loss": 9.7373, "step": 41090 }, { "epoch": 0.2052485705011361, "grad_norm": 0.09416086226701736, "learning_rate": 2.3902225337305065e-05, "loss": 9.74, "step": 41100 }, { "epoch": 0.20529850932607555, "grad_norm": 0.09924998879432678, "learning_rate": 2.3900723422363512e-05, "loss": 9.7414, "step": 41110 }, { "epoch": 0.205348448151015, "grad_norm": 0.08897435665130615, "learning_rate": 2.3899221507421966e-05, "loss": 9.7388, "step": 41120 }, { "epoch": 0.20539838697595444, "grad_norm": 0.09157969057559967, "learning_rate": 2.3897719592480412e-05, "loss": 9.7346, "step": 41130 }, { "epoch": 0.2054483258008939, "grad_norm": 0.09318001568317413, "learning_rate": 2.3896217677538863e-05, "loss": 9.7399, "step": 41140 }, { "epoch": 0.20549826462583334, "grad_norm": 0.09765854477882385, "learning_rate": 2.3894715762597313e-05, "loss": 9.7369, "step": 41150 }, { "epoch": 0.2055482034507728, "grad_norm": 0.09092031419277191, "learning_rate": 2.389321384765576e-05, "loss": 9.7477, "step": 41160 }, { "epoch": 0.20559814227571224, "grad_norm": 0.09183992445468903, "learning_rate": 2.3891711932714213e-05, "loss": 9.741, "step": 41170 }, { "epoch": 0.2056480811006517, "grad_norm": 0.0956728532910347, "learning_rate": 2.389021001777266e-05, "loss": 9.7374, "step": 41180 }, { "epoch": 0.20569801992559114, "grad_norm": 0.09464164078235626, "learning_rate": 2.388870810283111e-05, "loss": 9.7397, "step": 41190 }, { "epoch": 0.2057479587505306, "grad_norm": 0.09706345945596695, "learning_rate": 2.388720618788956e-05, "loss": 9.7364, "step": 41200 }, { "epoch": 0.20579789757547004, "grad_norm": 0.09342235326766968, "learning_rate": 2.3885704272948007e-05, "loss": 9.7431, "step": 41210 }, { "epoch": 0.2058478364004095, "grad_norm": 0.09735559672117233, "learning_rate": 2.388420235800646e-05, "loss": 9.737, "step": 41220 }, { "epoch": 0.20589777522534894, "grad_norm": 0.0972183495759964, "learning_rate": 2.3882700443064907e-05, "loss": 9.735, "step": 41230 }, { "epoch": 0.2059477140502884, "grad_norm": 0.09024586528539658, "learning_rate": 2.3881198528123358e-05, "loss": 9.7344, "step": 41240 }, { "epoch": 0.20599765287522784, "grad_norm": 0.09795460104942322, "learning_rate": 2.3879696613181808e-05, "loss": 9.7426, "step": 41250 }, { "epoch": 0.2060475917001673, "grad_norm": 0.10017476975917816, "learning_rate": 2.3878194698240255e-05, "loss": 9.7292, "step": 41260 }, { "epoch": 0.20609753052510674, "grad_norm": 0.09363752603530884, "learning_rate": 2.3876692783298708e-05, "loss": 9.7424, "step": 41270 }, { "epoch": 0.2061474693500462, "grad_norm": 0.09633088111877441, "learning_rate": 2.3875190868357155e-05, "loss": 9.7319, "step": 41280 }, { "epoch": 0.20619740817498564, "grad_norm": 0.09266993403434753, "learning_rate": 2.3873688953415605e-05, "loss": 9.732, "step": 41290 }, { "epoch": 0.20624734699992509, "grad_norm": 0.10170450061559677, "learning_rate": 2.3872187038474055e-05, "loss": 9.7402, "step": 41300 }, { "epoch": 0.20629728582486453, "grad_norm": 0.093755342066288, "learning_rate": 2.3870685123532502e-05, "loss": 9.7369, "step": 41310 }, { "epoch": 0.20634722464980398, "grad_norm": 0.09129472076892853, "learning_rate": 2.3869183208590956e-05, "loss": 9.7356, "step": 41320 }, { "epoch": 0.20639716347474343, "grad_norm": 0.09476242959499359, "learning_rate": 2.3867681293649402e-05, "loss": 9.7394, "step": 41330 }, { "epoch": 0.20644710229968288, "grad_norm": 0.09445454180240631, "learning_rate": 2.3866179378707853e-05, "loss": 9.7293, "step": 41340 }, { "epoch": 0.20649704112462233, "grad_norm": 0.09727781265974045, "learning_rate": 2.3864677463766303e-05, "loss": 9.7333, "step": 41350 }, { "epoch": 0.20654697994956178, "grad_norm": 0.093324214220047, "learning_rate": 2.386317554882475e-05, "loss": 9.7324, "step": 41360 }, { "epoch": 0.20659691877450123, "grad_norm": 0.09454476833343506, "learning_rate": 2.3861673633883203e-05, "loss": 9.7345, "step": 41370 }, { "epoch": 0.20664685759944068, "grad_norm": 0.09696672111749649, "learning_rate": 2.386017171894165e-05, "loss": 9.7345, "step": 41380 }, { "epoch": 0.20669679642438013, "grad_norm": 0.09323279559612274, "learning_rate": 2.38586698040001e-05, "loss": 9.7358, "step": 41390 }, { "epoch": 0.20674673524931958, "grad_norm": 0.09088480472564697, "learning_rate": 2.385716788905855e-05, "loss": 9.7327, "step": 41400 }, { "epoch": 0.20679667407425903, "grad_norm": 0.08868908882141113, "learning_rate": 2.3855665974117e-05, "loss": 9.7327, "step": 41410 }, { "epoch": 0.20684661289919848, "grad_norm": 0.0900205671787262, "learning_rate": 2.385416405917545e-05, "loss": 9.7318, "step": 41420 }, { "epoch": 0.20689655172413793, "grad_norm": 0.0981634333729744, "learning_rate": 2.3852662144233897e-05, "loss": 9.7366, "step": 41430 }, { "epoch": 0.20694649054907738, "grad_norm": 0.10074867308139801, "learning_rate": 2.3851160229292348e-05, "loss": 9.729, "step": 41440 }, { "epoch": 0.20699642937401683, "grad_norm": 0.09191495180130005, "learning_rate": 2.3849658314350798e-05, "loss": 9.7327, "step": 41450 }, { "epoch": 0.20704636819895628, "grad_norm": 0.09636077284812927, "learning_rate": 2.3848156399409248e-05, "loss": 9.7327, "step": 41460 }, { "epoch": 0.20709630702389573, "grad_norm": 0.09494554251432419, "learning_rate": 2.3846654484467698e-05, "loss": 9.7348, "step": 41470 }, { "epoch": 0.20714624584883518, "grad_norm": 0.09379441291093826, "learning_rate": 2.3845152569526145e-05, "loss": 9.7329, "step": 41480 }, { "epoch": 0.20719618467377462, "grad_norm": 0.09855134785175323, "learning_rate": 2.3843650654584595e-05, "loss": 9.7449, "step": 41490 }, { "epoch": 0.20724612349871407, "grad_norm": 0.09271756559610367, "learning_rate": 2.3842148739643045e-05, "loss": 9.7368, "step": 41500 }, { "epoch": 0.20729606232365352, "grad_norm": 0.09612946957349777, "learning_rate": 2.3840646824701495e-05, "loss": 9.7314, "step": 41510 }, { "epoch": 0.20734600114859297, "grad_norm": 0.09541863203048706, "learning_rate": 2.3839144909759946e-05, "loss": 9.7345, "step": 41520 }, { "epoch": 0.20739593997353242, "grad_norm": 0.09798285365104675, "learning_rate": 2.3837642994818392e-05, "loss": 9.7369, "step": 41530 }, { "epoch": 0.20744587879847187, "grad_norm": 0.09581144899129868, "learning_rate": 2.3836141079876843e-05, "loss": 9.7313, "step": 41540 }, { "epoch": 0.20749581762341132, "grad_norm": 0.09617334604263306, "learning_rate": 2.3834639164935293e-05, "loss": 9.7242, "step": 41550 }, { "epoch": 0.20754575644835077, "grad_norm": 0.09934971481561661, "learning_rate": 2.3833137249993743e-05, "loss": 9.7257, "step": 41560 }, { "epoch": 0.20759569527329022, "grad_norm": 0.09321165084838867, "learning_rate": 2.3831635335052193e-05, "loss": 9.7286, "step": 41570 }, { "epoch": 0.20764563409822967, "grad_norm": 0.09776625037193298, "learning_rate": 2.383013342011064e-05, "loss": 9.7297, "step": 41580 }, { "epoch": 0.20769557292316912, "grad_norm": 0.09788148105144501, "learning_rate": 2.382863150516909e-05, "loss": 9.7298, "step": 41590 }, { "epoch": 0.20774551174810857, "grad_norm": 0.09117887914180756, "learning_rate": 2.382712959022754e-05, "loss": 9.7353, "step": 41600 }, { "epoch": 0.20779545057304802, "grad_norm": 0.09594250470399857, "learning_rate": 2.382562767528599e-05, "loss": 9.7353, "step": 41610 }, { "epoch": 0.20784538939798747, "grad_norm": 0.08893455564975739, "learning_rate": 2.382412576034444e-05, "loss": 9.7268, "step": 41620 }, { "epoch": 0.20789532822292692, "grad_norm": 0.0987037941813469, "learning_rate": 2.3822623845402887e-05, "loss": 9.7287, "step": 41630 }, { "epoch": 0.20794526704786637, "grad_norm": 0.09531259536743164, "learning_rate": 2.3821121930461338e-05, "loss": 9.7335, "step": 41640 }, { "epoch": 0.20799520587280582, "grad_norm": 0.09097182750701904, "learning_rate": 2.3819620015519788e-05, "loss": 9.7319, "step": 41650 }, { "epoch": 0.20804514469774527, "grad_norm": 0.09137894958257675, "learning_rate": 2.3818118100578238e-05, "loss": 9.7361, "step": 41660 }, { "epoch": 0.20809508352268472, "grad_norm": 0.09314315766096115, "learning_rate": 2.3816616185636688e-05, "loss": 9.727, "step": 41670 }, { "epoch": 0.20814502234762416, "grad_norm": 0.09129998832941055, "learning_rate": 2.3815114270695135e-05, "loss": 9.7235, "step": 41680 }, { "epoch": 0.20819496117256361, "grad_norm": 0.09588906913995743, "learning_rate": 2.3813612355753585e-05, "loss": 9.7317, "step": 41690 }, { "epoch": 0.20824489999750306, "grad_norm": 0.089946448802948, "learning_rate": 2.3812110440812035e-05, "loss": 9.7317, "step": 41700 }, { "epoch": 0.2082948388224425, "grad_norm": 0.09250712394714355, "learning_rate": 2.3810608525870485e-05, "loss": 9.7321, "step": 41710 }, { "epoch": 0.20834477764738196, "grad_norm": 0.09257816523313522, "learning_rate": 2.3809106610928936e-05, "loss": 9.7294, "step": 41720 }, { "epoch": 0.2083947164723214, "grad_norm": 0.09143438935279846, "learning_rate": 2.3807604695987386e-05, "loss": 9.731, "step": 41730 }, { "epoch": 0.20844465529726086, "grad_norm": 0.09791240841150284, "learning_rate": 2.3806102781045833e-05, "loss": 9.7234, "step": 41740 }, { "epoch": 0.2084945941222003, "grad_norm": 0.10063060373067856, "learning_rate": 2.3804600866104283e-05, "loss": 9.7279, "step": 41750 }, { "epoch": 0.20854453294713976, "grad_norm": 0.09549122303724289, "learning_rate": 2.3803098951162733e-05, "loss": 9.7233, "step": 41760 }, { "epoch": 0.2085944717720792, "grad_norm": 0.09345372021198273, "learning_rate": 2.3801597036221183e-05, "loss": 9.7282, "step": 41770 }, { "epoch": 0.20864441059701866, "grad_norm": 0.0949343666434288, "learning_rate": 2.3800095121279633e-05, "loss": 9.721, "step": 41780 }, { "epoch": 0.2086943494219581, "grad_norm": 0.09225095063447952, "learning_rate": 2.3798593206338083e-05, "loss": 9.7369, "step": 41790 }, { "epoch": 0.20874428824689756, "grad_norm": 0.09359176456928253, "learning_rate": 2.379709129139653e-05, "loss": 9.7298, "step": 41800 }, { "epoch": 0.208794227071837, "grad_norm": 0.09750591963529587, "learning_rate": 2.379558937645498e-05, "loss": 9.7263, "step": 41810 }, { "epoch": 0.20884416589677646, "grad_norm": 0.0969536155462265, "learning_rate": 2.379408746151343e-05, "loss": 9.7193, "step": 41820 }, { "epoch": 0.2088941047217159, "grad_norm": 0.09383013099431992, "learning_rate": 2.379258554657188e-05, "loss": 9.7168, "step": 41830 }, { "epoch": 0.20894404354665536, "grad_norm": 0.09686887264251709, "learning_rate": 2.379108363163033e-05, "loss": 9.7198, "step": 41840 }, { "epoch": 0.2089939823715948, "grad_norm": 0.09708838164806366, "learning_rate": 2.3789581716688778e-05, "loss": 9.7315, "step": 41850 }, { "epoch": 0.20904392119653425, "grad_norm": 0.09624648839235306, "learning_rate": 2.3788079801747228e-05, "loss": 9.7148, "step": 41860 }, { "epoch": 0.2090938600214737, "grad_norm": 0.09800230711698532, "learning_rate": 2.3786577886805678e-05, "loss": 9.7328, "step": 41870 }, { "epoch": 0.20914379884641315, "grad_norm": 0.09165570139884949, "learning_rate": 2.3785075971864128e-05, "loss": 9.7219, "step": 41880 }, { "epoch": 0.2091937376713526, "grad_norm": 0.09577257931232452, "learning_rate": 2.378357405692258e-05, "loss": 9.7289, "step": 41890 }, { "epoch": 0.20924367649629205, "grad_norm": 0.09999013692140579, "learning_rate": 2.3782072141981025e-05, "loss": 9.7178, "step": 41900 }, { "epoch": 0.2092936153212315, "grad_norm": 0.09558932483196259, "learning_rate": 2.3780570227039475e-05, "loss": 9.7261, "step": 41910 }, { "epoch": 0.20934355414617095, "grad_norm": 0.09391254931688309, "learning_rate": 2.3779068312097926e-05, "loss": 9.7244, "step": 41920 }, { "epoch": 0.2093934929711104, "grad_norm": 0.09899161756038666, "learning_rate": 2.3777566397156376e-05, "loss": 9.7294, "step": 41930 }, { "epoch": 0.20944343179604985, "grad_norm": 0.09575141966342926, "learning_rate": 2.3776064482214826e-05, "loss": 9.72, "step": 41940 }, { "epoch": 0.2094933706209893, "grad_norm": 0.10070329904556274, "learning_rate": 2.3774562567273273e-05, "loss": 9.7279, "step": 41950 }, { "epoch": 0.20954330944592875, "grad_norm": 0.09497936069965363, "learning_rate": 2.3773060652331723e-05, "loss": 9.7317, "step": 41960 }, { "epoch": 0.2095932482708682, "grad_norm": 0.09233599156141281, "learning_rate": 2.3771558737390173e-05, "loss": 9.7265, "step": 41970 }, { "epoch": 0.20964318709580765, "grad_norm": 0.08788701891899109, "learning_rate": 2.3770056822448623e-05, "loss": 9.7191, "step": 41980 }, { "epoch": 0.2096931259207471, "grad_norm": 0.09631108492612839, "learning_rate": 2.3768554907507073e-05, "loss": 9.7255, "step": 41990 }, { "epoch": 0.20974306474568655, "grad_norm": 0.09688840806484222, "learning_rate": 2.376705299256552e-05, "loss": 9.7247, "step": 42000 }, { "epoch": 0.209793003570626, "grad_norm": 0.09418191015720367, "learning_rate": 2.376555107762397e-05, "loss": 9.7267, "step": 42010 }, { "epoch": 0.20984294239556545, "grad_norm": 0.0922936499118805, "learning_rate": 2.376404916268242e-05, "loss": 9.7249, "step": 42020 }, { "epoch": 0.20989288122050487, "grad_norm": 0.09569290280342102, "learning_rate": 2.376254724774087e-05, "loss": 9.723, "step": 42030 }, { "epoch": 0.20994282004544432, "grad_norm": 0.10295400768518448, "learning_rate": 2.376104533279932e-05, "loss": 9.7236, "step": 42040 }, { "epoch": 0.20999275887038377, "grad_norm": 0.09752771258354187, "learning_rate": 2.375954341785777e-05, "loss": 9.7179, "step": 42050 }, { "epoch": 0.21004269769532322, "grad_norm": 0.09303586930036545, "learning_rate": 2.3758041502916218e-05, "loss": 9.7193, "step": 42060 }, { "epoch": 0.21009263652026267, "grad_norm": 0.10309559851884842, "learning_rate": 2.3756539587974668e-05, "loss": 9.7216, "step": 42070 }, { "epoch": 0.21014257534520212, "grad_norm": 0.09949391335248947, "learning_rate": 2.375503767303312e-05, "loss": 9.7174, "step": 42080 }, { "epoch": 0.21019251417014156, "grad_norm": 0.0931687206029892, "learning_rate": 2.375353575809157e-05, "loss": 9.7203, "step": 42090 }, { "epoch": 0.21024245299508101, "grad_norm": 0.09643073379993439, "learning_rate": 2.375203384315002e-05, "loss": 9.7225, "step": 42100 }, { "epoch": 0.21029239182002046, "grad_norm": 0.09048034995794296, "learning_rate": 2.3750531928208465e-05, "loss": 9.7207, "step": 42110 }, { "epoch": 0.2103423306449599, "grad_norm": 0.10079729557037354, "learning_rate": 2.3749030013266916e-05, "loss": 9.7121, "step": 42120 }, { "epoch": 0.21039226946989936, "grad_norm": 0.09450411796569824, "learning_rate": 2.3747528098325366e-05, "loss": 9.7202, "step": 42130 }, { "epoch": 0.2104422082948388, "grad_norm": 0.09755988419055939, "learning_rate": 2.3746026183383816e-05, "loss": 9.7198, "step": 42140 }, { "epoch": 0.21049214711977826, "grad_norm": 0.09404526650905609, "learning_rate": 2.3744524268442266e-05, "loss": 9.7194, "step": 42150 }, { "epoch": 0.2105420859447177, "grad_norm": 0.09205637127161026, "learning_rate": 2.3743022353500713e-05, "loss": 9.7256, "step": 42160 }, { "epoch": 0.21059202476965716, "grad_norm": 0.09348385035991669, "learning_rate": 2.3741520438559163e-05, "loss": 9.7176, "step": 42170 }, { "epoch": 0.2106419635945966, "grad_norm": 0.09955950081348419, "learning_rate": 2.3740018523617613e-05, "loss": 9.7243, "step": 42180 }, { "epoch": 0.21069190241953606, "grad_norm": 0.09449559450149536, "learning_rate": 2.3738516608676063e-05, "loss": 9.7126, "step": 42190 }, { "epoch": 0.2107418412444755, "grad_norm": 0.09414367377758026, "learning_rate": 2.3737014693734514e-05, "loss": 9.7162, "step": 42200 }, { "epoch": 0.21079178006941496, "grad_norm": 0.09569656103849411, "learning_rate": 2.373551277879296e-05, "loss": 9.7146, "step": 42210 }, { "epoch": 0.2108417188943544, "grad_norm": 0.10219884663820267, "learning_rate": 2.373401086385141e-05, "loss": 9.7196, "step": 42220 }, { "epoch": 0.21089165771929386, "grad_norm": 0.091194286942482, "learning_rate": 2.373250894890986e-05, "loss": 9.7179, "step": 42230 }, { "epoch": 0.2109415965442333, "grad_norm": 0.09087838232517242, "learning_rate": 2.373100703396831e-05, "loss": 9.7237, "step": 42240 }, { "epoch": 0.21099153536917276, "grad_norm": 0.09350999444723129, "learning_rate": 2.372950511902676e-05, "loss": 9.7151, "step": 42250 }, { "epoch": 0.2110414741941122, "grad_norm": 0.08830499649047852, "learning_rate": 2.3728003204085208e-05, "loss": 9.7138, "step": 42260 }, { "epoch": 0.21109141301905165, "grad_norm": 0.09413766860961914, "learning_rate": 2.3726501289143658e-05, "loss": 9.7158, "step": 42270 }, { "epoch": 0.2111413518439911, "grad_norm": 0.09809092432260513, "learning_rate": 2.372499937420211e-05, "loss": 9.7141, "step": 42280 }, { "epoch": 0.21119129066893055, "grad_norm": 0.09166799485683441, "learning_rate": 2.372349745926056e-05, "loss": 9.7125, "step": 42290 }, { "epoch": 0.21124122949387, "grad_norm": 0.09736365079879761, "learning_rate": 2.372199554431901e-05, "loss": 9.7256, "step": 42300 }, { "epoch": 0.21129116831880945, "grad_norm": 0.10044820606708527, "learning_rate": 2.3720493629377455e-05, "loss": 9.7196, "step": 42310 }, { "epoch": 0.2113411071437489, "grad_norm": 0.09292640537023544, "learning_rate": 2.3718991714435906e-05, "loss": 9.7137, "step": 42320 }, { "epoch": 0.21139104596868835, "grad_norm": 0.09433227777481079, "learning_rate": 2.3717489799494356e-05, "loss": 9.7101, "step": 42330 }, { "epoch": 0.2114409847936278, "grad_norm": 0.0979892909526825, "learning_rate": 2.3715987884552806e-05, "loss": 9.7208, "step": 42340 }, { "epoch": 0.21149092361856725, "grad_norm": 0.09797633439302444, "learning_rate": 2.3714485969611256e-05, "loss": 9.7147, "step": 42350 }, { "epoch": 0.2115408624435067, "grad_norm": 0.09626011550426483, "learning_rate": 2.3712984054669703e-05, "loss": 9.71, "step": 42360 }, { "epoch": 0.21159080126844615, "grad_norm": 0.10008444637060165, "learning_rate": 2.3711482139728157e-05, "loss": 9.7133, "step": 42370 }, { "epoch": 0.2116407400933856, "grad_norm": 0.09693645685911179, "learning_rate": 2.3709980224786603e-05, "loss": 9.7155, "step": 42380 }, { "epoch": 0.21169067891832505, "grad_norm": 0.09394808113574982, "learning_rate": 2.3708478309845054e-05, "loss": 9.7087, "step": 42390 }, { "epoch": 0.2117406177432645, "grad_norm": 0.0966886356472969, "learning_rate": 2.3706976394903504e-05, "loss": 9.7124, "step": 42400 }, { "epoch": 0.21179055656820395, "grad_norm": 0.09753970056772232, "learning_rate": 2.370547447996195e-05, "loss": 9.712, "step": 42410 }, { "epoch": 0.2118404953931434, "grad_norm": 0.095003142952919, "learning_rate": 2.3703972565020404e-05, "loss": 9.7176, "step": 42420 }, { "epoch": 0.21189043421808285, "grad_norm": 0.09311114996671677, "learning_rate": 2.370247065007885e-05, "loss": 9.7169, "step": 42430 }, { "epoch": 0.2119403730430223, "grad_norm": 0.08979105204343796, "learning_rate": 2.37009687351373e-05, "loss": 9.7247, "step": 42440 }, { "epoch": 0.21199031186796174, "grad_norm": 0.09097933769226074, "learning_rate": 2.369946682019575e-05, "loss": 9.7103, "step": 42450 }, { "epoch": 0.2120402506929012, "grad_norm": 0.09349437057971954, "learning_rate": 2.3697964905254198e-05, "loss": 9.7147, "step": 42460 }, { "epoch": 0.21209018951784064, "grad_norm": 0.09845960140228271, "learning_rate": 2.369646299031265e-05, "loss": 9.7105, "step": 42470 }, { "epoch": 0.2121401283427801, "grad_norm": 0.09305304288864136, "learning_rate": 2.36949610753711e-05, "loss": 9.7152, "step": 42480 }, { "epoch": 0.21219006716771954, "grad_norm": 0.09439215064048767, "learning_rate": 2.369345916042955e-05, "loss": 9.7083, "step": 42490 }, { "epoch": 0.212240005992659, "grad_norm": 0.09390342235565186, "learning_rate": 2.3691957245488e-05, "loss": 9.7082, "step": 42500 }, { "epoch": 0.21228994481759844, "grad_norm": 0.08965789526700974, "learning_rate": 2.3690455330546445e-05, "loss": 9.7106, "step": 42510 }, { "epoch": 0.2123398836425379, "grad_norm": 0.09593262523412704, "learning_rate": 2.36889534156049e-05, "loss": 9.7163, "step": 42520 }, { "epoch": 0.21238982246747734, "grad_norm": 0.09541445225477219, "learning_rate": 2.3687451500663346e-05, "loss": 9.7153, "step": 42530 }, { "epoch": 0.2124397612924168, "grad_norm": 0.09254483878612518, "learning_rate": 2.3685949585721796e-05, "loss": 9.7166, "step": 42540 }, { "epoch": 0.21248970011735624, "grad_norm": 0.09330740571022034, "learning_rate": 2.3684447670780246e-05, "loss": 9.7049, "step": 42550 }, { "epoch": 0.2125396389422957, "grad_norm": 0.10075175762176514, "learning_rate": 2.3682945755838693e-05, "loss": 9.7118, "step": 42560 }, { "epoch": 0.21258957776723514, "grad_norm": 0.09620960056781769, "learning_rate": 2.3681443840897147e-05, "loss": 9.711, "step": 42570 }, { "epoch": 0.2126395165921746, "grad_norm": 0.09333734214305878, "learning_rate": 2.3679941925955593e-05, "loss": 9.7069, "step": 42580 }, { "epoch": 0.21268945541711404, "grad_norm": 0.09951247274875641, "learning_rate": 2.3678440011014044e-05, "loss": 9.7065, "step": 42590 }, { "epoch": 0.2127393942420535, "grad_norm": 0.09395639598369598, "learning_rate": 2.3676938096072494e-05, "loss": 9.7071, "step": 42600 }, { "epoch": 0.21278933306699294, "grad_norm": 0.10100211948156357, "learning_rate": 2.367543618113094e-05, "loss": 9.7104, "step": 42610 }, { "epoch": 0.21283927189193239, "grad_norm": 0.09487060457468033, "learning_rate": 2.3673934266189394e-05, "loss": 9.7075, "step": 42620 }, { "epoch": 0.21288921071687184, "grad_norm": 0.09256965667009354, "learning_rate": 2.367243235124784e-05, "loss": 9.7168, "step": 42630 }, { "epoch": 0.21293914954181128, "grad_norm": 0.0953928530216217, "learning_rate": 2.367093043630629e-05, "loss": 9.708, "step": 42640 }, { "epoch": 0.21298908836675073, "grad_norm": 0.09185754507780075, "learning_rate": 2.366942852136474e-05, "loss": 9.7165, "step": 42650 }, { "epoch": 0.21303902719169018, "grad_norm": 0.09027005732059479, "learning_rate": 2.3667926606423188e-05, "loss": 9.71, "step": 42660 }, { "epoch": 0.21308896601662963, "grad_norm": 0.0954156145453453, "learning_rate": 2.366642469148164e-05, "loss": 9.709, "step": 42670 }, { "epoch": 0.21313890484156908, "grad_norm": 0.08864131569862366, "learning_rate": 2.366492277654009e-05, "loss": 9.7154, "step": 42680 }, { "epoch": 0.21318884366650853, "grad_norm": 0.09630046784877777, "learning_rate": 2.3663420861598542e-05, "loss": 9.712, "step": 42690 }, { "epoch": 0.21323878249144798, "grad_norm": 0.09661024063825607, "learning_rate": 2.366191894665699e-05, "loss": 9.7077, "step": 42700 }, { "epoch": 0.21328872131638743, "grad_norm": 0.09176916629076004, "learning_rate": 2.3660417031715436e-05, "loss": 9.7138, "step": 42710 }, { "epoch": 0.21333866014132688, "grad_norm": 0.09320518374443054, "learning_rate": 2.365891511677389e-05, "loss": 9.712, "step": 42720 }, { "epoch": 0.21338859896626633, "grad_norm": 0.10201375186443329, "learning_rate": 2.3657413201832336e-05, "loss": 9.7025, "step": 42730 }, { "epoch": 0.21343853779120578, "grad_norm": 0.09826554358005524, "learning_rate": 2.365591128689079e-05, "loss": 9.7106, "step": 42740 }, { "epoch": 0.21348847661614523, "grad_norm": 0.09512682259082794, "learning_rate": 2.3654409371949236e-05, "loss": 9.7113, "step": 42750 }, { "epoch": 0.21353841544108468, "grad_norm": 0.10446712374687195, "learning_rate": 2.3652907457007683e-05, "loss": 9.7097, "step": 42760 }, { "epoch": 0.21358835426602413, "grad_norm": 0.09374655038118362, "learning_rate": 2.3651405542066137e-05, "loss": 9.7101, "step": 42770 }, { "epoch": 0.21363829309096358, "grad_norm": 0.10137982666492462, "learning_rate": 2.3649903627124583e-05, "loss": 9.7063, "step": 42780 }, { "epoch": 0.21368823191590303, "grad_norm": 0.09552907198667526, "learning_rate": 2.3648401712183037e-05, "loss": 9.7035, "step": 42790 }, { "epoch": 0.21373817074084248, "grad_norm": 0.10118672996759415, "learning_rate": 2.3646899797241484e-05, "loss": 9.7137, "step": 42800 }, { "epoch": 0.21378810956578193, "grad_norm": 0.09724298119544983, "learning_rate": 2.364539788229993e-05, "loss": 9.7052, "step": 42810 }, { "epoch": 0.21383804839072137, "grad_norm": 0.09356910735368729, "learning_rate": 2.3643895967358384e-05, "loss": 9.7068, "step": 42820 }, { "epoch": 0.21388798721566082, "grad_norm": 0.09422975033521652, "learning_rate": 2.364239405241683e-05, "loss": 9.7097, "step": 42830 }, { "epoch": 0.21393792604060027, "grad_norm": 0.09154810756444931, "learning_rate": 2.3640892137475284e-05, "loss": 9.7082, "step": 42840 }, { "epoch": 0.21398786486553972, "grad_norm": 0.08905761688947678, "learning_rate": 2.363939022253373e-05, "loss": 9.7014, "step": 42850 }, { "epoch": 0.21403780369047917, "grad_norm": 0.0964711457490921, "learning_rate": 2.3637888307592178e-05, "loss": 9.7035, "step": 42860 }, { "epoch": 0.21408774251541862, "grad_norm": 0.09149517863988876, "learning_rate": 2.363638639265063e-05, "loss": 9.7052, "step": 42870 }, { "epoch": 0.21413768134035807, "grad_norm": 0.09464892745018005, "learning_rate": 2.363488447770908e-05, "loss": 9.7134, "step": 42880 }, { "epoch": 0.21418762016529752, "grad_norm": 0.09502291679382324, "learning_rate": 2.3633382562767532e-05, "loss": 9.7036, "step": 42890 }, { "epoch": 0.21423755899023697, "grad_norm": 0.09215651452541351, "learning_rate": 2.363188064782598e-05, "loss": 9.7038, "step": 42900 }, { "epoch": 0.21428749781517642, "grad_norm": 0.09255354106426239, "learning_rate": 2.3630378732884426e-05, "loss": 9.6993, "step": 42910 }, { "epoch": 0.21433743664011587, "grad_norm": 0.09328746050596237, "learning_rate": 2.362887681794288e-05, "loss": 9.7027, "step": 42920 }, { "epoch": 0.21438737546505532, "grad_norm": 0.0980675145983696, "learning_rate": 2.3627374903001326e-05, "loss": 9.7081, "step": 42930 }, { "epoch": 0.21443731428999477, "grad_norm": 0.09392713755369186, "learning_rate": 2.362587298805978e-05, "loss": 9.7009, "step": 42940 }, { "epoch": 0.21448725311493422, "grad_norm": 0.09184177964925766, "learning_rate": 2.3624371073118226e-05, "loss": 9.7055, "step": 42950 }, { "epoch": 0.21453719193987367, "grad_norm": 0.10423033684492111, "learning_rate": 2.3622869158176673e-05, "loss": 9.7083, "step": 42960 }, { "epoch": 0.21458713076481312, "grad_norm": 0.09163152426481247, "learning_rate": 2.3621367243235127e-05, "loss": 10.5286, "step": 42970 }, { "epoch": 0.21463706958975257, "grad_norm": 0.09652361273765564, "learning_rate": 2.3619865328293573e-05, "loss": 41.2823, "step": 42980 }, { "epoch": 0.21468700841469202, "grad_norm": 0.09223783016204834, "learning_rate": 2.3618363413352027e-05, "loss": 9.7045, "step": 42990 }, { "epoch": 0.21473694723963146, "grad_norm": 0.09330345690250397, "learning_rate": 2.3616861498410474e-05, "loss": 9.7024, "step": 43000 }, { "epoch": 0.21478688606457091, "grad_norm": 0.09638207405805588, "learning_rate": 2.3615359583468924e-05, "loss": 9.7073, "step": 43010 }, { "epoch": 0.21483682488951034, "grad_norm": 0.09385758638381958, "learning_rate": 2.3613857668527374e-05, "loss": 9.706, "step": 43020 }, { "epoch": 0.21488676371444979, "grad_norm": 0.09166792035102844, "learning_rate": 2.361235575358582e-05, "loss": 9.6945, "step": 43030 }, { "epoch": 0.21493670253938923, "grad_norm": 0.09677714109420776, "learning_rate": 2.3610853838644274e-05, "loss": 9.7021, "step": 43040 }, { "epoch": 0.21498664136432868, "grad_norm": 0.09546297788619995, "learning_rate": 2.360935192370272e-05, "loss": 9.6937, "step": 43050 }, { "epoch": 0.21503658018926813, "grad_norm": 0.09486702084541321, "learning_rate": 2.360785000876117e-05, "loss": 9.7021, "step": 43060 }, { "epoch": 0.21508651901420758, "grad_norm": 0.09048479050397873, "learning_rate": 2.360634809381962e-05, "loss": 9.7101, "step": 43070 }, { "epoch": 0.21513645783914703, "grad_norm": 0.0977143719792366, "learning_rate": 2.360484617887807e-05, "loss": 9.6991, "step": 43080 }, { "epoch": 0.21518639666408648, "grad_norm": 0.0984925925731659, "learning_rate": 2.3603344263936522e-05, "loss": 9.6967, "step": 43090 }, { "epoch": 0.21523633548902593, "grad_norm": 0.09045634418725967, "learning_rate": 2.360184234899497e-05, "loss": 9.6997, "step": 43100 }, { "epoch": 0.21528627431396538, "grad_norm": 0.09143086522817612, "learning_rate": 2.360034043405342e-05, "loss": 9.7057, "step": 43110 }, { "epoch": 0.21533621313890483, "grad_norm": 0.09460347890853882, "learning_rate": 2.359883851911187e-05, "loss": 9.6983, "step": 43120 }, { "epoch": 0.21538615196384428, "grad_norm": 0.09431096911430359, "learning_rate": 2.3597336604170316e-05, "loss": 9.7025, "step": 43130 }, { "epoch": 0.21543609078878373, "grad_norm": 0.1000736653804779, "learning_rate": 2.359583468922877e-05, "loss": 9.694, "step": 43140 }, { "epoch": 0.21548602961372318, "grad_norm": 0.09227129071950912, "learning_rate": 2.3594332774287216e-05, "loss": 9.7007, "step": 43150 }, { "epoch": 0.21553596843866263, "grad_norm": 0.09356602281332016, "learning_rate": 2.3592830859345666e-05, "loss": 9.6967, "step": 43160 }, { "epoch": 0.21558590726360208, "grad_norm": 0.09989676624536514, "learning_rate": 2.3591328944404117e-05, "loss": 9.7074, "step": 43170 }, { "epoch": 0.21563584608854153, "grad_norm": 0.0949828028678894, "learning_rate": 2.3589827029462563e-05, "loss": 9.6986, "step": 43180 }, { "epoch": 0.21568578491348098, "grad_norm": 0.09488669037818909, "learning_rate": 2.3588325114521017e-05, "loss": 9.7024, "step": 43190 }, { "epoch": 0.21573572373842043, "grad_norm": 0.09980147331953049, "learning_rate": 2.3586823199579464e-05, "loss": 9.6998, "step": 43200 }, { "epoch": 0.21578566256335988, "grad_norm": 0.09408322721719742, "learning_rate": 2.3585321284637914e-05, "loss": 9.6962, "step": 43210 }, { "epoch": 0.21583560138829933, "grad_norm": 0.09337528049945831, "learning_rate": 2.3583819369696364e-05, "loss": 9.7064, "step": 43220 }, { "epoch": 0.21588554021323877, "grad_norm": 0.09422304481267929, "learning_rate": 2.358231745475481e-05, "loss": 9.6981, "step": 43230 }, { "epoch": 0.21593547903817822, "grad_norm": 0.09392604231834412, "learning_rate": 2.3580815539813264e-05, "loss": 9.702, "step": 43240 }, { "epoch": 0.21598541786311767, "grad_norm": 0.09568940103054047, "learning_rate": 2.357931362487171e-05, "loss": 9.6984, "step": 43250 }, { "epoch": 0.21603535668805712, "grad_norm": 0.09230421483516693, "learning_rate": 2.357781170993016e-05, "loss": 9.6961, "step": 43260 }, { "epoch": 0.21608529551299657, "grad_norm": 0.0997990295290947, "learning_rate": 2.357630979498861e-05, "loss": 9.6982, "step": 43270 }, { "epoch": 0.21613523433793602, "grad_norm": 0.09223617613315582, "learning_rate": 2.357480788004706e-05, "loss": 9.6975, "step": 43280 }, { "epoch": 0.21618517316287547, "grad_norm": 0.09891097992658615, "learning_rate": 2.3573305965105512e-05, "loss": 9.6933, "step": 43290 }, { "epoch": 0.21623511198781492, "grad_norm": 0.09234698116779327, "learning_rate": 2.357180405016396e-05, "loss": 9.6913, "step": 43300 }, { "epoch": 0.21628505081275437, "grad_norm": 0.0998927652835846, "learning_rate": 2.357030213522241e-05, "loss": 9.6922, "step": 43310 }, { "epoch": 0.21633498963769382, "grad_norm": 0.09355306625366211, "learning_rate": 2.356880022028086e-05, "loss": 9.6943, "step": 43320 }, { "epoch": 0.21638492846263327, "grad_norm": 0.09233734756708145, "learning_rate": 2.356729830533931e-05, "loss": 9.6983, "step": 43330 }, { "epoch": 0.21643486728757272, "grad_norm": 0.09967396408319473, "learning_rate": 2.356579639039776e-05, "loss": 9.6938, "step": 43340 }, { "epoch": 0.21648480611251217, "grad_norm": 0.09385713189840317, "learning_rate": 2.3564294475456206e-05, "loss": 9.6936, "step": 43350 }, { "epoch": 0.21653474493745162, "grad_norm": 0.09764021635055542, "learning_rate": 2.3562792560514656e-05, "loss": 9.6984, "step": 43360 }, { "epoch": 0.21658468376239107, "grad_norm": 0.0969097763299942, "learning_rate": 2.3561290645573107e-05, "loss": 9.6953, "step": 43370 }, { "epoch": 0.21663462258733052, "grad_norm": 0.09680244326591492, "learning_rate": 2.3559788730631557e-05, "loss": 9.6967, "step": 43380 }, { "epoch": 0.21668456141226997, "grad_norm": 0.09880103170871735, "learning_rate": 2.3558286815690007e-05, "loss": 9.703, "step": 43390 }, { "epoch": 0.21673450023720942, "grad_norm": 0.09156890958547592, "learning_rate": 2.3556784900748454e-05, "loss": 9.6993, "step": 43400 }, { "epoch": 0.21678443906214886, "grad_norm": 0.09816037863492966, "learning_rate": 2.3555282985806904e-05, "loss": 9.6899, "step": 43410 }, { "epoch": 0.21683437788708831, "grad_norm": 0.09547406435012817, "learning_rate": 2.3553781070865354e-05, "loss": 9.6893, "step": 43420 }, { "epoch": 0.21688431671202776, "grad_norm": 0.10225875675678253, "learning_rate": 2.3552279155923804e-05, "loss": 9.6887, "step": 43430 }, { "epoch": 0.2169342555369672, "grad_norm": 0.09828753769397736, "learning_rate": 2.3550777240982254e-05, "loss": 9.6982, "step": 43440 }, { "epoch": 0.21698419436190666, "grad_norm": 0.09586260467767715, "learning_rate": 2.35492753260407e-05, "loss": 9.6886, "step": 43450 }, { "epoch": 0.2170341331868461, "grad_norm": 0.09193310141563416, "learning_rate": 2.354777341109915e-05, "loss": 9.7037, "step": 43460 }, { "epoch": 0.21708407201178556, "grad_norm": 0.09209885448217392, "learning_rate": 2.35462714961576e-05, "loss": 9.692, "step": 43470 }, { "epoch": 0.217134010836725, "grad_norm": 0.09702476859092712, "learning_rate": 2.3544769581216052e-05, "loss": 9.6904, "step": 43480 }, { "epoch": 0.21718394966166446, "grad_norm": 0.0922791138291359, "learning_rate": 2.3543267666274502e-05, "loss": 9.6992, "step": 43490 }, { "epoch": 0.2172338884866039, "grad_norm": 0.09711603820323944, "learning_rate": 2.354176575133295e-05, "loss": 9.6947, "step": 43500 }, { "epoch": 0.21728382731154336, "grad_norm": 0.0968707948923111, "learning_rate": 2.35402638363914e-05, "loss": 9.6859, "step": 43510 }, { "epoch": 0.2173337661364828, "grad_norm": 0.09466083347797394, "learning_rate": 2.353876192144985e-05, "loss": 9.6967, "step": 43520 }, { "epoch": 0.21738370496142226, "grad_norm": 0.09528151154518127, "learning_rate": 2.35372600065083e-05, "loss": 9.6887, "step": 43530 }, { "epoch": 0.2174336437863617, "grad_norm": 0.09486556053161621, "learning_rate": 2.353575809156675e-05, "loss": 9.688, "step": 43540 }, { "epoch": 0.21748358261130116, "grad_norm": 0.09180723130702972, "learning_rate": 2.3534256176625196e-05, "loss": 9.6934, "step": 43550 }, { "epoch": 0.2175335214362406, "grad_norm": 0.09143299609422684, "learning_rate": 2.3532754261683646e-05, "loss": 9.6973, "step": 43560 }, { "epoch": 0.21758346026118006, "grad_norm": 0.09400318562984467, "learning_rate": 2.3531252346742097e-05, "loss": 9.6916, "step": 43570 }, { "epoch": 0.2176333990861195, "grad_norm": 0.09261635690927505, "learning_rate": 2.3529750431800547e-05, "loss": 9.701, "step": 43580 }, { "epoch": 0.21768333791105896, "grad_norm": 0.09949515014886856, "learning_rate": 2.3528248516858997e-05, "loss": 9.6943, "step": 43590 }, { "epoch": 0.2177332767359984, "grad_norm": 0.09125660359859467, "learning_rate": 2.3526746601917444e-05, "loss": 9.6932, "step": 43600 }, { "epoch": 0.21778321556093785, "grad_norm": 0.08959287405014038, "learning_rate": 2.3525244686975894e-05, "loss": 9.6986, "step": 43610 }, { "epoch": 0.2178331543858773, "grad_norm": 0.0992596298456192, "learning_rate": 2.3523742772034344e-05, "loss": 9.6865, "step": 43620 }, { "epoch": 0.21788309321081675, "grad_norm": 0.09467126429080963, "learning_rate": 2.3522240857092794e-05, "loss": 9.6988, "step": 43630 }, { "epoch": 0.2179330320357562, "grad_norm": 0.09459062665700912, "learning_rate": 2.3520738942151244e-05, "loss": 9.6991, "step": 43640 }, { "epoch": 0.21798297086069565, "grad_norm": 0.09231843799352646, "learning_rate": 2.3519237027209695e-05, "loss": 9.689, "step": 43650 }, { "epoch": 0.2180329096856351, "grad_norm": 0.09330347180366516, "learning_rate": 2.351773511226814e-05, "loss": 9.6886, "step": 43660 }, { "epoch": 0.21808284851057455, "grad_norm": 0.09178127348423004, "learning_rate": 2.351623319732659e-05, "loss": 9.6855, "step": 43670 }, { "epoch": 0.218132787335514, "grad_norm": 0.09343292564153671, "learning_rate": 2.3514731282385042e-05, "loss": 9.6845, "step": 43680 }, { "epoch": 0.21818272616045345, "grad_norm": 0.09548097103834152, "learning_rate": 2.3513229367443492e-05, "loss": 9.6896, "step": 43690 }, { "epoch": 0.2182326649853929, "grad_norm": 0.0939573422074318, "learning_rate": 2.3511727452501942e-05, "loss": 9.6855, "step": 43700 }, { "epoch": 0.21828260381033235, "grad_norm": 0.09515061974525452, "learning_rate": 2.351022553756039e-05, "loss": 9.6916, "step": 43710 }, { "epoch": 0.2183325426352718, "grad_norm": 0.09717291593551636, "learning_rate": 2.350872362261884e-05, "loss": 9.6881, "step": 43720 }, { "epoch": 0.21838248146021125, "grad_norm": 0.09339723736047745, "learning_rate": 2.350722170767729e-05, "loss": 9.6886, "step": 43730 }, { "epoch": 0.2184324202851507, "grad_norm": 0.0933520719408989, "learning_rate": 2.350571979273574e-05, "loss": 9.686, "step": 43740 }, { "epoch": 0.21848235911009015, "grad_norm": 0.09928298741579056, "learning_rate": 2.350421787779419e-05, "loss": 9.6908, "step": 43750 }, { "epoch": 0.2185322979350296, "grad_norm": 0.09567858278751373, "learning_rate": 2.3502715962852636e-05, "loss": 9.6884, "step": 43760 }, { "epoch": 0.21858223675996905, "grad_norm": 0.09107061475515366, "learning_rate": 2.3501214047911087e-05, "loss": 9.6938, "step": 43770 }, { "epoch": 0.2186321755849085, "grad_norm": 0.09627822786569595, "learning_rate": 2.3499712132969537e-05, "loss": 9.6838, "step": 43780 }, { "epoch": 0.21868211440984794, "grad_norm": 0.09654753655195236, "learning_rate": 2.3498210218027987e-05, "loss": 9.6865, "step": 43790 }, { "epoch": 0.2187320532347874, "grad_norm": 0.09376217424869537, "learning_rate": 2.3496708303086437e-05, "loss": 9.6885, "step": 43800 }, { "epoch": 0.21878199205972684, "grad_norm": 0.08824321627616882, "learning_rate": 2.3495206388144884e-05, "loss": 9.6851, "step": 43810 }, { "epoch": 0.2188319308846663, "grad_norm": 0.09631256759166718, "learning_rate": 2.3493704473203334e-05, "loss": 9.6929, "step": 43820 }, { "epoch": 0.21888186970960574, "grad_norm": 0.0864676758646965, "learning_rate": 2.3492202558261784e-05, "loss": 9.6841, "step": 43830 }, { "epoch": 0.2189318085345452, "grad_norm": 0.09546799957752228, "learning_rate": 2.3490700643320235e-05, "loss": 9.691, "step": 43840 }, { "epoch": 0.21898174735948464, "grad_norm": 0.09540455043315887, "learning_rate": 2.3489198728378685e-05, "loss": 9.6935, "step": 43850 }, { "epoch": 0.2190316861844241, "grad_norm": 0.0951889380812645, "learning_rate": 2.348769681343713e-05, "loss": 9.6776, "step": 43860 }, { "epoch": 0.21908162500936354, "grad_norm": 0.09509312361478806, "learning_rate": 2.348619489849558e-05, "loss": 9.6906, "step": 43870 }, { "epoch": 0.219131563834303, "grad_norm": 0.09178054332733154, "learning_rate": 2.3484692983554032e-05, "loss": 9.6842, "step": 43880 }, { "epoch": 0.21918150265924244, "grad_norm": 0.09988975524902344, "learning_rate": 2.3483191068612482e-05, "loss": 9.6777, "step": 43890 }, { "epoch": 0.2192314414841819, "grad_norm": 0.09594890475273132, "learning_rate": 2.3481689153670932e-05, "loss": 9.6763, "step": 43900 }, { "epoch": 0.21928138030912134, "grad_norm": 0.09186796098947525, "learning_rate": 2.348018723872938e-05, "loss": 9.6859, "step": 43910 }, { "epoch": 0.2193313191340608, "grad_norm": 0.10070665925741196, "learning_rate": 2.347868532378783e-05, "loss": 9.6894, "step": 43920 }, { "epoch": 0.21938125795900024, "grad_norm": 0.09850089251995087, "learning_rate": 2.347718340884628e-05, "loss": 9.6834, "step": 43930 }, { "epoch": 0.21943119678393969, "grad_norm": 0.09155581891536713, "learning_rate": 2.347568149390473e-05, "loss": 9.6808, "step": 43940 }, { "epoch": 0.21948113560887914, "grad_norm": 0.0951627716422081, "learning_rate": 2.347417957896318e-05, "loss": 9.6904, "step": 43950 }, { "epoch": 0.21953107443381858, "grad_norm": 0.09084520488977432, "learning_rate": 2.3472677664021626e-05, "loss": 9.6849, "step": 43960 }, { "epoch": 0.21958101325875803, "grad_norm": 0.09903666377067566, "learning_rate": 2.347117574908008e-05, "loss": 9.6876, "step": 43970 }, { "epoch": 0.21963095208369748, "grad_norm": 0.09081403911113739, "learning_rate": 2.3469673834138527e-05, "loss": 9.6861, "step": 43980 }, { "epoch": 0.21968089090863693, "grad_norm": 0.09616552293300629, "learning_rate": 2.3468171919196977e-05, "loss": 9.6771, "step": 43990 }, { "epoch": 0.21973082973357638, "grad_norm": 0.09797076880931854, "learning_rate": 2.3466670004255427e-05, "loss": 9.6872, "step": 44000 }, { "epoch": 0.2197807685585158, "grad_norm": 0.09519995003938675, "learning_rate": 2.3465168089313874e-05, "loss": 9.6835, "step": 44010 }, { "epoch": 0.21983070738345525, "grad_norm": 0.0880652442574501, "learning_rate": 2.3463666174372328e-05, "loss": 9.68, "step": 44020 }, { "epoch": 0.2198806462083947, "grad_norm": 0.09858622401952744, "learning_rate": 2.3462164259430774e-05, "loss": 9.688, "step": 44030 }, { "epoch": 0.21993058503333415, "grad_norm": 0.09564345329999924, "learning_rate": 2.3460662344489225e-05, "loss": 9.6826, "step": 44040 }, { "epoch": 0.2199805238582736, "grad_norm": 0.0934138298034668, "learning_rate": 2.3459160429547675e-05, "loss": 9.6803, "step": 44050 }, { "epoch": 0.22003046268321305, "grad_norm": 0.09328287839889526, "learning_rate": 2.345765851460612e-05, "loss": 9.6871, "step": 44060 }, { "epoch": 0.2200804015081525, "grad_norm": 0.09457116574048996, "learning_rate": 2.3456156599664575e-05, "loss": 9.6918, "step": 44070 }, { "epoch": 0.22013034033309195, "grad_norm": 0.10091323405504227, "learning_rate": 2.3454654684723022e-05, "loss": 9.6813, "step": 44080 }, { "epoch": 0.2201802791580314, "grad_norm": 0.09307578206062317, "learning_rate": 2.3453152769781472e-05, "loss": 9.6833, "step": 44090 }, { "epoch": 0.22023021798297085, "grad_norm": 0.09910399466753006, "learning_rate": 2.3451650854839922e-05, "loss": 9.6762, "step": 44100 }, { "epoch": 0.2202801568079103, "grad_norm": 0.10042911767959595, "learning_rate": 2.345014893989837e-05, "loss": 9.6861, "step": 44110 }, { "epoch": 0.22033009563284975, "grad_norm": 0.09451974183320999, "learning_rate": 2.3448647024956823e-05, "loss": 9.6838, "step": 44120 }, { "epoch": 0.2203800344577892, "grad_norm": 0.09194231033325195, "learning_rate": 2.344714511001527e-05, "loss": 9.6873, "step": 44130 }, { "epoch": 0.22042997328272865, "grad_norm": 0.09405980259180069, "learning_rate": 2.344564319507372e-05, "loss": 9.6875, "step": 44140 }, { "epoch": 0.2204799121076681, "grad_norm": 0.09576237946748734, "learning_rate": 2.344414128013217e-05, "loss": 9.6794, "step": 44150 }, { "epoch": 0.22052985093260755, "grad_norm": 0.09872204065322876, "learning_rate": 2.3442639365190616e-05, "loss": 9.6833, "step": 44160 }, { "epoch": 0.220579789757547, "grad_norm": 0.09933184832334518, "learning_rate": 2.344113745024907e-05, "loss": 9.6823, "step": 44170 }, { "epoch": 0.22062972858248645, "grad_norm": 0.09750817716121674, "learning_rate": 2.3439635535307517e-05, "loss": 9.6704, "step": 44180 }, { "epoch": 0.2206796674074259, "grad_norm": 0.09613807499408722, "learning_rate": 2.3438133620365967e-05, "loss": 9.6826, "step": 44190 }, { "epoch": 0.22072960623236534, "grad_norm": 0.10209544748067856, "learning_rate": 2.3436631705424417e-05, "loss": 9.682, "step": 44200 }, { "epoch": 0.2207795450573048, "grad_norm": 0.0971246138215065, "learning_rate": 2.3435129790482864e-05, "loss": 9.6785, "step": 44210 }, { "epoch": 0.22082948388224424, "grad_norm": 0.10060384124517441, "learning_rate": 2.3433627875541318e-05, "loss": 9.6795, "step": 44220 }, { "epoch": 0.2208794227071837, "grad_norm": 0.09796610474586487, "learning_rate": 2.3432125960599764e-05, "loss": 9.6724, "step": 44230 }, { "epoch": 0.22092936153212314, "grad_norm": 0.09403125196695328, "learning_rate": 2.3430624045658215e-05, "loss": 9.6751, "step": 44240 }, { "epoch": 0.2209793003570626, "grad_norm": 0.0945911630988121, "learning_rate": 2.3429122130716665e-05, "loss": 9.6734, "step": 44250 }, { "epoch": 0.22102923918200204, "grad_norm": 0.09785491228103638, "learning_rate": 2.342762021577511e-05, "loss": 9.678, "step": 44260 }, { "epoch": 0.2210791780069415, "grad_norm": 0.09269008040428162, "learning_rate": 2.3426118300833565e-05, "loss": 9.682, "step": 44270 }, { "epoch": 0.22112911683188094, "grad_norm": 0.09193340688943863, "learning_rate": 2.3424616385892012e-05, "loss": 9.6794, "step": 44280 }, { "epoch": 0.2211790556568204, "grad_norm": 0.09782993048429489, "learning_rate": 2.3423114470950462e-05, "loss": 9.6675, "step": 44290 }, { "epoch": 0.22122899448175984, "grad_norm": 0.09436007589101791, "learning_rate": 2.3421612556008912e-05, "loss": 9.6741, "step": 44300 }, { "epoch": 0.2212789333066993, "grad_norm": 0.09482413530349731, "learning_rate": 2.342011064106736e-05, "loss": 9.6703, "step": 44310 }, { "epoch": 0.22132887213163874, "grad_norm": 0.09317035228013992, "learning_rate": 2.3418608726125813e-05, "loss": 9.6756, "step": 44320 }, { "epoch": 0.2213788109565782, "grad_norm": 0.09680955857038498, "learning_rate": 2.341710681118426e-05, "loss": 9.6781, "step": 44330 }, { "epoch": 0.22142874978151764, "grad_norm": 0.09705173224210739, "learning_rate": 2.3415604896242713e-05, "loss": 9.6713, "step": 44340 }, { "epoch": 0.22147868860645709, "grad_norm": 0.09335210174322128, "learning_rate": 2.341410298130116e-05, "loss": 9.6774, "step": 44350 }, { "epoch": 0.22152862743139654, "grad_norm": 0.09018994867801666, "learning_rate": 2.3412601066359607e-05, "loss": 9.673, "step": 44360 }, { "epoch": 0.22157856625633598, "grad_norm": 0.09429071098566055, "learning_rate": 2.341109915141806e-05, "loss": 9.681, "step": 44370 }, { "epoch": 0.22162850508127543, "grad_norm": 0.09190639108419418, "learning_rate": 2.3409597236476507e-05, "loss": 9.6753, "step": 44380 }, { "epoch": 0.22167844390621488, "grad_norm": 0.10482806712388992, "learning_rate": 2.340809532153496e-05, "loss": 9.6804, "step": 44390 }, { "epoch": 0.22172838273115433, "grad_norm": 0.09960668534040451, "learning_rate": 2.3406593406593407e-05, "loss": 9.6752, "step": 44400 }, { "epoch": 0.22177832155609378, "grad_norm": 0.09253636002540588, "learning_rate": 2.3405091491651854e-05, "loss": 9.6777, "step": 44410 }, { "epoch": 0.22182826038103323, "grad_norm": 0.09326852858066559, "learning_rate": 2.3403589576710308e-05, "loss": 9.673, "step": 44420 }, { "epoch": 0.22187819920597268, "grad_norm": 0.0913088470697403, "learning_rate": 2.3402087661768754e-05, "loss": 9.6732, "step": 44430 }, { "epoch": 0.22192813803091213, "grad_norm": 0.0977054163813591, "learning_rate": 2.3400585746827208e-05, "loss": 9.671, "step": 44440 }, { "epoch": 0.22197807685585158, "grad_norm": 0.09495818614959717, "learning_rate": 2.3399083831885655e-05, "loss": 9.6742, "step": 44450 }, { "epoch": 0.22202801568079103, "grad_norm": 0.09410752356052399, "learning_rate": 2.33975819169441e-05, "loss": 9.6658, "step": 44460 }, { "epoch": 0.22207795450573048, "grad_norm": 0.09136299043893814, "learning_rate": 2.3396080002002555e-05, "loss": 9.6762, "step": 44470 }, { "epoch": 0.22212789333066993, "grad_norm": 0.09776278585195541, "learning_rate": 2.3394578087061002e-05, "loss": 9.6718, "step": 44480 }, { "epoch": 0.22217783215560938, "grad_norm": 0.09050683677196503, "learning_rate": 2.3393076172119455e-05, "loss": 9.6827, "step": 44490 }, { "epoch": 0.22222777098054883, "grad_norm": 0.09264349937438965, "learning_rate": 2.3391574257177902e-05, "loss": 9.6754, "step": 44500 }, { "epoch": 0.22227770980548828, "grad_norm": 0.09283478558063507, "learning_rate": 2.339007234223635e-05, "loss": 9.6718, "step": 44510 }, { "epoch": 0.22232764863042773, "grad_norm": 0.09404191374778748, "learning_rate": 2.3388570427294803e-05, "loss": 9.6742, "step": 44520 }, { "epoch": 0.22237758745536718, "grad_norm": 0.09505549818277359, "learning_rate": 2.338706851235325e-05, "loss": 9.6828, "step": 44530 }, { "epoch": 0.22242752628030663, "grad_norm": 0.09772829711437225, "learning_rate": 2.3385566597411703e-05, "loss": 9.6717, "step": 44540 }, { "epoch": 0.22247746510524607, "grad_norm": 0.09645674377679825, "learning_rate": 2.338406468247015e-05, "loss": 9.6705, "step": 44550 }, { "epoch": 0.22252740393018552, "grad_norm": 0.0913274735212326, "learning_rate": 2.3382562767528597e-05, "loss": 9.6726, "step": 44560 }, { "epoch": 0.22257734275512497, "grad_norm": 0.09277801215648651, "learning_rate": 2.338106085258705e-05, "loss": 9.6706, "step": 44570 }, { "epoch": 0.22262728158006442, "grad_norm": 0.09037318825721741, "learning_rate": 2.3379558937645497e-05, "loss": 9.6727, "step": 44580 }, { "epoch": 0.22267722040500387, "grad_norm": 0.09477484226226807, "learning_rate": 2.337805702270395e-05, "loss": 9.6708, "step": 44590 }, { "epoch": 0.22272715922994332, "grad_norm": 0.09787924587726593, "learning_rate": 2.3376555107762397e-05, "loss": 9.6842, "step": 44600 }, { "epoch": 0.22277709805488277, "grad_norm": 0.09797964245080948, "learning_rate": 2.3375053192820844e-05, "loss": 9.6674, "step": 44610 }, { "epoch": 0.22282703687982222, "grad_norm": 0.09463818371295929, "learning_rate": 2.3373551277879298e-05, "loss": 9.6736, "step": 44620 }, { "epoch": 0.22287697570476167, "grad_norm": 0.09300590306520462, "learning_rate": 2.3372049362937744e-05, "loss": 9.6651, "step": 44630 }, { "epoch": 0.22292691452970112, "grad_norm": 0.09273765981197357, "learning_rate": 2.3370547447996198e-05, "loss": 9.6731, "step": 44640 }, { "epoch": 0.22297685335464057, "grad_norm": 0.09590218961238861, "learning_rate": 2.3369045533054645e-05, "loss": 9.6727, "step": 44650 }, { "epoch": 0.22302679217958002, "grad_norm": 0.09408383816480637, "learning_rate": 2.3367543618113095e-05, "loss": 9.6691, "step": 44660 }, { "epoch": 0.22307673100451947, "grad_norm": 0.101545050740242, "learning_rate": 2.3366041703171545e-05, "loss": 9.6705, "step": 44670 }, { "epoch": 0.22312666982945892, "grad_norm": 0.0954471305012703, "learning_rate": 2.3364539788229992e-05, "loss": 9.6699, "step": 44680 }, { "epoch": 0.22317660865439837, "grad_norm": 0.0932590514421463, "learning_rate": 2.3363037873288445e-05, "loss": 9.669, "step": 44690 }, { "epoch": 0.22322654747933782, "grad_norm": 0.09442409873008728, "learning_rate": 2.3361535958346892e-05, "loss": 9.6754, "step": 44700 }, { "epoch": 0.22327648630427727, "grad_norm": 0.09490916877985, "learning_rate": 2.3360034043405342e-05, "loss": 9.6672, "step": 44710 }, { "epoch": 0.22332642512921672, "grad_norm": 0.0963781476020813, "learning_rate": 2.3358532128463793e-05, "loss": 9.6703, "step": 44720 }, { "epoch": 0.22337636395415617, "grad_norm": 0.09573322534561157, "learning_rate": 2.335703021352224e-05, "loss": 9.6784, "step": 44730 }, { "epoch": 0.22342630277909561, "grad_norm": 0.09953943639993668, "learning_rate": 2.3355528298580693e-05, "loss": 9.6667, "step": 44740 }, { "epoch": 0.22347624160403506, "grad_norm": 0.0922081395983696, "learning_rate": 2.335402638363914e-05, "loss": 9.6731, "step": 44750 }, { "epoch": 0.2235261804289745, "grad_norm": 0.09071052074432373, "learning_rate": 2.335252446869759e-05, "loss": 9.6642, "step": 44760 }, { "epoch": 0.22357611925391396, "grad_norm": 0.09215296804904938, "learning_rate": 2.335102255375604e-05, "loss": 9.667, "step": 44770 }, { "epoch": 0.2236260580788534, "grad_norm": 0.10084545612335205, "learning_rate": 2.3349520638814487e-05, "loss": 9.6648, "step": 44780 }, { "epoch": 0.22367599690379286, "grad_norm": 0.09967309981584549, "learning_rate": 2.334801872387294e-05, "loss": 9.659, "step": 44790 }, { "epoch": 0.2237259357287323, "grad_norm": 0.09629634022712708, "learning_rate": 2.3346516808931387e-05, "loss": 9.6709, "step": 44800 }, { "epoch": 0.22377587455367176, "grad_norm": 0.09285489469766617, "learning_rate": 2.3345014893989837e-05, "loss": 9.6672, "step": 44810 }, { "epoch": 0.2238258133786112, "grad_norm": 0.09779229015111923, "learning_rate": 2.3343512979048288e-05, "loss": 9.6669, "step": 44820 }, { "epoch": 0.22387575220355066, "grad_norm": 0.09266447275876999, "learning_rate": 2.3342011064106734e-05, "loss": 9.6644, "step": 44830 }, { "epoch": 0.2239256910284901, "grad_norm": 0.09615839272737503, "learning_rate": 2.3340509149165188e-05, "loss": 9.6617, "step": 44840 }, { "epoch": 0.22397562985342956, "grad_norm": 0.09608431905508041, "learning_rate": 2.3339007234223635e-05, "loss": 9.663, "step": 44850 }, { "epoch": 0.224025568678369, "grad_norm": 0.09576740115880966, "learning_rate": 2.3337505319282085e-05, "loss": 9.6596, "step": 44860 }, { "epoch": 0.22407550750330846, "grad_norm": 0.09225677698850632, "learning_rate": 2.3336003404340535e-05, "loss": 9.6597, "step": 44870 }, { "epoch": 0.2241254463282479, "grad_norm": 0.09069118648767471, "learning_rate": 2.3334501489398982e-05, "loss": 9.6613, "step": 44880 }, { "epoch": 0.22417538515318736, "grad_norm": 0.10063677281141281, "learning_rate": 2.3332999574457435e-05, "loss": 9.6712, "step": 44890 }, { "epoch": 0.2242253239781268, "grad_norm": 0.0957692414522171, "learning_rate": 2.3331497659515882e-05, "loss": 9.6627, "step": 44900 }, { "epoch": 0.22427526280306626, "grad_norm": 0.0920589417219162, "learning_rate": 2.3329995744574332e-05, "loss": 9.6683, "step": 44910 }, { "epoch": 0.2243252016280057, "grad_norm": 0.09941014647483826, "learning_rate": 2.3328493829632783e-05, "loss": 9.6632, "step": 44920 }, { "epoch": 0.22437514045294515, "grad_norm": 0.09040158987045288, "learning_rate": 2.332699191469123e-05, "loss": 9.6632, "step": 44930 }, { "epoch": 0.2244250792778846, "grad_norm": 0.09432369470596313, "learning_rate": 2.3325489999749683e-05, "loss": 9.6624, "step": 44940 }, { "epoch": 0.22447501810282405, "grad_norm": 0.09329992532730103, "learning_rate": 2.332398808480813e-05, "loss": 9.6735, "step": 44950 }, { "epoch": 0.2245249569277635, "grad_norm": 0.09206376224756241, "learning_rate": 2.332248616986658e-05, "loss": 9.6687, "step": 44960 }, { "epoch": 0.22457489575270295, "grad_norm": 0.09406663477420807, "learning_rate": 2.332098425492503e-05, "loss": 9.6687, "step": 44970 }, { "epoch": 0.2246248345776424, "grad_norm": 0.09512316435575485, "learning_rate": 2.331948233998348e-05, "loss": 9.6635, "step": 44980 }, { "epoch": 0.22467477340258185, "grad_norm": 0.09398701786994934, "learning_rate": 2.331798042504193e-05, "loss": 9.6649, "step": 44990 }, { "epoch": 0.22472471222752127, "grad_norm": 0.09211496263742447, "learning_rate": 2.3316478510100377e-05, "loss": 9.6654, "step": 45000 }, { "epoch": 0.22477465105246072, "grad_norm": 0.09733974188566208, "learning_rate": 2.3314976595158827e-05, "loss": 9.6564, "step": 45010 }, { "epoch": 0.22482458987740017, "grad_norm": 0.09194665402173996, "learning_rate": 2.3313474680217278e-05, "loss": 9.6596, "step": 45020 }, { "epoch": 0.22487452870233962, "grad_norm": 0.08959387242794037, "learning_rate": 2.3311972765275728e-05, "loss": 9.6623, "step": 45030 }, { "epoch": 0.22492446752727907, "grad_norm": 0.09784001857042313, "learning_rate": 2.3310470850334178e-05, "loss": 9.6599, "step": 45040 }, { "epoch": 0.22497440635221852, "grad_norm": 0.09329735487699509, "learning_rate": 2.3308968935392625e-05, "loss": 9.6656, "step": 45050 }, { "epoch": 0.22502434517715797, "grad_norm": 0.08951118588447571, "learning_rate": 2.3307467020451075e-05, "loss": 9.6618, "step": 45060 }, { "epoch": 0.22507428400209742, "grad_norm": 0.09492070972919464, "learning_rate": 2.3305965105509525e-05, "loss": 9.6589, "step": 45070 }, { "epoch": 0.22512422282703687, "grad_norm": 0.09987737983465195, "learning_rate": 2.3304463190567975e-05, "loss": 9.6615, "step": 45080 }, { "epoch": 0.22517416165197632, "grad_norm": 0.10113034397363663, "learning_rate": 2.3302961275626425e-05, "loss": 9.6608, "step": 45090 }, { "epoch": 0.22522410047691577, "grad_norm": 0.09234467148780823, "learning_rate": 2.3301459360684872e-05, "loss": 9.669, "step": 45100 }, { "epoch": 0.22527403930185522, "grad_norm": 0.09484550356864929, "learning_rate": 2.3299957445743322e-05, "loss": 9.6537, "step": 45110 }, { "epoch": 0.22532397812679467, "grad_norm": 0.09227019548416138, "learning_rate": 2.3298455530801773e-05, "loss": 9.6642, "step": 45120 }, { "epoch": 0.22537391695173412, "grad_norm": 0.0964643582701683, "learning_rate": 2.3296953615860223e-05, "loss": 9.6621, "step": 45130 }, { "epoch": 0.22542385577667357, "grad_norm": 0.09387196600437164, "learning_rate": 2.3295451700918673e-05, "loss": 9.6668, "step": 45140 }, { "epoch": 0.22547379460161301, "grad_norm": 0.08830858767032623, "learning_rate": 2.329394978597712e-05, "loss": 9.6616, "step": 45150 }, { "epoch": 0.22552373342655246, "grad_norm": 0.09885809570550919, "learning_rate": 2.329244787103557e-05, "loss": 9.6554, "step": 45160 }, { "epoch": 0.2255736722514919, "grad_norm": 0.09711761027574539, "learning_rate": 2.329094595609402e-05, "loss": 9.6619, "step": 45170 }, { "epoch": 0.22562361107643136, "grad_norm": 0.0906030535697937, "learning_rate": 2.328944404115247e-05, "loss": 9.661, "step": 45180 }, { "epoch": 0.2256735499013708, "grad_norm": 0.09631696343421936, "learning_rate": 2.328794212621092e-05, "loss": 9.6565, "step": 45190 }, { "epoch": 0.22572348872631026, "grad_norm": 0.09392791241407394, "learning_rate": 2.3286440211269367e-05, "loss": 9.6611, "step": 45200 }, { "epoch": 0.2257734275512497, "grad_norm": 0.10017602145671844, "learning_rate": 2.3284938296327817e-05, "loss": 9.6605, "step": 45210 }, { "epoch": 0.22582336637618916, "grad_norm": 0.09598571807146072, "learning_rate": 2.3283436381386268e-05, "loss": 9.664, "step": 45220 }, { "epoch": 0.2258733052011286, "grad_norm": 0.09706757217645645, "learning_rate": 2.3281934466444718e-05, "loss": 9.6531, "step": 45230 }, { "epoch": 0.22592324402606806, "grad_norm": 0.09351912885904312, "learning_rate": 2.3280432551503168e-05, "loss": 9.6569, "step": 45240 }, { "epoch": 0.2259731828510075, "grad_norm": 0.08977209031581879, "learning_rate": 2.3278930636561615e-05, "loss": 9.6535, "step": 45250 }, { "epoch": 0.22602312167594696, "grad_norm": 0.09070810675621033, "learning_rate": 2.3277428721620065e-05, "loss": 9.6611, "step": 45260 }, { "epoch": 0.2260730605008864, "grad_norm": 0.09817781299352646, "learning_rate": 2.3275926806678515e-05, "loss": 9.654, "step": 45270 }, { "epoch": 0.22612299932582586, "grad_norm": 0.09307067096233368, "learning_rate": 2.3274424891736965e-05, "loss": 9.6607, "step": 45280 }, { "epoch": 0.2261729381507653, "grad_norm": 0.09242681413888931, "learning_rate": 2.3272922976795415e-05, "loss": 9.6639, "step": 45290 }, { "epoch": 0.22622287697570476, "grad_norm": 0.09181907773017883, "learning_rate": 2.3271421061853866e-05, "loss": 9.653, "step": 45300 }, { "epoch": 0.2262728158006442, "grad_norm": 0.09517987072467804, "learning_rate": 2.3269919146912312e-05, "loss": 9.6602, "step": 45310 }, { "epoch": 0.22632275462558366, "grad_norm": 0.0962613895535469, "learning_rate": 2.3268417231970763e-05, "loss": 9.6629, "step": 45320 }, { "epoch": 0.2263726934505231, "grad_norm": 0.09088754653930664, "learning_rate": 2.3266915317029213e-05, "loss": 9.6521, "step": 45330 }, { "epoch": 0.22642263227546255, "grad_norm": 0.09162914752960205, "learning_rate": 2.3265413402087663e-05, "loss": 9.6579, "step": 45340 }, { "epoch": 0.226472571100402, "grad_norm": 0.09387911111116409, "learning_rate": 2.3263911487146113e-05, "loss": 9.6558, "step": 45350 }, { "epoch": 0.22652250992534145, "grad_norm": 0.08971641957759857, "learning_rate": 2.326240957220456e-05, "loss": 9.6652, "step": 45360 }, { "epoch": 0.2265724487502809, "grad_norm": 0.090997114777565, "learning_rate": 2.326090765726301e-05, "loss": 9.6571, "step": 45370 }, { "epoch": 0.22662238757522035, "grad_norm": 0.09356195479631424, "learning_rate": 2.325940574232146e-05, "loss": 9.6564, "step": 45380 }, { "epoch": 0.2266723264001598, "grad_norm": 0.09340345114469528, "learning_rate": 2.325790382737991e-05, "loss": 9.6616, "step": 45390 }, { "epoch": 0.22672226522509925, "grad_norm": 0.09831879287958145, "learning_rate": 2.325640191243836e-05, "loss": 9.6553, "step": 45400 }, { "epoch": 0.2267722040500387, "grad_norm": 0.0913265198469162, "learning_rate": 2.3254899997496807e-05, "loss": 9.6529, "step": 45410 }, { "epoch": 0.22682214287497815, "grad_norm": 0.09678717702627182, "learning_rate": 2.3253398082555258e-05, "loss": 9.657, "step": 45420 }, { "epoch": 0.2268720816999176, "grad_norm": 0.0980067104101181, "learning_rate": 2.3251896167613708e-05, "loss": 9.6614, "step": 45430 }, { "epoch": 0.22692202052485705, "grad_norm": 0.09428383409976959, "learning_rate": 2.3250394252672158e-05, "loss": 9.6596, "step": 45440 }, { "epoch": 0.2269719593497965, "grad_norm": 0.09209676086902618, "learning_rate": 2.3248892337730608e-05, "loss": 9.6553, "step": 45450 }, { "epoch": 0.22702189817473595, "grad_norm": 0.09065406024456024, "learning_rate": 2.3247390422789055e-05, "loss": 9.6595, "step": 45460 }, { "epoch": 0.2270718369996754, "grad_norm": 0.0918533131480217, "learning_rate": 2.3245888507847505e-05, "loss": 9.6571, "step": 45470 }, { "epoch": 0.22712177582461485, "grad_norm": 0.0916934683918953, "learning_rate": 2.3244386592905955e-05, "loss": 9.667, "step": 45480 }, { "epoch": 0.2271717146495543, "grad_norm": 0.09149292856454849, "learning_rate": 2.3242884677964406e-05, "loss": 9.6524, "step": 45490 }, { "epoch": 0.22722165347449375, "grad_norm": 0.09491834789514542, "learning_rate": 2.3241382763022856e-05, "loss": 9.6487, "step": 45500 }, { "epoch": 0.2272715922994332, "grad_norm": 0.09356304258108139, "learning_rate": 2.3239880848081302e-05, "loss": 9.6529, "step": 45510 }, { "epoch": 0.22732153112437264, "grad_norm": 0.09925433993339539, "learning_rate": 2.3238378933139753e-05, "loss": 9.6515, "step": 45520 }, { "epoch": 0.2273714699493121, "grad_norm": 0.09688141942024231, "learning_rate": 2.3236877018198203e-05, "loss": 9.656, "step": 45530 }, { "epoch": 0.22742140877425154, "grad_norm": 0.08831819891929626, "learning_rate": 2.3235375103256653e-05, "loss": 9.6533, "step": 45540 }, { "epoch": 0.227471347599191, "grad_norm": 0.0944901704788208, "learning_rate": 2.3233873188315103e-05, "loss": 9.657, "step": 45550 }, { "epoch": 0.22752128642413044, "grad_norm": 0.0912630707025528, "learning_rate": 2.323237127337355e-05, "loss": 9.6442, "step": 45560 }, { "epoch": 0.2275712252490699, "grad_norm": 0.09489890933036804, "learning_rate": 2.3230869358432e-05, "loss": 9.6535, "step": 45570 }, { "epoch": 0.22762116407400934, "grad_norm": 0.09343596547842026, "learning_rate": 2.322936744349045e-05, "loss": 9.6522, "step": 45580 }, { "epoch": 0.2276711028989488, "grad_norm": 0.0928681418299675, "learning_rate": 2.32278655285489e-05, "loss": 9.6557, "step": 45590 }, { "epoch": 0.22772104172388824, "grad_norm": 0.09620653092861176, "learning_rate": 2.322636361360735e-05, "loss": 9.6563, "step": 45600 }, { "epoch": 0.2277709805488277, "grad_norm": 0.09333667159080505, "learning_rate": 2.3224861698665797e-05, "loss": 9.6608, "step": 45610 }, { "epoch": 0.22782091937376714, "grad_norm": 0.0948384627699852, "learning_rate": 2.322335978372425e-05, "loss": 9.6551, "step": 45620 }, { "epoch": 0.2278708581987066, "grad_norm": 0.09374862164258957, "learning_rate": 2.3221857868782698e-05, "loss": 9.6495, "step": 45630 }, { "epoch": 0.22792079702364604, "grad_norm": 0.09943927079439163, "learning_rate": 2.3220355953841148e-05, "loss": 9.657, "step": 45640 }, { "epoch": 0.2279707358485855, "grad_norm": 0.09763931483030319, "learning_rate": 2.3218854038899598e-05, "loss": 9.6523, "step": 45650 }, { "epoch": 0.22802067467352494, "grad_norm": 0.09670069813728333, "learning_rate": 2.3217352123958045e-05, "loss": 9.652, "step": 45660 }, { "epoch": 0.2280706134984644, "grad_norm": 0.09232018142938614, "learning_rate": 2.32158502090165e-05, "loss": 9.657, "step": 45670 }, { "epoch": 0.22812055232340384, "grad_norm": 0.09801393002271652, "learning_rate": 2.3214348294074945e-05, "loss": 9.6503, "step": 45680 }, { "epoch": 0.22817049114834329, "grad_norm": 0.09959478676319122, "learning_rate": 2.3212846379133396e-05, "loss": 9.6495, "step": 45690 }, { "epoch": 0.22822042997328273, "grad_norm": 0.09530535340309143, "learning_rate": 2.3211344464191846e-05, "loss": 9.6492, "step": 45700 }, { "epoch": 0.22827036879822218, "grad_norm": 0.08995451778173447, "learning_rate": 2.3209842549250292e-05, "loss": 9.6496, "step": 45710 }, { "epoch": 0.22832030762316163, "grad_norm": 0.09303654730319977, "learning_rate": 2.3208340634308746e-05, "loss": 9.6446, "step": 45720 }, { "epoch": 0.22837024644810108, "grad_norm": 0.09486031532287598, "learning_rate": 2.3206838719367193e-05, "loss": 9.6531, "step": 45730 }, { "epoch": 0.22842018527304053, "grad_norm": 0.0937868058681488, "learning_rate": 2.3205336804425643e-05, "loss": 9.65, "step": 45740 }, { "epoch": 0.22847012409797998, "grad_norm": 0.09792537987232208, "learning_rate": 2.3203834889484093e-05, "loss": 9.651, "step": 45750 }, { "epoch": 0.22852006292291943, "grad_norm": 0.09761232137680054, "learning_rate": 2.320233297454254e-05, "loss": 9.6487, "step": 45760 }, { "epoch": 0.22857000174785888, "grad_norm": 0.09970896691083908, "learning_rate": 2.3200831059600994e-05, "loss": 9.6473, "step": 45770 }, { "epoch": 0.22861994057279833, "grad_norm": 0.096064992249012, "learning_rate": 2.319932914465944e-05, "loss": 9.6548, "step": 45780 }, { "epoch": 0.22866987939773778, "grad_norm": 0.09543237090110779, "learning_rate": 2.319782722971789e-05, "loss": 9.6464, "step": 45790 }, { "epoch": 0.22871981822267723, "grad_norm": 0.09486628323793411, "learning_rate": 2.319632531477634e-05, "loss": 9.6529, "step": 45800 }, { "epoch": 0.22876975704761668, "grad_norm": 0.09722953289747238, "learning_rate": 2.3194823399834788e-05, "loss": 9.6543, "step": 45810 }, { "epoch": 0.22881969587255613, "grad_norm": 0.09415905922651291, "learning_rate": 2.319332148489324e-05, "loss": 9.6438, "step": 45820 }, { "epoch": 0.22886963469749558, "grad_norm": 0.08979658782482147, "learning_rate": 2.3191819569951688e-05, "loss": 9.653, "step": 45830 }, { "epoch": 0.22891957352243503, "grad_norm": 0.09261351078748703, "learning_rate": 2.3190317655010138e-05, "loss": 9.6534, "step": 45840 }, { "epoch": 0.22896951234737448, "grad_norm": 0.09331527352333069, "learning_rate": 2.3188815740068588e-05, "loss": 9.6481, "step": 45850 }, { "epoch": 0.22901945117231393, "grad_norm": 0.09803377836942673, "learning_rate": 2.3187313825127035e-05, "loss": 9.6467, "step": 45860 }, { "epoch": 0.22906938999725338, "grad_norm": 0.09804876148700714, "learning_rate": 2.318581191018549e-05, "loss": 9.6431, "step": 45870 }, { "epoch": 0.22911932882219282, "grad_norm": 0.09359174221754074, "learning_rate": 2.3184309995243935e-05, "loss": 9.6487, "step": 45880 }, { "epoch": 0.22916926764713227, "grad_norm": 0.09482156485319138, "learning_rate": 2.3182808080302386e-05, "loss": 9.6482, "step": 45890 }, { "epoch": 0.22921920647207172, "grad_norm": 0.09959374368190765, "learning_rate": 2.3181306165360836e-05, "loss": 9.6447, "step": 45900 }, { "epoch": 0.22926914529701117, "grad_norm": 0.09299067407846451, "learning_rate": 2.3179804250419283e-05, "loss": 9.6411, "step": 45910 }, { "epoch": 0.22931908412195062, "grad_norm": 0.0913899689912796, "learning_rate": 2.3178302335477736e-05, "loss": 9.6476, "step": 45920 }, { "epoch": 0.22936902294689007, "grad_norm": 0.09546446055173874, "learning_rate": 2.3176800420536183e-05, "loss": 9.6434, "step": 45930 }, { "epoch": 0.22941896177182952, "grad_norm": 0.09306006133556366, "learning_rate": 2.3175298505594636e-05, "loss": 9.6393, "step": 45940 }, { "epoch": 0.22946890059676897, "grad_norm": 0.09446984529495239, "learning_rate": 2.3173796590653083e-05, "loss": 9.6481, "step": 45950 }, { "epoch": 0.22951883942170842, "grad_norm": 0.09467601031064987, "learning_rate": 2.317229467571153e-05, "loss": 9.6485, "step": 45960 }, { "epoch": 0.22956877824664787, "grad_norm": 0.09725523740053177, "learning_rate": 2.3170792760769984e-05, "loss": 9.6504, "step": 45970 }, { "epoch": 0.22961871707158732, "grad_norm": 0.09193247556686401, "learning_rate": 2.316929084582843e-05, "loss": 9.6495, "step": 45980 }, { "epoch": 0.22966865589652674, "grad_norm": 0.09462161362171173, "learning_rate": 2.3167788930886884e-05, "loss": 9.6387, "step": 45990 }, { "epoch": 0.2297185947214662, "grad_norm": 0.09702228009700775, "learning_rate": 2.316628701594533e-05, "loss": 9.6417, "step": 46000 }, { "epoch": 0.22976853354640564, "grad_norm": 0.09509331732988358, "learning_rate": 2.316478510100378e-05, "loss": 9.6414, "step": 46010 }, { "epoch": 0.2298184723713451, "grad_norm": 0.09766579419374466, "learning_rate": 2.316328318606223e-05, "loss": 9.6406, "step": 46020 }, { "epoch": 0.22986841119628454, "grad_norm": 0.10346722602844238, "learning_rate": 2.3161781271120678e-05, "loss": 9.6464, "step": 46030 }, { "epoch": 0.229918350021224, "grad_norm": 0.09228979051113129, "learning_rate": 2.316027935617913e-05, "loss": 9.6473, "step": 46040 }, { "epoch": 0.22996828884616344, "grad_norm": 0.10133613646030426, "learning_rate": 2.3158777441237578e-05, "loss": 9.6507, "step": 46050 }, { "epoch": 0.2300182276711029, "grad_norm": 0.09652470797300339, "learning_rate": 2.315727552629603e-05, "loss": 9.6408, "step": 46060 }, { "epoch": 0.23006816649604234, "grad_norm": 0.09428387135267258, "learning_rate": 2.315577361135448e-05, "loss": 9.6406, "step": 46070 }, { "epoch": 0.2301181053209818, "grad_norm": 0.09347372502088547, "learning_rate": 2.3154271696412925e-05, "loss": 9.6449, "step": 46080 }, { "epoch": 0.23016804414592124, "grad_norm": 0.09821302443742752, "learning_rate": 2.315276978147138e-05, "loss": 9.6412, "step": 46090 }, { "epoch": 0.23021798297086069, "grad_norm": 0.09411518275737762, "learning_rate": 2.3151267866529826e-05, "loss": 9.6504, "step": 46100 }, { "epoch": 0.23026792179580013, "grad_norm": 0.09326085448265076, "learning_rate": 2.3149765951588276e-05, "loss": 9.6424, "step": 46110 }, { "epoch": 0.23031786062073958, "grad_norm": 0.09685725718736649, "learning_rate": 2.3148264036646726e-05, "loss": 9.6439, "step": 46120 }, { "epoch": 0.23036779944567903, "grad_norm": 0.09456616640090942, "learning_rate": 2.3146762121705173e-05, "loss": 9.649, "step": 46130 }, { "epoch": 0.23041773827061848, "grad_norm": 0.08801116794347763, "learning_rate": 2.3145260206763626e-05, "loss": 9.6457, "step": 46140 }, { "epoch": 0.23046767709555793, "grad_norm": 0.09126181155443192, "learning_rate": 2.3143758291822073e-05, "loss": 9.6452, "step": 46150 }, { "epoch": 0.23051761592049738, "grad_norm": 0.0932285413146019, "learning_rate": 2.3142256376880523e-05, "loss": 9.6401, "step": 46160 }, { "epoch": 0.23056755474543683, "grad_norm": 0.10316822677850723, "learning_rate": 2.3140754461938974e-05, "loss": 9.6444, "step": 46170 }, { "epoch": 0.23061749357037628, "grad_norm": 0.0956098735332489, "learning_rate": 2.313925254699742e-05, "loss": 9.6408, "step": 46180 }, { "epoch": 0.23066743239531573, "grad_norm": 0.09577090293169022, "learning_rate": 2.3137750632055874e-05, "loss": 9.6406, "step": 46190 }, { "epoch": 0.23071737122025518, "grad_norm": 0.09496666491031647, "learning_rate": 2.313624871711432e-05, "loss": 9.6419, "step": 46200 }, { "epoch": 0.23076731004519463, "grad_norm": 0.09271476417779922, "learning_rate": 2.313474680217277e-05, "loss": 9.6484, "step": 46210 }, { "epoch": 0.23081724887013408, "grad_norm": 0.09295427054166794, "learning_rate": 2.313324488723122e-05, "loss": 9.6385, "step": 46220 }, { "epoch": 0.23086718769507353, "grad_norm": 0.09634546935558319, "learning_rate": 2.3131742972289668e-05, "loss": 9.6381, "step": 46230 }, { "epoch": 0.23091712652001298, "grad_norm": 0.09890441596508026, "learning_rate": 2.313024105734812e-05, "loss": 9.6422, "step": 46240 }, { "epoch": 0.23096706534495243, "grad_norm": 0.0954676941037178, "learning_rate": 2.3128739142406568e-05, "loss": 9.6382, "step": 46250 }, { "epoch": 0.23101700416989188, "grad_norm": 0.09203564375638962, "learning_rate": 2.312723722746502e-05, "loss": 9.6359, "step": 46260 }, { "epoch": 0.23106694299483133, "grad_norm": 0.09297526627779007, "learning_rate": 2.312573531252347e-05, "loss": 9.6386, "step": 46270 }, { "epoch": 0.23111688181977078, "grad_norm": 0.09271712601184845, "learning_rate": 2.3124233397581915e-05, "loss": 9.6389, "step": 46280 }, { "epoch": 0.23116682064471022, "grad_norm": 0.09680858999490738, "learning_rate": 2.312273148264037e-05, "loss": 9.6354, "step": 46290 }, { "epoch": 0.23121675946964967, "grad_norm": 0.09343792498111725, "learning_rate": 2.3121229567698816e-05, "loss": 9.6396, "step": 46300 }, { "epoch": 0.23126669829458912, "grad_norm": 0.10061474144458771, "learning_rate": 2.311972765275727e-05, "loss": 9.6373, "step": 46310 }, { "epoch": 0.23131663711952857, "grad_norm": 0.09283909946680069, "learning_rate": 2.3118225737815716e-05, "loss": 9.6334, "step": 46320 }, { "epoch": 0.23136657594446802, "grad_norm": 0.09406852722167969, "learning_rate": 2.3116723822874163e-05, "loss": 9.6364, "step": 46330 }, { "epoch": 0.23141651476940747, "grad_norm": 0.0919579491019249, "learning_rate": 2.3115221907932616e-05, "loss": 9.6377, "step": 46340 }, { "epoch": 0.23146645359434692, "grad_norm": 0.09919621050357819, "learning_rate": 2.3113719992991063e-05, "loss": 9.6411, "step": 46350 }, { "epoch": 0.23151639241928637, "grad_norm": 0.09455198049545288, "learning_rate": 2.3112218078049517e-05, "loss": 9.6333, "step": 46360 }, { "epoch": 0.23156633124422582, "grad_norm": 0.09368839114904404, "learning_rate": 2.3110716163107964e-05, "loss": 9.6295, "step": 46370 }, { "epoch": 0.23161627006916527, "grad_norm": 0.09375040233135223, "learning_rate": 2.310921424816641e-05, "loss": 9.6388, "step": 46380 }, { "epoch": 0.23166620889410472, "grad_norm": 0.09624538570642471, "learning_rate": 2.3107712333224864e-05, "loss": 9.6401, "step": 46390 }, { "epoch": 0.23171614771904417, "grad_norm": 0.09258920699357986, "learning_rate": 2.310621041828331e-05, "loss": 9.6337, "step": 46400 }, { "epoch": 0.23176608654398362, "grad_norm": 0.09907475113868713, "learning_rate": 2.3104708503341764e-05, "loss": 9.632, "step": 46410 }, { "epoch": 0.23181602536892307, "grad_norm": 0.094297394156456, "learning_rate": 2.310320658840021e-05, "loss": 9.6353, "step": 46420 }, { "epoch": 0.23186596419386252, "grad_norm": 0.09265825897455215, "learning_rate": 2.3101704673458658e-05, "loss": 9.6436, "step": 46430 }, { "epoch": 0.23191590301880197, "grad_norm": 0.09331150352954865, "learning_rate": 2.310020275851711e-05, "loss": 9.6316, "step": 46440 }, { "epoch": 0.23196584184374142, "grad_norm": 0.0924522653222084, "learning_rate": 2.3098700843575558e-05, "loss": 9.6313, "step": 46450 }, { "epoch": 0.23201578066868087, "grad_norm": 0.09283218532800674, "learning_rate": 2.3097198928634012e-05, "loss": 9.6353, "step": 46460 }, { "epoch": 0.23206571949362031, "grad_norm": 0.09375794231891632, "learning_rate": 2.309569701369246e-05, "loss": 9.6385, "step": 46470 }, { "epoch": 0.23211565831855976, "grad_norm": 0.0972442626953125, "learning_rate": 2.3094195098750905e-05, "loss": 9.6413, "step": 46480 }, { "epoch": 0.2321655971434992, "grad_norm": 0.09976401925086975, "learning_rate": 2.309269318380936e-05, "loss": 9.6418, "step": 46490 }, { "epoch": 0.23221553596843866, "grad_norm": 0.09310724586248398, "learning_rate": 2.3091191268867806e-05, "loss": 9.642, "step": 46500 }, { "epoch": 0.2322654747933781, "grad_norm": 0.09604420512914658, "learning_rate": 2.308968935392626e-05, "loss": 9.6406, "step": 46510 }, { "epoch": 0.23231541361831756, "grad_norm": 0.09983832389116287, "learning_rate": 2.3088187438984706e-05, "loss": 9.6365, "step": 46520 }, { "epoch": 0.232365352443257, "grad_norm": 0.09731082618236542, "learning_rate": 2.3086685524043153e-05, "loss": 9.6396, "step": 46530 }, { "epoch": 0.23241529126819646, "grad_norm": 0.09260038286447525, "learning_rate": 2.3085183609101606e-05, "loss": 9.6343, "step": 46540 }, { "epoch": 0.2324652300931359, "grad_norm": 0.09418473392724991, "learning_rate": 2.3083681694160053e-05, "loss": 9.6387, "step": 46550 }, { "epoch": 0.23251516891807536, "grad_norm": 0.09961055219173431, "learning_rate": 2.3082179779218507e-05, "loss": 9.6389, "step": 46560 }, { "epoch": 0.2325651077430148, "grad_norm": 0.0885944813489914, "learning_rate": 2.3080677864276954e-05, "loss": 9.6307, "step": 46570 }, { "epoch": 0.23261504656795426, "grad_norm": 0.0929674431681633, "learning_rate": 2.3079175949335404e-05, "loss": 9.6414, "step": 46580 }, { "epoch": 0.2326649853928937, "grad_norm": 0.09185917675495148, "learning_rate": 2.3077674034393854e-05, "loss": 9.6348, "step": 46590 }, { "epoch": 0.23271492421783316, "grad_norm": 0.09937243163585663, "learning_rate": 2.30761721194523e-05, "loss": 9.6294, "step": 46600 }, { "epoch": 0.2327648630427726, "grad_norm": 0.09624093770980835, "learning_rate": 2.3074670204510754e-05, "loss": 9.6273, "step": 46610 }, { "epoch": 0.23281480186771206, "grad_norm": 0.09259884804487228, "learning_rate": 2.30731682895692e-05, "loss": 9.6259, "step": 46620 }, { "epoch": 0.2328647406926515, "grad_norm": 0.09767475724220276, "learning_rate": 2.307166637462765e-05, "loss": 9.6371, "step": 46630 }, { "epoch": 0.23291467951759096, "grad_norm": 0.09269337356090546, "learning_rate": 2.30701644596861e-05, "loss": 9.6273, "step": 46640 }, { "epoch": 0.2329646183425304, "grad_norm": 0.09703583270311356, "learning_rate": 2.3068662544744548e-05, "loss": 9.6257, "step": 46650 }, { "epoch": 0.23301455716746985, "grad_norm": 0.09539482742547989, "learning_rate": 2.3067160629803002e-05, "loss": 9.631, "step": 46660 }, { "epoch": 0.2330644959924093, "grad_norm": 0.09522981196641922, "learning_rate": 2.306565871486145e-05, "loss": 9.6335, "step": 46670 }, { "epoch": 0.23311443481734875, "grad_norm": 0.09748247265815735, "learning_rate": 2.30641567999199e-05, "loss": 9.6364, "step": 46680 }, { "epoch": 0.2331643736422882, "grad_norm": 0.09750822186470032, "learning_rate": 2.306265488497835e-05, "loss": 9.6327, "step": 46690 }, { "epoch": 0.23321431246722765, "grad_norm": 0.09520017355680466, "learning_rate": 2.3061152970036796e-05, "loss": 9.6306, "step": 46700 }, { "epoch": 0.2332642512921671, "grad_norm": 0.09403938800096512, "learning_rate": 2.305965105509525e-05, "loss": 9.6276, "step": 46710 }, { "epoch": 0.23331419011710655, "grad_norm": 0.09592479467391968, "learning_rate": 2.3058149140153696e-05, "loss": 9.6331, "step": 46720 }, { "epoch": 0.233364128942046, "grad_norm": 0.09444668143987656, "learning_rate": 2.3056647225212146e-05, "loss": 9.6345, "step": 46730 }, { "epoch": 0.23341406776698545, "grad_norm": 0.09617168456315994, "learning_rate": 2.3055145310270596e-05, "loss": 9.6335, "step": 46740 }, { "epoch": 0.2334640065919249, "grad_norm": 0.09481420367956161, "learning_rate": 2.3053643395329043e-05, "loss": 9.6353, "step": 46750 }, { "epoch": 0.23351394541686435, "grad_norm": 0.09595254808664322, "learning_rate": 2.3052141480387497e-05, "loss": 9.6378, "step": 46760 }, { "epoch": 0.2335638842418038, "grad_norm": 0.09526467323303223, "learning_rate": 2.3050639565445944e-05, "loss": 9.6292, "step": 46770 }, { "epoch": 0.23361382306674325, "grad_norm": 0.09057611972093582, "learning_rate": 2.3049137650504394e-05, "loss": 9.6269, "step": 46780 }, { "epoch": 0.2336637618916827, "grad_norm": 0.09435748308897018, "learning_rate": 2.3047635735562844e-05, "loss": 9.638, "step": 46790 }, { "epoch": 0.23371370071662215, "grad_norm": 0.09341932833194733, "learning_rate": 2.304613382062129e-05, "loss": 9.6302, "step": 46800 }, { "epoch": 0.2337636395415616, "grad_norm": 0.09780159592628479, "learning_rate": 2.3044631905679744e-05, "loss": 9.6343, "step": 46810 }, { "epoch": 0.23381357836650105, "grad_norm": 0.09608551114797592, "learning_rate": 2.304312999073819e-05, "loss": 9.6324, "step": 46820 }, { "epoch": 0.2338635171914405, "grad_norm": 0.10504171252250671, "learning_rate": 2.304162807579664e-05, "loss": 9.6361, "step": 46830 }, { "epoch": 0.23391345601637994, "grad_norm": 0.09910661727190018, "learning_rate": 2.304012616085509e-05, "loss": 9.6292, "step": 46840 }, { "epoch": 0.2339633948413194, "grad_norm": 0.10046257078647614, "learning_rate": 2.3038624245913538e-05, "loss": 9.6332, "step": 46850 }, { "epoch": 0.23401333366625884, "grad_norm": 0.09717267006635666, "learning_rate": 2.3037122330971992e-05, "loss": 9.6163, "step": 46860 }, { "epoch": 0.2340632724911983, "grad_norm": 0.09229366481304169, "learning_rate": 2.303562041603044e-05, "loss": 9.6261, "step": 46870 }, { "epoch": 0.23411321131613774, "grad_norm": 0.09404976665973663, "learning_rate": 2.303411850108889e-05, "loss": 9.6323, "step": 46880 }, { "epoch": 0.2341631501410772, "grad_norm": 0.09583253413438797, "learning_rate": 2.303261658614734e-05, "loss": 9.6264, "step": 46890 }, { "epoch": 0.23421308896601664, "grad_norm": 0.09781666100025177, "learning_rate": 2.303111467120579e-05, "loss": 9.6268, "step": 46900 }, { "epoch": 0.2342630277909561, "grad_norm": 0.09339626133441925, "learning_rate": 2.302961275626424e-05, "loss": 9.6277, "step": 46910 }, { "epoch": 0.23431296661589554, "grad_norm": 0.09981784224510193, "learning_rate": 2.3028110841322686e-05, "loss": 9.6338, "step": 46920 }, { "epoch": 0.234362905440835, "grad_norm": 0.09911654889583588, "learning_rate": 2.3026608926381136e-05, "loss": 9.6334, "step": 46930 }, { "epoch": 0.23441284426577444, "grad_norm": 0.09392616897821426, "learning_rate": 2.3025107011439587e-05, "loss": 9.6212, "step": 46940 }, { "epoch": 0.2344627830907139, "grad_norm": 0.10090331733226776, "learning_rate": 2.3023605096498037e-05, "loss": 9.6232, "step": 46950 }, { "epoch": 0.23451272191565334, "grad_norm": 0.09081251919269562, "learning_rate": 2.3022103181556487e-05, "loss": 9.6258, "step": 46960 }, { "epoch": 0.2345626607405928, "grad_norm": 0.09683696180582047, "learning_rate": 2.3020601266614934e-05, "loss": 9.6245, "step": 46970 }, { "epoch": 0.2346125995655322, "grad_norm": 0.09155721217393875, "learning_rate": 2.3019099351673384e-05, "loss": 9.6267, "step": 46980 }, { "epoch": 0.23466253839047166, "grad_norm": 0.09573863446712494, "learning_rate": 2.3017597436731834e-05, "loss": 9.6289, "step": 46990 }, { "epoch": 0.2347124772154111, "grad_norm": 0.09463366866111755, "learning_rate": 2.3016095521790284e-05, "loss": 9.6195, "step": 47000 }, { "epoch": 0.23476241604035056, "grad_norm": 0.10097618401050568, "learning_rate": 2.3014593606848734e-05, "loss": 9.6101, "step": 47010 }, { "epoch": 0.23481235486529, "grad_norm": 0.09264184534549713, "learning_rate": 2.301309169190718e-05, "loss": 9.621, "step": 47020 }, { "epoch": 0.23486229369022946, "grad_norm": 0.09526827186346054, "learning_rate": 2.301158977696563e-05, "loss": 9.6284, "step": 47030 }, { "epoch": 0.2349122325151689, "grad_norm": 0.09512627869844437, "learning_rate": 2.301008786202408e-05, "loss": 9.6286, "step": 47040 }, { "epoch": 0.23496217134010836, "grad_norm": 0.09627030789852142, "learning_rate": 2.300858594708253e-05, "loss": 9.6311, "step": 47050 }, { "epoch": 0.2350121101650478, "grad_norm": 0.08953950554132462, "learning_rate": 2.3007084032140982e-05, "loss": 9.6195, "step": 47060 }, { "epoch": 0.23506204898998725, "grad_norm": 0.09324587136507034, "learning_rate": 2.300558211719943e-05, "loss": 9.6292, "step": 47070 }, { "epoch": 0.2351119878149267, "grad_norm": 0.09066016227006912, "learning_rate": 2.300408020225788e-05, "loss": 9.6237, "step": 47080 }, { "epoch": 0.23516192663986615, "grad_norm": 0.08983506262302399, "learning_rate": 2.300257828731633e-05, "loss": 9.6283, "step": 47090 }, { "epoch": 0.2352118654648056, "grad_norm": 0.09250736236572266, "learning_rate": 2.300107637237478e-05, "loss": 9.6245, "step": 47100 }, { "epoch": 0.23526180428974505, "grad_norm": 0.09599629789590836, "learning_rate": 2.299957445743323e-05, "loss": 9.6202, "step": 47110 }, { "epoch": 0.2353117431146845, "grad_norm": 0.09286179393529892, "learning_rate": 2.2998072542491676e-05, "loss": 9.6168, "step": 47120 }, { "epoch": 0.23536168193962395, "grad_norm": 0.09755680710077286, "learning_rate": 2.2996570627550126e-05, "loss": 9.6249, "step": 47130 }, { "epoch": 0.2354116207645634, "grad_norm": 0.09816092252731323, "learning_rate": 2.2995068712608577e-05, "loss": 9.6213, "step": 47140 }, { "epoch": 0.23546155958950285, "grad_norm": 0.08990190178155899, "learning_rate": 2.2993566797667027e-05, "loss": 9.6237, "step": 47150 }, { "epoch": 0.2355114984144423, "grad_norm": 0.09264282137155533, "learning_rate": 2.2992064882725477e-05, "loss": 9.6222, "step": 47160 }, { "epoch": 0.23556143723938175, "grad_norm": 0.0953594446182251, "learning_rate": 2.2990562967783924e-05, "loss": 9.6219, "step": 47170 }, { "epoch": 0.2356113760643212, "grad_norm": 0.09457553923130035, "learning_rate": 2.2989061052842374e-05, "loss": 9.6167, "step": 47180 }, { "epoch": 0.23566131488926065, "grad_norm": 0.09653682261705399, "learning_rate": 2.2987559137900824e-05, "loss": 9.6214, "step": 47190 }, { "epoch": 0.2357112537142001, "grad_norm": 0.09378449618816376, "learning_rate": 2.2986057222959274e-05, "loss": 9.6282, "step": 47200 }, { "epoch": 0.23576119253913955, "grad_norm": 0.0928981825709343, "learning_rate": 2.2984555308017724e-05, "loss": 9.6326, "step": 47210 }, { "epoch": 0.235811131364079, "grad_norm": 0.0937650203704834, "learning_rate": 2.298305339307617e-05, "loss": 9.6249, "step": 47220 }, { "epoch": 0.23586107018901845, "grad_norm": 0.09575259685516357, "learning_rate": 2.298155147813462e-05, "loss": 9.6179, "step": 47230 }, { "epoch": 0.2359110090139579, "grad_norm": 75396.46875, "learning_rate": 2.298004956319307e-05, "loss": 9.9622, "step": 47240 }, { "epoch": 0.23596094783889734, "grad_norm": 0.09559078514575958, "learning_rate": 2.2978547648251522e-05, "loss": 9.6256, "step": 47250 }, { "epoch": 0.2360108866638368, "grad_norm": 0.09144776314496994, "learning_rate": 2.2977045733309972e-05, "loss": 17.3369, "step": 47260 }, { "epoch": 0.23606082548877624, "grad_norm": 0.09429620951414108, "learning_rate": 2.2975543818368422e-05, "loss": 12.2017, "step": 47270 }, { "epoch": 0.2361107643137157, "grad_norm": 0.09179428964853287, "learning_rate": 2.297404190342687e-05, "loss": 9.6117, "step": 47280 }, { "epoch": 0.23616070313865514, "grad_norm": 0.09685272723436356, "learning_rate": 2.297253998848532e-05, "loss": 9.6269, "step": 47290 }, { "epoch": 0.2362106419635946, "grad_norm": 0.09493184089660645, "learning_rate": 2.297103807354377e-05, "loss": 9.6188, "step": 47300 }, { "epoch": 0.23626058078853404, "grad_norm": 0.10096865147352219, "learning_rate": 2.296953615860222e-05, "loss": 9.6169, "step": 47310 }, { "epoch": 0.2363105196134735, "grad_norm": 0.10004733502864838, "learning_rate": 2.296803424366067e-05, "loss": 9.6244, "step": 47320 }, { "epoch": 0.23636045843841294, "grad_norm": 0.09608258306980133, "learning_rate": 2.2966532328719116e-05, "loss": 9.6154, "step": 47330 }, { "epoch": 0.2364103972633524, "grad_norm": 0.09426455199718475, "learning_rate": 2.2965030413777567e-05, "loss": 9.6204, "step": 47340 }, { "epoch": 0.23646033608829184, "grad_norm": 0.09753237664699554, "learning_rate": 2.2963528498836017e-05, "loss": 9.6185, "step": 47350 }, { "epoch": 0.2365102749132313, "grad_norm": 0.09157487750053406, "learning_rate": 2.2962026583894467e-05, "loss": 9.6327, "step": 47360 }, { "epoch": 0.23656021373817074, "grad_norm": 0.10023976117372513, "learning_rate": 2.2960524668952917e-05, "loss": 9.6226, "step": 47370 }, { "epoch": 0.2366101525631102, "grad_norm": 0.09586095809936523, "learning_rate": 2.2959022754011364e-05, "loss": 9.6217, "step": 47380 }, { "epoch": 0.23666009138804964, "grad_norm": 0.0944489985704422, "learning_rate": 2.2957520839069814e-05, "loss": 9.6157, "step": 47390 }, { "epoch": 0.2367100302129891, "grad_norm": 0.09229391068220139, "learning_rate": 2.2956018924128264e-05, "loss": 9.6213, "step": 47400 }, { "epoch": 0.23675996903792854, "grad_norm": 0.09394244849681854, "learning_rate": 2.2954517009186714e-05, "loss": 9.6235, "step": 47410 }, { "epoch": 0.23680990786286799, "grad_norm": 0.0892910584807396, "learning_rate": 2.2953015094245165e-05, "loss": 9.6202, "step": 47420 }, { "epoch": 0.23685984668780743, "grad_norm": 0.09789809584617615, "learning_rate": 2.295151317930361e-05, "loss": 9.6224, "step": 47430 }, { "epoch": 0.23690978551274688, "grad_norm": 0.0950971245765686, "learning_rate": 2.295001126436206e-05, "loss": 9.6107, "step": 47440 }, { "epoch": 0.23695972433768633, "grad_norm": 0.09064780175685883, "learning_rate": 2.2948509349420512e-05, "loss": 9.6138, "step": 47450 }, { "epoch": 0.23700966316262578, "grad_norm": 0.09859991818666458, "learning_rate": 2.2947007434478962e-05, "loss": 9.6197, "step": 47460 }, { "epoch": 0.23705960198756523, "grad_norm": 0.0948246568441391, "learning_rate": 2.2945505519537412e-05, "loss": 9.6189, "step": 47470 }, { "epoch": 0.23710954081250468, "grad_norm": 0.09848882257938385, "learning_rate": 2.294400360459586e-05, "loss": 9.611, "step": 47480 }, { "epoch": 0.23715947963744413, "grad_norm": 0.10083547234535217, "learning_rate": 2.294250168965431e-05, "loss": 9.6214, "step": 47490 }, { "epoch": 0.23720941846238358, "grad_norm": 0.09797154366970062, "learning_rate": 2.294099977471276e-05, "loss": 9.6184, "step": 47500 }, { "epoch": 0.23725935728732303, "grad_norm": 0.0983353927731514, "learning_rate": 2.293949785977121e-05, "loss": 9.6119, "step": 47510 }, { "epoch": 0.23730929611226248, "grad_norm": 0.10210006684064865, "learning_rate": 2.293799594482966e-05, "loss": 9.6136, "step": 47520 }, { "epoch": 0.23735923493720193, "grad_norm": 0.10113011300563812, "learning_rate": 2.2936494029888106e-05, "loss": 9.6137, "step": 47530 }, { "epoch": 0.23740917376214138, "grad_norm": 0.0893891304731369, "learning_rate": 2.2934992114946557e-05, "loss": 9.6155, "step": 47540 }, { "epoch": 0.23745911258708083, "grad_norm": 0.09196850657463074, "learning_rate": 2.2933490200005007e-05, "loss": 9.6032, "step": 47550 }, { "epoch": 0.23750905141202028, "grad_norm": 0.08714939653873444, "learning_rate": 2.2931988285063457e-05, "loss": 9.6155, "step": 47560 }, { "epoch": 0.23755899023695973, "grad_norm": 0.0882970318198204, "learning_rate": 2.2930486370121907e-05, "loss": 9.6081, "step": 47570 }, { "epoch": 0.23760892906189918, "grad_norm": 0.09797061234712601, "learning_rate": 2.2928984455180354e-05, "loss": 9.6092, "step": 47580 }, { "epoch": 0.23765886788683863, "grad_norm": 0.09515173733234406, "learning_rate": 2.2927482540238807e-05, "loss": 9.6153, "step": 47590 }, { "epoch": 0.23770880671177808, "grad_norm": 0.09541980177164078, "learning_rate": 2.2925980625297254e-05, "loss": 9.6167, "step": 47600 }, { "epoch": 0.23775874553671753, "grad_norm": 0.09236780554056168, "learning_rate": 2.2924478710355704e-05, "loss": 9.6251, "step": 47610 }, { "epoch": 0.23780868436165697, "grad_norm": 0.09183859080076218, "learning_rate": 2.2922976795414155e-05, "loss": 9.6104, "step": 47620 }, { "epoch": 0.23785862318659642, "grad_norm": 0.1002270057797432, "learning_rate": 2.29214748804726e-05, "loss": 9.6142, "step": 47630 }, { "epoch": 0.23790856201153587, "grad_norm": 0.09763851016759872, "learning_rate": 2.2919972965531055e-05, "loss": 9.6067, "step": 47640 }, { "epoch": 0.23795850083647532, "grad_norm": 0.09223950654268265, "learning_rate": 2.2918471050589502e-05, "loss": 9.6198, "step": 47650 }, { "epoch": 0.23800843966141477, "grad_norm": 0.09290538728237152, "learning_rate": 2.2916969135647952e-05, "loss": 9.6121, "step": 47660 }, { "epoch": 0.23805837848635422, "grad_norm": 0.09840866923332214, "learning_rate": 2.2915467220706402e-05, "loss": 9.6148, "step": 47670 }, { "epoch": 0.23810831731129367, "grad_norm": 0.0904998853802681, "learning_rate": 2.291396530576485e-05, "loss": 9.6134, "step": 47680 }, { "epoch": 0.23815825613623312, "grad_norm": 0.09012880176305771, "learning_rate": 2.2912463390823302e-05, "loss": 9.6154, "step": 47690 }, { "epoch": 0.23820819496117257, "grad_norm": 0.08958422392606735, "learning_rate": 2.291096147588175e-05, "loss": 9.6192, "step": 47700 }, { "epoch": 0.23825813378611202, "grad_norm": 0.09270545840263367, "learning_rate": 2.29094595609402e-05, "loss": 9.6096, "step": 47710 }, { "epoch": 0.23830807261105147, "grad_norm": 0.09306894987821579, "learning_rate": 2.290795764599865e-05, "loss": 9.604, "step": 47720 }, { "epoch": 0.23835801143599092, "grad_norm": 0.1006632074713707, "learning_rate": 2.2906455731057096e-05, "loss": 9.6153, "step": 47730 }, { "epoch": 0.23840795026093037, "grad_norm": 0.08890628069639206, "learning_rate": 2.290495381611555e-05, "loss": 9.6092, "step": 47740 }, { "epoch": 0.23845788908586982, "grad_norm": 0.09797194600105286, "learning_rate": 2.2903451901173997e-05, "loss": 9.6124, "step": 47750 }, { "epoch": 0.23850782791080927, "grad_norm": 0.0914837047457695, "learning_rate": 2.2901949986232447e-05, "loss": 9.6155, "step": 47760 }, { "epoch": 0.23855776673574872, "grad_norm": 0.09398198127746582, "learning_rate": 2.2900448071290897e-05, "loss": 9.6183, "step": 47770 }, { "epoch": 0.23860770556068817, "grad_norm": 0.10565483570098877, "learning_rate": 2.2898946156349344e-05, "loss": 9.6146, "step": 47780 }, { "epoch": 0.23865764438562762, "grad_norm": 0.09536195546388626, "learning_rate": 2.2897444241407797e-05, "loss": 9.6134, "step": 47790 }, { "epoch": 0.23870758321056706, "grad_norm": 0.0907558798789978, "learning_rate": 2.2895942326466244e-05, "loss": 9.6117, "step": 47800 }, { "epoch": 0.23875752203550651, "grad_norm": 0.09679032117128372, "learning_rate": 2.2894440411524694e-05, "loss": 9.6129, "step": 47810 }, { "epoch": 0.23880746086044596, "grad_norm": 0.08943892270326614, "learning_rate": 2.2892938496583145e-05, "loss": 9.618, "step": 47820 }, { "epoch": 0.2388573996853854, "grad_norm": 0.0918293297290802, "learning_rate": 2.289143658164159e-05, "loss": 9.6098, "step": 47830 }, { "epoch": 0.23890733851032486, "grad_norm": 0.09785201400518417, "learning_rate": 2.2889934666700045e-05, "loss": 9.6052, "step": 47840 }, { "epoch": 0.2389572773352643, "grad_norm": 0.09538784623146057, "learning_rate": 2.2888432751758492e-05, "loss": 9.5993, "step": 47850 }, { "epoch": 0.23900721616020376, "grad_norm": 0.09343953430652618, "learning_rate": 2.2886930836816942e-05, "loss": 9.6063, "step": 47860 }, { "epoch": 0.2390571549851432, "grad_norm": 0.0926864743232727, "learning_rate": 2.2885428921875392e-05, "loss": 9.6103, "step": 47870 }, { "epoch": 0.23910709381008266, "grad_norm": 0.09175824373960495, "learning_rate": 2.288392700693384e-05, "loss": 9.6072, "step": 47880 }, { "epoch": 0.2391570326350221, "grad_norm": 0.09742142260074615, "learning_rate": 2.2882425091992292e-05, "loss": 9.6058, "step": 47890 }, { "epoch": 0.23920697145996156, "grad_norm": 0.09516909718513489, "learning_rate": 2.288092317705074e-05, "loss": 9.6085, "step": 47900 }, { "epoch": 0.239256910284901, "grad_norm": 0.09717977792024612, "learning_rate": 2.2879421262109193e-05, "loss": 9.6036, "step": 47910 }, { "epoch": 0.23930684910984046, "grad_norm": 0.09463629871606827, "learning_rate": 2.287791934716764e-05, "loss": 9.6029, "step": 47920 }, { "epoch": 0.2393567879347799, "grad_norm": 0.09392336010932922, "learning_rate": 2.2876417432226086e-05, "loss": 9.6177, "step": 47930 }, { "epoch": 0.23940672675971936, "grad_norm": 0.09007874131202698, "learning_rate": 2.287491551728454e-05, "loss": 9.6079, "step": 47940 }, { "epoch": 0.2394566655846588, "grad_norm": 0.09397672861814499, "learning_rate": 2.2873413602342987e-05, "loss": 9.6104, "step": 47950 }, { "epoch": 0.23950660440959823, "grad_norm": 0.08829968422651291, "learning_rate": 2.287191168740144e-05, "loss": 9.6029, "step": 47960 }, { "epoch": 0.23955654323453768, "grad_norm": 0.09608025848865509, "learning_rate": 2.2870409772459887e-05, "loss": 9.6063, "step": 47970 }, { "epoch": 0.23960648205947713, "grad_norm": 0.09699834138154984, "learning_rate": 2.2868907857518334e-05, "loss": 9.6119, "step": 47980 }, { "epoch": 0.23965642088441658, "grad_norm": 0.08946532756090164, "learning_rate": 2.2867405942576787e-05, "loss": 9.6014, "step": 47990 }, { "epoch": 0.23970635970935603, "grad_norm": 0.0944380983710289, "learning_rate": 2.2865904027635234e-05, "loss": 9.6076, "step": 48000 }, { "epoch": 0.23975629853429548, "grad_norm": 0.09391778707504272, "learning_rate": 2.2864402112693688e-05, "loss": 9.6034, "step": 48010 }, { "epoch": 0.23980623735923493, "grad_norm": 0.09647521376609802, "learning_rate": 2.2862900197752135e-05, "loss": 9.6077, "step": 48020 }, { "epoch": 0.23985617618417437, "grad_norm": 0.09497778117656708, "learning_rate": 2.286139828281058e-05, "loss": 9.6047, "step": 48030 }, { "epoch": 0.23990611500911382, "grad_norm": 0.10870006680488586, "learning_rate": 2.2859896367869035e-05, "loss": 9.6094, "step": 48040 }, { "epoch": 0.23995605383405327, "grad_norm": 0.09138607233762741, "learning_rate": 2.2858394452927482e-05, "loss": 9.6093, "step": 48050 }, { "epoch": 0.24000599265899272, "grad_norm": 0.09472063183784485, "learning_rate": 2.2856892537985935e-05, "loss": 9.6105, "step": 48060 }, { "epoch": 0.24005593148393217, "grad_norm": 0.09620670229196548, "learning_rate": 2.2855390623044382e-05, "loss": 9.5972, "step": 48070 }, { "epoch": 0.24010587030887162, "grad_norm": 0.09534239768981934, "learning_rate": 2.285388870810283e-05, "loss": 9.6064, "step": 48080 }, { "epoch": 0.24015580913381107, "grad_norm": 0.09170860797166824, "learning_rate": 2.2852386793161282e-05, "loss": 9.5999, "step": 48090 }, { "epoch": 0.24020574795875052, "grad_norm": 0.09515348076820374, "learning_rate": 2.285088487821973e-05, "loss": 9.6172, "step": 48100 }, { "epoch": 0.24025568678368997, "grad_norm": 0.09469642490148544, "learning_rate": 2.2849382963278183e-05, "loss": 9.6089, "step": 48110 }, { "epoch": 0.24030562560862942, "grad_norm": 0.09230419993400574, "learning_rate": 2.284788104833663e-05, "loss": 9.6098, "step": 48120 }, { "epoch": 0.24035556443356887, "grad_norm": 0.09317916631698608, "learning_rate": 2.2846379133395076e-05, "loss": 9.5989, "step": 48130 }, { "epoch": 0.24040550325850832, "grad_norm": 0.0909963995218277, "learning_rate": 2.284487721845353e-05, "loss": 9.6125, "step": 48140 }, { "epoch": 0.24045544208344777, "grad_norm": 0.09574473649263382, "learning_rate": 2.2843375303511977e-05, "loss": 9.601, "step": 48150 }, { "epoch": 0.24050538090838722, "grad_norm": 0.09136365354061127, "learning_rate": 2.284187338857043e-05, "loss": 9.6029, "step": 48160 }, { "epoch": 0.24055531973332667, "grad_norm": 0.0943564847111702, "learning_rate": 2.2840371473628877e-05, "loss": 9.6088, "step": 48170 }, { "epoch": 0.24060525855826612, "grad_norm": 0.09551718086004257, "learning_rate": 2.2838869558687324e-05, "loss": 9.6124, "step": 48180 }, { "epoch": 0.24065519738320557, "grad_norm": 0.09354714304208755, "learning_rate": 2.2837367643745777e-05, "loss": 9.6024, "step": 48190 }, { "epoch": 0.24070513620814502, "grad_norm": 0.09227943420410156, "learning_rate": 2.2835865728804224e-05, "loss": 9.6047, "step": 48200 }, { "epoch": 0.24075507503308446, "grad_norm": 0.09799333661794662, "learning_rate": 2.2834363813862678e-05, "loss": 9.6027, "step": 48210 }, { "epoch": 0.24080501385802391, "grad_norm": 0.09457400441169739, "learning_rate": 2.2832861898921125e-05, "loss": 9.6017, "step": 48220 }, { "epoch": 0.24085495268296336, "grad_norm": 0.09242776036262512, "learning_rate": 2.2831359983979575e-05, "loss": 9.6013, "step": 48230 }, { "epoch": 0.2409048915079028, "grad_norm": 0.09647472202777863, "learning_rate": 2.2829858069038025e-05, "loss": 9.5992, "step": 48240 }, { "epoch": 0.24095483033284226, "grad_norm": 0.09526066482067108, "learning_rate": 2.2828356154096472e-05, "loss": 9.6049, "step": 48250 }, { "epoch": 0.2410047691577817, "grad_norm": 0.09362996369600296, "learning_rate": 2.2826854239154925e-05, "loss": 9.6031, "step": 48260 }, { "epoch": 0.24105470798272116, "grad_norm": 0.0959758386015892, "learning_rate": 2.2825352324213372e-05, "loss": 9.6, "step": 48270 }, { "epoch": 0.2411046468076606, "grad_norm": 0.09466012567281723, "learning_rate": 2.2823850409271822e-05, "loss": 9.6062, "step": 48280 }, { "epoch": 0.24115458563260006, "grad_norm": 0.08923938125371933, "learning_rate": 2.2822348494330272e-05, "loss": 9.5986, "step": 48290 }, { "epoch": 0.2412045244575395, "grad_norm": 0.09768188744783401, "learning_rate": 2.282084657938872e-05, "loss": 9.5975, "step": 48300 }, { "epoch": 0.24125446328247896, "grad_norm": 0.0997987762093544, "learning_rate": 2.2819344664447173e-05, "loss": 9.5978, "step": 48310 }, { "epoch": 0.2413044021074184, "grad_norm": 0.0951227992773056, "learning_rate": 2.281784274950562e-05, "loss": 9.6114, "step": 48320 }, { "epoch": 0.24135434093235786, "grad_norm": 0.08892112970352173, "learning_rate": 2.281634083456407e-05, "loss": 9.6082, "step": 48330 }, { "epoch": 0.2414042797572973, "grad_norm": 0.0971013680100441, "learning_rate": 2.281483891962252e-05, "loss": 9.6065, "step": 48340 }, { "epoch": 0.24145421858223676, "grad_norm": 0.09072565287351608, "learning_rate": 2.2813337004680967e-05, "loss": 9.5978, "step": 48350 }, { "epoch": 0.2415041574071762, "grad_norm": 0.08944379538297653, "learning_rate": 2.281183508973942e-05, "loss": 9.5961, "step": 48360 }, { "epoch": 0.24155409623211566, "grad_norm": 0.09630691260099411, "learning_rate": 2.2810333174797867e-05, "loss": 9.6005, "step": 48370 }, { "epoch": 0.2416040350570551, "grad_norm": 0.09116937965154648, "learning_rate": 2.2808831259856317e-05, "loss": 9.6018, "step": 48380 }, { "epoch": 0.24165397388199455, "grad_norm": 0.09356780350208282, "learning_rate": 2.2807329344914768e-05, "loss": 9.6053, "step": 48390 }, { "epoch": 0.241703912706934, "grad_norm": 0.09499246627092361, "learning_rate": 2.2805827429973214e-05, "loss": 9.6004, "step": 48400 }, { "epoch": 0.24175385153187345, "grad_norm": 0.09654933959245682, "learning_rate": 2.2804325515031668e-05, "loss": 9.5968, "step": 48410 }, { "epoch": 0.2418037903568129, "grad_norm": 0.09912944585084915, "learning_rate": 2.2802823600090115e-05, "loss": 9.5996, "step": 48420 }, { "epoch": 0.24185372918175235, "grad_norm": 0.09671138226985931, "learning_rate": 2.2801321685148565e-05, "loss": 9.6, "step": 48430 }, { "epoch": 0.2419036680066918, "grad_norm": 0.09061242640018463, "learning_rate": 2.2799819770207015e-05, "loss": 9.6022, "step": 48440 }, { "epoch": 0.24195360683163125, "grad_norm": 0.09459752589464188, "learning_rate": 2.2798317855265462e-05, "loss": 9.5978, "step": 48450 }, { "epoch": 0.2420035456565707, "grad_norm": 0.08927514404058456, "learning_rate": 2.2796815940323915e-05, "loss": 9.6072, "step": 48460 }, { "epoch": 0.24205348448151015, "grad_norm": 0.09183401614427567, "learning_rate": 2.2795314025382362e-05, "loss": 9.5982, "step": 48470 }, { "epoch": 0.2421034233064496, "grad_norm": 0.09792063385248184, "learning_rate": 2.2793812110440812e-05, "loss": 9.5965, "step": 48480 }, { "epoch": 0.24215336213138905, "grad_norm": 0.09104972332715988, "learning_rate": 2.2792310195499263e-05, "loss": 9.5929, "step": 48490 }, { "epoch": 0.2422033009563285, "grad_norm": 0.09637004882097244, "learning_rate": 2.279080828055771e-05, "loss": 9.6063, "step": 48500 }, { "epoch": 0.24225323978126795, "grad_norm": 0.09254331886768341, "learning_rate": 2.2789306365616163e-05, "loss": 9.6029, "step": 48510 }, { "epoch": 0.2423031786062074, "grad_norm": 0.09216886013746262, "learning_rate": 2.278780445067461e-05, "loss": 9.5954, "step": 48520 }, { "epoch": 0.24235311743114685, "grad_norm": 0.09522795677185059, "learning_rate": 2.278630253573306e-05, "loss": 9.6019, "step": 48530 }, { "epoch": 0.2424030562560863, "grad_norm": 0.09067758917808533, "learning_rate": 2.278480062079151e-05, "loss": 9.5966, "step": 48540 }, { "epoch": 0.24245299508102575, "grad_norm": 0.10162653774023056, "learning_rate": 2.278329870584996e-05, "loss": 9.5896, "step": 48550 }, { "epoch": 0.2425029339059652, "grad_norm": 0.09732117503881454, "learning_rate": 2.278179679090841e-05, "loss": 9.5988, "step": 48560 }, { "epoch": 0.24255287273090465, "grad_norm": 0.09825015068054199, "learning_rate": 2.2780294875966857e-05, "loss": 9.586, "step": 48570 }, { "epoch": 0.2426028115558441, "grad_norm": 0.09816539287567139, "learning_rate": 2.2778792961025307e-05, "loss": 9.6021, "step": 48580 }, { "epoch": 0.24265275038078354, "grad_norm": 0.09649891406297684, "learning_rate": 2.2777291046083758e-05, "loss": 9.5972, "step": 48590 }, { "epoch": 0.242702689205723, "grad_norm": 0.09952486306428909, "learning_rate": 2.2775789131142208e-05, "loss": 9.6009, "step": 48600 }, { "epoch": 0.24275262803066244, "grad_norm": 0.0953533872961998, "learning_rate": 2.2774287216200658e-05, "loss": 9.602, "step": 48610 }, { "epoch": 0.2428025668556019, "grad_norm": 0.09693508595228195, "learning_rate": 2.2772785301259105e-05, "loss": 9.5935, "step": 48620 }, { "epoch": 0.24285250568054134, "grad_norm": 0.08956372737884521, "learning_rate": 2.2771283386317555e-05, "loss": 9.5966, "step": 48630 }, { "epoch": 0.2429024445054808, "grad_norm": 0.10089200735092163, "learning_rate": 2.2769781471376005e-05, "loss": 9.5974, "step": 48640 }, { "epoch": 0.24295238333042024, "grad_norm": 0.09470240026712418, "learning_rate": 2.2768279556434455e-05, "loss": 9.5923, "step": 48650 }, { "epoch": 0.2430023221553597, "grad_norm": 0.08645424246788025, "learning_rate": 2.2766777641492905e-05, "loss": 9.597, "step": 48660 }, { "epoch": 0.24305226098029914, "grad_norm": 0.09936508536338806, "learning_rate": 2.2765275726551352e-05, "loss": 9.5877, "step": 48670 }, { "epoch": 0.2431021998052386, "grad_norm": 0.09653277695178986, "learning_rate": 2.2763773811609802e-05, "loss": 9.5927, "step": 48680 }, { "epoch": 0.24315213863017804, "grad_norm": 0.09702669084072113, "learning_rate": 2.2762271896668253e-05, "loss": 9.5947, "step": 48690 }, { "epoch": 0.2432020774551175, "grad_norm": 0.09212882816791534, "learning_rate": 2.2760769981726703e-05, "loss": 9.5822, "step": 48700 }, { "epoch": 0.24325201628005694, "grad_norm": 0.09269695729017258, "learning_rate": 2.2759268066785153e-05, "loss": 9.5927, "step": 48710 }, { "epoch": 0.2433019551049964, "grad_norm": 0.09830940514802933, "learning_rate": 2.27577661518436e-05, "loss": 9.5933, "step": 48720 }, { "epoch": 0.24335189392993584, "grad_norm": 0.08732043951749802, "learning_rate": 2.275626423690205e-05, "loss": 9.5939, "step": 48730 }, { "epoch": 0.24340183275487529, "grad_norm": 0.09308458864688873, "learning_rate": 2.27547623219605e-05, "loss": 9.5926, "step": 48740 }, { "epoch": 0.24345177157981474, "grad_norm": 0.09478411078453064, "learning_rate": 2.275326040701895e-05, "loss": 9.5954, "step": 48750 }, { "epoch": 0.24350171040475418, "grad_norm": 0.09362474828958511, "learning_rate": 2.27517584920774e-05, "loss": 9.5937, "step": 48760 }, { "epoch": 0.24355164922969363, "grad_norm": 0.09029746055603027, "learning_rate": 2.2750256577135847e-05, "loss": 9.5973, "step": 48770 }, { "epoch": 0.24360158805463308, "grad_norm": 0.10020599514245987, "learning_rate": 2.2748754662194297e-05, "loss": 9.5866, "step": 48780 }, { "epoch": 0.24365152687957253, "grad_norm": 0.1007223054766655, "learning_rate": 2.2747252747252748e-05, "loss": 9.5955, "step": 48790 }, { "epoch": 0.24370146570451198, "grad_norm": 0.0981803610920906, "learning_rate": 2.2745750832311198e-05, "loss": 9.5805, "step": 48800 }, { "epoch": 0.24375140452945143, "grad_norm": 0.09510128200054169, "learning_rate": 2.2744248917369648e-05, "loss": 9.5954, "step": 48810 }, { "epoch": 0.24380134335439088, "grad_norm": 0.09191947430372238, "learning_rate": 2.2742747002428095e-05, "loss": 9.6015, "step": 48820 }, { "epoch": 0.24385128217933033, "grad_norm": 0.09448053687810898, "learning_rate": 2.2741245087486545e-05, "loss": 9.5898, "step": 48830 }, { "epoch": 0.24390122100426978, "grad_norm": 0.09542269259691238, "learning_rate": 2.2739743172544995e-05, "loss": 9.591, "step": 48840 }, { "epoch": 0.24395115982920923, "grad_norm": 0.09461547434329987, "learning_rate": 2.2738241257603445e-05, "loss": 9.5968, "step": 48850 }, { "epoch": 0.24400109865414868, "grad_norm": 0.09337666630744934, "learning_rate": 2.2736739342661895e-05, "loss": 9.5898, "step": 48860 }, { "epoch": 0.24405103747908813, "grad_norm": 0.09441894292831421, "learning_rate": 2.2735237427720346e-05, "loss": 9.5903, "step": 48870 }, { "epoch": 0.24410097630402758, "grad_norm": 0.0947941318154335, "learning_rate": 2.2733735512778792e-05, "loss": 9.5884, "step": 48880 }, { "epoch": 0.24415091512896703, "grad_norm": 0.10225453972816467, "learning_rate": 2.2732233597837243e-05, "loss": 9.5906, "step": 48890 }, { "epoch": 0.24420085395390648, "grad_norm": 0.09397625923156738, "learning_rate": 2.2730731682895693e-05, "loss": 9.5955, "step": 48900 }, { "epoch": 0.24425079277884593, "grad_norm": 0.0943833664059639, "learning_rate": 2.2729229767954143e-05, "loss": 9.5859, "step": 48910 }, { "epoch": 0.24430073160378538, "grad_norm": 0.09139799326658249, "learning_rate": 2.2727727853012593e-05, "loss": 9.5942, "step": 48920 }, { "epoch": 0.24435067042872483, "grad_norm": 0.09776920080184937, "learning_rate": 2.272622593807104e-05, "loss": 9.5919, "step": 48930 }, { "epoch": 0.24440060925366427, "grad_norm": 0.09115762263536453, "learning_rate": 2.272472402312949e-05, "loss": 9.5915, "step": 48940 }, { "epoch": 0.2444505480786037, "grad_norm": 0.09906817972660065, "learning_rate": 2.272322210818794e-05, "loss": 9.593, "step": 48950 }, { "epoch": 0.24450048690354315, "grad_norm": 0.09307815879583359, "learning_rate": 2.272172019324639e-05, "loss": 9.5819, "step": 48960 }, { "epoch": 0.2445504257284826, "grad_norm": 0.10123563557863235, "learning_rate": 2.272021827830484e-05, "loss": 9.59, "step": 48970 }, { "epoch": 0.24460036455342204, "grad_norm": 0.09160536527633667, "learning_rate": 2.2718716363363287e-05, "loss": 9.5937, "step": 48980 }, { "epoch": 0.2446503033783615, "grad_norm": 0.0956059992313385, "learning_rate": 2.2717214448421738e-05, "loss": 9.5891, "step": 48990 }, { "epoch": 0.24470024220330094, "grad_norm": 0.08904523402452469, "learning_rate": 2.2715712533480188e-05, "loss": 9.5975, "step": 49000 }, { "epoch": 0.2447501810282404, "grad_norm": 0.09728097915649414, "learning_rate": 2.2714210618538638e-05, "loss": 9.5892, "step": 49010 }, { "epoch": 0.24480011985317984, "grad_norm": 0.0932324156165123, "learning_rate": 2.2712708703597088e-05, "loss": 9.5944, "step": 49020 }, { "epoch": 0.2448500586781193, "grad_norm": 0.09640751779079437, "learning_rate": 2.2711206788655535e-05, "loss": 9.5948, "step": 49030 }, { "epoch": 0.24489999750305874, "grad_norm": 0.09052955359220505, "learning_rate": 2.2709704873713985e-05, "loss": 9.5832, "step": 49040 }, { "epoch": 0.2449499363279982, "grad_norm": 0.09775557368993759, "learning_rate": 2.2708202958772435e-05, "loss": 9.5852, "step": 49050 }, { "epoch": 0.24499987515293764, "grad_norm": 0.0953780934214592, "learning_rate": 2.2706701043830885e-05, "loss": 9.5839, "step": 49060 }, { "epoch": 0.2450498139778771, "grad_norm": 0.09656697511672974, "learning_rate": 2.2705199128889336e-05, "loss": 9.5869, "step": 49070 }, { "epoch": 0.24509975280281654, "grad_norm": 0.09594185650348663, "learning_rate": 2.2703697213947782e-05, "loss": 9.5924, "step": 49080 }, { "epoch": 0.245149691627756, "grad_norm": 0.09402401000261307, "learning_rate": 2.2702195299006233e-05, "loss": 9.5812, "step": 49090 }, { "epoch": 0.24519963045269544, "grad_norm": 0.09970910847187042, "learning_rate": 2.2700693384064683e-05, "loss": 9.5868, "step": 49100 }, { "epoch": 0.2452495692776349, "grad_norm": 0.09406423568725586, "learning_rate": 2.2699191469123133e-05, "loss": 9.5864, "step": 49110 }, { "epoch": 0.24529950810257434, "grad_norm": 0.09630411863327026, "learning_rate": 2.2697689554181583e-05, "loss": 9.586, "step": 49120 }, { "epoch": 0.2453494469275138, "grad_norm": 0.09320022165775299, "learning_rate": 2.269618763924003e-05, "loss": 9.5852, "step": 49130 }, { "epoch": 0.24539938575245324, "grad_norm": 0.0887378454208374, "learning_rate": 2.269468572429848e-05, "loss": 9.5917, "step": 49140 }, { "epoch": 0.24544932457739269, "grad_norm": 0.09708340466022491, "learning_rate": 2.269318380935693e-05, "loss": 9.5828, "step": 49150 }, { "epoch": 0.24549926340233214, "grad_norm": 0.09893514215946198, "learning_rate": 2.269168189441538e-05, "loss": 9.5893, "step": 49160 }, { "epoch": 0.24554920222727158, "grad_norm": 0.09499365836381912, "learning_rate": 2.269017997947383e-05, "loss": 9.5846, "step": 49170 }, { "epoch": 0.24559914105221103, "grad_norm": 0.09623656421899796, "learning_rate": 2.2688678064532277e-05, "loss": 9.5843, "step": 49180 }, { "epoch": 0.24564907987715048, "grad_norm": 0.09261438250541687, "learning_rate": 2.268717614959073e-05, "loss": 9.5892, "step": 49190 }, { "epoch": 0.24569901870208993, "grad_norm": 0.08927629142999649, "learning_rate": 2.2685674234649178e-05, "loss": 9.5861, "step": 49200 }, { "epoch": 0.24574895752702938, "grad_norm": 0.08987867087125778, "learning_rate": 2.2684172319707628e-05, "loss": 9.5839, "step": 49210 }, { "epoch": 0.24579889635196883, "grad_norm": 0.09199423342943192, "learning_rate": 2.2682670404766078e-05, "loss": 9.5921, "step": 49220 }, { "epoch": 0.24584883517690828, "grad_norm": 0.09492474794387817, "learning_rate": 2.2681168489824525e-05, "loss": 9.5821, "step": 49230 }, { "epoch": 0.24589877400184773, "grad_norm": 0.09393247216939926, "learning_rate": 2.267966657488298e-05, "loss": 9.5838, "step": 49240 }, { "epoch": 0.24594871282678718, "grad_norm": 0.09099004417657852, "learning_rate": 2.2678164659941425e-05, "loss": 9.5917, "step": 49250 }, { "epoch": 0.24599865165172663, "grad_norm": 0.09554600715637207, "learning_rate": 2.2676662744999875e-05, "loss": 9.5849, "step": 49260 }, { "epoch": 0.24604859047666608, "grad_norm": 0.09146859496831894, "learning_rate": 2.2675160830058326e-05, "loss": 9.5806, "step": 49270 }, { "epoch": 0.24609852930160553, "grad_norm": 0.09467651695013046, "learning_rate": 2.2673658915116772e-05, "loss": 9.5945, "step": 49280 }, { "epoch": 0.24614846812654498, "grad_norm": 0.09306614845991135, "learning_rate": 2.2672157000175226e-05, "loss": 9.5833, "step": 49290 }, { "epoch": 0.24619840695148443, "grad_norm": 0.09221172332763672, "learning_rate": 2.2670655085233673e-05, "loss": 9.576, "step": 49300 }, { "epoch": 0.24624834577642388, "grad_norm": 0.09125102311372757, "learning_rate": 2.2669153170292123e-05, "loss": 9.5789, "step": 49310 }, { "epoch": 0.24629828460136333, "grad_norm": 0.09555398672819138, "learning_rate": 2.2667651255350573e-05, "loss": 9.5835, "step": 49320 }, { "epoch": 0.24634822342630278, "grad_norm": 0.09381726384162903, "learning_rate": 2.266614934040902e-05, "loss": 9.5803, "step": 49330 }, { "epoch": 0.24639816225124223, "grad_norm": 0.09570520371198654, "learning_rate": 2.2664647425467473e-05, "loss": 9.5808, "step": 49340 }, { "epoch": 0.24644810107618167, "grad_norm": 0.08948148041963577, "learning_rate": 2.266314551052592e-05, "loss": 9.5837, "step": 49350 }, { "epoch": 0.24649803990112112, "grad_norm": 0.09439452737569809, "learning_rate": 2.266164359558437e-05, "loss": 9.5819, "step": 49360 }, { "epoch": 0.24654797872606057, "grad_norm": 0.09754544496536255, "learning_rate": 2.266014168064282e-05, "loss": 9.5824, "step": 49370 }, { "epoch": 0.24659791755100002, "grad_norm": 0.09239448606967926, "learning_rate": 2.2658639765701267e-05, "loss": 9.5796, "step": 49380 }, { "epoch": 0.24664785637593947, "grad_norm": 0.09098103642463684, "learning_rate": 2.265713785075972e-05, "loss": 9.5934, "step": 49390 }, { "epoch": 0.24669779520087892, "grad_norm": 0.09144534915685654, "learning_rate": 2.2655635935818168e-05, "loss": 9.5866, "step": 49400 }, { "epoch": 0.24674773402581837, "grad_norm": 0.09623269736766815, "learning_rate": 2.2654134020876618e-05, "loss": 9.5856, "step": 49410 }, { "epoch": 0.24679767285075782, "grad_norm": 0.09903118759393692, "learning_rate": 2.2652632105935068e-05, "loss": 9.5846, "step": 49420 }, { "epoch": 0.24684761167569727, "grad_norm": 0.09627585858106613, "learning_rate": 2.2651130190993515e-05, "loss": 9.578, "step": 49430 }, { "epoch": 0.24689755050063672, "grad_norm": 0.09699337929487228, "learning_rate": 2.264962827605197e-05, "loss": 9.5867, "step": 49440 }, { "epoch": 0.24694748932557617, "grad_norm": 0.08975996822118759, "learning_rate": 2.2648126361110415e-05, "loss": 9.5786, "step": 49450 }, { "epoch": 0.24699742815051562, "grad_norm": 0.09180519729852676, "learning_rate": 2.2646624446168865e-05, "loss": 9.5793, "step": 49460 }, { "epoch": 0.24704736697545507, "grad_norm": 0.09850934892892838, "learning_rate": 2.2645122531227316e-05, "loss": 9.581, "step": 49470 }, { "epoch": 0.24709730580039452, "grad_norm": 0.09343817830085754, "learning_rate": 2.2643620616285762e-05, "loss": 9.5812, "step": 49480 }, { "epoch": 0.24714724462533397, "grad_norm": 0.09533870965242386, "learning_rate": 2.2642118701344216e-05, "loss": 9.5801, "step": 49490 }, { "epoch": 0.24719718345027342, "grad_norm": 0.08947062492370605, "learning_rate": 2.2640616786402663e-05, "loss": 9.5813, "step": 49500 }, { "epoch": 0.24724712227521287, "grad_norm": 0.09306909888982773, "learning_rate": 2.2639114871461116e-05, "loss": 9.5802, "step": 49510 }, { "epoch": 0.24729706110015232, "grad_norm": 0.09695802628993988, "learning_rate": 2.2637612956519563e-05, "loss": 9.5719, "step": 49520 }, { "epoch": 0.24734699992509177, "grad_norm": 0.09378542751073837, "learning_rate": 2.263611104157801e-05, "loss": 9.5747, "step": 49530 }, { "epoch": 0.24739693875003121, "grad_norm": 0.0942843034863472, "learning_rate": 2.2634609126636463e-05, "loss": 9.5746, "step": 49540 }, { "epoch": 0.24744687757497066, "grad_norm": 0.09266909956932068, "learning_rate": 2.263310721169491e-05, "loss": 9.5729, "step": 49550 }, { "epoch": 0.2474968163999101, "grad_norm": 0.09565820544958115, "learning_rate": 2.2631605296753364e-05, "loss": 9.5685, "step": 49560 }, { "epoch": 0.24754675522484956, "grad_norm": 0.09224898368120193, "learning_rate": 2.263010338181181e-05, "loss": 9.586, "step": 49570 }, { "epoch": 0.247596694049789, "grad_norm": 0.09254671633243561, "learning_rate": 2.2628601466870257e-05, "loss": 9.5773, "step": 49580 }, { "epoch": 0.24764663287472846, "grad_norm": 0.09858168661594391, "learning_rate": 2.262709955192871e-05, "loss": 9.5796, "step": 49590 }, { "epoch": 0.2476965716996679, "grad_norm": 0.0942915603518486, "learning_rate": 2.2625597636987158e-05, "loss": 9.5813, "step": 49600 }, { "epoch": 0.24774651052460736, "grad_norm": 0.09052585065364838, "learning_rate": 2.262409572204561e-05, "loss": 9.5741, "step": 49610 }, { "epoch": 0.2477964493495468, "grad_norm": 0.09947668015956879, "learning_rate": 2.2622593807104058e-05, "loss": 9.5746, "step": 49620 }, { "epoch": 0.24784638817448626, "grad_norm": 0.09705556184053421, "learning_rate": 2.2621091892162505e-05, "loss": 9.5732, "step": 49630 }, { "epoch": 0.2478963269994257, "grad_norm": 0.0956898182630539, "learning_rate": 2.261958997722096e-05, "loss": 9.5757, "step": 49640 }, { "epoch": 0.24794626582436516, "grad_norm": 0.09402977675199509, "learning_rate": 2.2618088062279405e-05, "loss": 9.5683, "step": 49650 }, { "epoch": 0.2479962046493046, "grad_norm": 0.09913989156484604, "learning_rate": 2.261658614733786e-05, "loss": 9.5776, "step": 49660 }, { "epoch": 0.24804614347424406, "grad_norm": 0.09860474616289139, "learning_rate": 2.2615084232396306e-05, "loss": 9.5795, "step": 49670 }, { "epoch": 0.2480960822991835, "grad_norm": 0.0972980335354805, "learning_rate": 2.2613582317454752e-05, "loss": 9.567, "step": 49680 }, { "epoch": 0.24814602112412296, "grad_norm": 0.09505248814821243, "learning_rate": 2.2612080402513206e-05, "loss": 9.5719, "step": 49690 }, { "epoch": 0.2481959599490624, "grad_norm": 0.09183008968830109, "learning_rate": 2.2610578487571653e-05, "loss": 9.5819, "step": 49700 }, { "epoch": 0.24824589877400186, "grad_norm": 0.09852754324674606, "learning_rate": 2.2609076572630106e-05, "loss": 9.577, "step": 49710 }, { "epoch": 0.2482958375989413, "grad_norm": 0.09413760155439377, "learning_rate": 2.2607574657688553e-05, "loss": 9.5743, "step": 49720 }, { "epoch": 0.24834577642388075, "grad_norm": 0.09190001338720322, "learning_rate": 2.2606072742747e-05, "loss": 9.5768, "step": 49730 }, { "epoch": 0.2483957152488202, "grad_norm": 0.09404946118593216, "learning_rate": 2.2604570827805453e-05, "loss": 9.5756, "step": 49740 }, { "epoch": 0.24844565407375965, "grad_norm": 0.09731113910675049, "learning_rate": 2.26030689128639e-05, "loss": 9.5738, "step": 49750 }, { "epoch": 0.2484955928986991, "grad_norm": 0.09584534913301468, "learning_rate": 2.2601566997922354e-05, "loss": 9.5745, "step": 49760 }, { "epoch": 0.24854553172363855, "grad_norm": 0.09506174921989441, "learning_rate": 2.26000650829808e-05, "loss": 9.5702, "step": 49770 }, { "epoch": 0.248595470548578, "grad_norm": 0.09768600016832352, "learning_rate": 2.2598563168039247e-05, "loss": 9.5804, "step": 49780 }, { "epoch": 0.24864540937351745, "grad_norm": 0.09207095205783844, "learning_rate": 2.25970612530977e-05, "loss": 9.569, "step": 49790 }, { "epoch": 0.2486953481984569, "grad_norm": 0.09260823577642441, "learning_rate": 2.2595559338156148e-05, "loss": 9.5739, "step": 49800 }, { "epoch": 0.24874528702339635, "grad_norm": 0.09007450938224792, "learning_rate": 2.25940574232146e-05, "loss": 9.5792, "step": 49810 }, { "epoch": 0.2487952258483358, "grad_norm": 0.10160695761442184, "learning_rate": 2.2592555508273048e-05, "loss": 9.5803, "step": 49820 }, { "epoch": 0.24884516467327525, "grad_norm": 0.09343059360980988, "learning_rate": 2.25910535933315e-05, "loss": 9.5758, "step": 49830 }, { "epoch": 0.2488951034982147, "grad_norm": 0.09455156326293945, "learning_rate": 2.258955167838995e-05, "loss": 9.5728, "step": 49840 }, { "epoch": 0.24894504232315415, "grad_norm": 0.09680726379156113, "learning_rate": 2.2588049763448395e-05, "loss": 9.5752, "step": 49850 }, { "epoch": 0.2489949811480936, "grad_norm": 0.09316403418779373, "learning_rate": 2.258654784850685e-05, "loss": 9.5631, "step": 49860 }, { "epoch": 0.24904491997303305, "grad_norm": 0.0938340425491333, "learning_rate": 2.2585045933565296e-05, "loss": 9.5747, "step": 49870 }, { "epoch": 0.2490948587979725, "grad_norm": 0.094419926404953, "learning_rate": 2.2583544018623746e-05, "loss": 9.5744, "step": 49880 }, { "epoch": 0.24914479762291195, "grad_norm": 0.09464523941278458, "learning_rate": 2.2582042103682196e-05, "loss": 9.5676, "step": 49890 }, { "epoch": 0.2491947364478514, "grad_norm": 0.09369165450334549, "learning_rate": 2.2580540188740643e-05, "loss": 9.5752, "step": 49900 }, { "epoch": 0.24924467527279084, "grad_norm": 0.09760770201683044, "learning_rate": 2.2579038273799096e-05, "loss": 9.5642, "step": 49910 }, { "epoch": 0.2492946140977303, "grad_norm": 0.09786707162857056, "learning_rate": 2.2577536358857543e-05, "loss": 9.5733, "step": 49920 }, { "epoch": 0.24934455292266974, "grad_norm": 0.09324178099632263, "learning_rate": 2.2576034443915993e-05, "loss": 9.5683, "step": 49930 }, { "epoch": 0.24939449174760916, "grad_norm": 0.09547551721334457, "learning_rate": 2.2574532528974444e-05, "loss": 9.5693, "step": 49940 }, { "epoch": 0.24944443057254861, "grad_norm": 0.09578131884336472, "learning_rate": 2.257303061403289e-05, "loss": 9.5682, "step": 49950 }, { "epoch": 0.24949436939748806, "grad_norm": 0.09277050942182541, "learning_rate": 2.2571528699091344e-05, "loss": 9.5733, "step": 49960 }, { "epoch": 0.2495443082224275, "grad_norm": 0.09631931036710739, "learning_rate": 2.257002678414979e-05, "loss": 9.5709, "step": 49970 }, { "epoch": 0.24959424704736696, "grad_norm": 0.09734215587377548, "learning_rate": 2.256852486920824e-05, "loss": 9.5697, "step": 49980 }, { "epoch": 0.2496441858723064, "grad_norm": 0.09608133882284164, "learning_rate": 2.256702295426669e-05, "loss": 9.5712, "step": 49990 }, { "epoch": 0.24969412469724586, "grad_norm": 0.09397196024656296, "learning_rate": 2.2565521039325138e-05, "loss": 9.571, "step": 50000 }, { "epoch": 0.2497440635221853, "grad_norm": 0.09108542650938034, "learning_rate": 2.256401912438359e-05, "loss": 9.5636, "step": 50010 }, { "epoch": 0.24979400234712476, "grad_norm": 0.09850990772247314, "learning_rate": 2.2562517209442038e-05, "loss": 9.5599, "step": 50020 }, { "epoch": 0.2498439411720642, "grad_norm": 0.10261461138725281, "learning_rate": 2.256101529450049e-05, "loss": 9.5668, "step": 50030 }, { "epoch": 0.24989387999700366, "grad_norm": 0.09602981060743332, "learning_rate": 2.255951337955894e-05, "loss": 9.5784, "step": 50040 }, { "epoch": 0.2499438188219431, "grad_norm": 0.10035931318998337, "learning_rate": 2.2558011464617385e-05, "loss": 9.5678, "step": 50050 }, { "epoch": 0.24999375764688256, "grad_norm": 0.09186173975467682, "learning_rate": 2.255650954967584e-05, "loss": 9.5682, "step": 50060 }, { "epoch": 0.250043696471822, "grad_norm": 0.09531144052743912, "learning_rate": 2.2555007634734286e-05, "loss": 9.5753, "step": 50070 }, { "epoch": 0.2500936352967615, "grad_norm": 0.0925971195101738, "learning_rate": 2.2553505719792736e-05, "loss": 9.5576, "step": 50080 }, { "epoch": 0.2501435741217009, "grad_norm": 0.10341840982437134, "learning_rate": 2.2552003804851186e-05, "loss": 9.5649, "step": 50090 }, { "epoch": 0.2501935129466404, "grad_norm": 0.08949480205774307, "learning_rate": 2.2550501889909633e-05, "loss": 9.5681, "step": 50100 }, { "epoch": 0.2502434517715798, "grad_norm": 0.0952087789773941, "learning_rate": 2.2548999974968086e-05, "loss": 9.5719, "step": 50110 }, { "epoch": 0.2502933905965193, "grad_norm": 0.09297273308038712, "learning_rate": 2.2547498060026533e-05, "loss": 9.5602, "step": 50120 }, { "epoch": 0.2503433294214587, "grad_norm": 0.09558509290218353, "learning_rate": 2.2545996145084983e-05, "loss": 9.5775, "step": 50130 }, { "epoch": 0.2503932682463982, "grad_norm": 0.09406739473342896, "learning_rate": 2.2544494230143434e-05, "loss": 9.5728, "step": 50140 }, { "epoch": 0.2504432070713376, "grad_norm": 0.08946365118026733, "learning_rate": 2.254299231520188e-05, "loss": 9.5646, "step": 50150 }, { "epoch": 0.2504931458962771, "grad_norm": 0.0948345735669136, "learning_rate": 2.2541490400260334e-05, "loss": 9.5688, "step": 50160 }, { "epoch": 0.2505430847212165, "grad_norm": 0.09642423689365387, "learning_rate": 2.253998848531878e-05, "loss": 9.5646, "step": 50170 }, { "epoch": 0.250593023546156, "grad_norm": 0.09099261462688446, "learning_rate": 2.253848657037723e-05, "loss": 9.5648, "step": 50180 }, { "epoch": 0.2506429623710954, "grad_norm": 0.09220701456069946, "learning_rate": 2.253698465543568e-05, "loss": 9.5665, "step": 50190 }, { "epoch": 0.2506929011960349, "grad_norm": 0.09915616363286972, "learning_rate": 2.253548274049413e-05, "loss": 9.5697, "step": 50200 }, { "epoch": 0.2507428400209743, "grad_norm": 0.0929388552904129, "learning_rate": 2.253398082555258e-05, "loss": 9.5623, "step": 50210 }, { "epoch": 0.2507927788459138, "grad_norm": 0.09379485994577408, "learning_rate": 2.2532478910611028e-05, "loss": 9.5631, "step": 50220 }, { "epoch": 0.2508427176708532, "grad_norm": 0.0981220230460167, "learning_rate": 2.253097699566948e-05, "loss": 9.5638, "step": 50230 }, { "epoch": 0.2508926564957927, "grad_norm": 0.09549273550510406, "learning_rate": 2.252947508072793e-05, "loss": 9.5607, "step": 50240 }, { "epoch": 0.2509425953207321, "grad_norm": 0.09716612845659256, "learning_rate": 2.252797316578638e-05, "loss": 9.5685, "step": 50250 }, { "epoch": 0.2509925341456716, "grad_norm": 0.09582599252462387, "learning_rate": 2.252647125084483e-05, "loss": 9.5682, "step": 50260 }, { "epoch": 0.251042472970611, "grad_norm": 0.09806785732507706, "learning_rate": 2.2524969335903276e-05, "loss": 9.5639, "step": 50270 }, { "epoch": 0.2510924117955505, "grad_norm": 0.10009253770112991, "learning_rate": 2.2523467420961726e-05, "loss": 9.5696, "step": 50280 }, { "epoch": 0.2511423506204899, "grad_norm": 0.10564589500427246, "learning_rate": 2.2521965506020176e-05, "loss": 9.5722, "step": 50290 }, { "epoch": 0.2511922894454294, "grad_norm": 0.09391903877258301, "learning_rate": 2.2520463591078626e-05, "loss": 9.5661, "step": 50300 }, { "epoch": 0.2512422282703688, "grad_norm": 0.09433561563491821, "learning_rate": 2.2518961676137076e-05, "loss": 9.5701, "step": 50310 }, { "epoch": 0.25129216709530827, "grad_norm": 0.09320453554391861, "learning_rate": 2.2517459761195523e-05, "loss": 9.5589, "step": 50320 }, { "epoch": 0.2513421059202477, "grad_norm": 0.0926797166466713, "learning_rate": 2.2515957846253973e-05, "loss": 9.5641, "step": 50330 }, { "epoch": 0.25139204474518717, "grad_norm": 0.09965445101261139, "learning_rate": 2.2514455931312424e-05, "loss": 9.572, "step": 50340 }, { "epoch": 0.2514419835701266, "grad_norm": 0.09860392659902573, "learning_rate": 2.2512954016370874e-05, "loss": 9.569, "step": 50350 }, { "epoch": 0.25149192239506607, "grad_norm": 0.09547075629234314, "learning_rate": 2.2511452101429324e-05, "loss": 9.5689, "step": 50360 }, { "epoch": 0.2515418612200055, "grad_norm": 0.09795694053173065, "learning_rate": 2.250995018648777e-05, "loss": 9.5628, "step": 50370 }, { "epoch": 0.25159180004494497, "grad_norm": 0.09307505935430527, "learning_rate": 2.250844827154622e-05, "loss": 9.5662, "step": 50380 }, { "epoch": 0.2516417388698844, "grad_norm": 0.10071365535259247, "learning_rate": 2.250694635660467e-05, "loss": 9.569, "step": 50390 }, { "epoch": 0.25169167769482387, "grad_norm": 0.09713767468929291, "learning_rate": 2.250544444166312e-05, "loss": 9.5577, "step": 50400 }, { "epoch": 0.2517416165197633, "grad_norm": 0.09521119296550751, "learning_rate": 2.250394252672157e-05, "loss": 9.5673, "step": 50410 }, { "epoch": 0.25179155534470277, "grad_norm": 0.09349004924297333, "learning_rate": 2.2502440611780018e-05, "loss": 9.5571, "step": 50420 }, { "epoch": 0.2518414941696422, "grad_norm": 0.0950050801038742, "learning_rate": 2.250093869683847e-05, "loss": 9.5636, "step": 50430 }, { "epoch": 0.2518914329945816, "grad_norm": 0.09199751913547516, "learning_rate": 2.249943678189692e-05, "loss": 9.5595, "step": 50440 }, { "epoch": 0.2519413718195211, "grad_norm": 0.09100238978862762, "learning_rate": 2.249793486695537e-05, "loss": 9.5609, "step": 50450 }, { "epoch": 0.2519913106444605, "grad_norm": 0.09246649593114853, "learning_rate": 2.249643295201382e-05, "loss": 9.5659, "step": 50460 }, { "epoch": 0.2520412494694, "grad_norm": 0.09442534297704697, "learning_rate": 2.2494931037072266e-05, "loss": 9.5646, "step": 50470 }, { "epoch": 0.2520911882943394, "grad_norm": 0.08858856558799744, "learning_rate": 2.249342912213072e-05, "loss": 9.5523, "step": 50480 }, { "epoch": 0.2521411271192789, "grad_norm": 0.09395717829465866, "learning_rate": 2.2491927207189166e-05, "loss": 9.5661, "step": 50490 }, { "epoch": 0.2521910659442183, "grad_norm": 0.101358562707901, "learning_rate": 2.2490425292247616e-05, "loss": 9.5592, "step": 50500 }, { "epoch": 0.2522410047691578, "grad_norm": 0.09509819746017456, "learning_rate": 2.2488923377306066e-05, "loss": 9.5605, "step": 50510 }, { "epoch": 0.2522909435940972, "grad_norm": 0.09509771317243576, "learning_rate": 2.2487421462364517e-05, "loss": 9.561, "step": 50520 }, { "epoch": 0.2523408824190367, "grad_norm": 0.09046350419521332, "learning_rate": 2.2485919547422967e-05, "loss": 9.5595, "step": 50530 }, { "epoch": 0.2523908212439761, "grad_norm": 0.09777220338582993, "learning_rate": 2.2484417632481414e-05, "loss": 9.5526, "step": 50540 }, { "epoch": 0.2524407600689156, "grad_norm": 0.09290176630020142, "learning_rate": 2.2482915717539864e-05, "loss": 9.562, "step": 50550 }, { "epoch": 0.252490698893855, "grad_norm": 0.0935697928071022, "learning_rate": 2.2481413802598314e-05, "loss": 9.5649, "step": 50560 }, { "epoch": 0.2525406377187945, "grad_norm": 0.09431051462888718, "learning_rate": 2.2479911887656764e-05, "loss": 9.5558, "step": 50570 }, { "epoch": 0.2525905765437339, "grad_norm": 0.09408137947320938, "learning_rate": 2.2478409972715214e-05, "loss": 9.5587, "step": 50580 }, { "epoch": 0.2526405153686734, "grad_norm": 0.0971836969256401, "learning_rate": 2.247690805777366e-05, "loss": 9.5572, "step": 50590 }, { "epoch": 0.2526904541936128, "grad_norm": 0.09811126440763474, "learning_rate": 2.247540614283211e-05, "loss": 9.5591, "step": 50600 }, { "epoch": 0.2527403930185523, "grad_norm": 0.09719320386648178, "learning_rate": 2.247390422789056e-05, "loss": 9.5642, "step": 50610 }, { "epoch": 0.2527903318434917, "grad_norm": 0.09279009699821472, "learning_rate": 2.247240231294901e-05, "loss": 9.554, "step": 50620 }, { "epoch": 0.2528402706684312, "grad_norm": 0.09156560897827148, "learning_rate": 2.2470900398007462e-05, "loss": 9.5613, "step": 50630 }, { "epoch": 0.2528902094933706, "grad_norm": 0.08856366574764252, "learning_rate": 2.246939848306591e-05, "loss": 9.5541, "step": 50640 }, { "epoch": 0.2529401483183101, "grad_norm": 0.09104395657777786, "learning_rate": 2.246789656812436e-05, "loss": 9.5616, "step": 50650 }, { "epoch": 0.2529900871432495, "grad_norm": 0.09743545204401016, "learning_rate": 2.246639465318281e-05, "loss": 9.5614, "step": 50660 }, { "epoch": 0.253040025968189, "grad_norm": 0.09513422101736069, "learning_rate": 2.246489273824126e-05, "loss": 9.5536, "step": 50670 }, { "epoch": 0.2530899647931284, "grad_norm": 0.09526988118886948, "learning_rate": 2.246339082329971e-05, "loss": 9.5638, "step": 50680 }, { "epoch": 0.2531399036180679, "grad_norm": 0.0918373093008995, "learning_rate": 2.2461888908358156e-05, "loss": 9.5538, "step": 50690 }, { "epoch": 0.2531898424430073, "grad_norm": 0.09240086376667023, "learning_rate": 2.2460386993416606e-05, "loss": 9.5696, "step": 50700 }, { "epoch": 0.2532397812679468, "grad_norm": 0.09589746594429016, "learning_rate": 2.2458885078475056e-05, "loss": 9.5527, "step": 50710 }, { "epoch": 0.2532897200928862, "grad_norm": 0.0918283686041832, "learning_rate": 2.2457383163533507e-05, "loss": 9.5575, "step": 50720 }, { "epoch": 0.25333965891782567, "grad_norm": 0.09194007515907288, "learning_rate": 2.2455881248591957e-05, "loss": 9.5578, "step": 50730 }, { "epoch": 0.2533895977427651, "grad_norm": 0.09658579528331757, "learning_rate": 2.2454379333650404e-05, "loss": 9.5613, "step": 50740 }, { "epoch": 0.25343953656770457, "grad_norm": 0.09084783494472504, "learning_rate": 2.2452877418708854e-05, "loss": 9.5606, "step": 50750 }, { "epoch": 0.253489475392644, "grad_norm": 0.09604687243700027, "learning_rate": 2.2451375503767304e-05, "loss": 9.5488, "step": 50760 }, { "epoch": 0.25353941421758347, "grad_norm": 0.08728381246328354, "learning_rate": 2.2449873588825754e-05, "loss": 9.555, "step": 50770 }, { "epoch": 0.2535893530425229, "grad_norm": 0.09549268335103989, "learning_rate": 2.2448371673884204e-05, "loss": 9.5534, "step": 50780 }, { "epoch": 0.25363929186746237, "grad_norm": 0.0957789346575737, "learning_rate": 2.244686975894265e-05, "loss": 9.5565, "step": 50790 }, { "epoch": 0.2536892306924018, "grad_norm": 0.09618537127971649, "learning_rate": 2.24453678440011e-05, "loss": 9.5515, "step": 50800 }, { "epoch": 0.25373916951734127, "grad_norm": 0.0949334055185318, "learning_rate": 2.244386592905955e-05, "loss": 9.5482, "step": 50810 }, { "epoch": 0.2537891083422807, "grad_norm": 0.09194260835647583, "learning_rate": 2.2442364014118e-05, "loss": 9.5547, "step": 50820 }, { "epoch": 0.25383904716722017, "grad_norm": 0.09208869934082031, "learning_rate": 2.2440862099176452e-05, "loss": 9.561, "step": 50830 }, { "epoch": 0.2538889859921596, "grad_norm": 0.0971684604883194, "learning_rate": 2.2439360184234902e-05, "loss": 9.559, "step": 50840 }, { "epoch": 0.25393892481709907, "grad_norm": 0.09714303910732269, "learning_rate": 2.243785826929335e-05, "loss": 9.5505, "step": 50850 }, { "epoch": 0.2539888636420385, "grad_norm": 0.09106447547674179, "learning_rate": 2.24363563543518e-05, "loss": 9.5475, "step": 50860 }, { "epoch": 0.25403880246697796, "grad_norm": 0.09171400219202042, "learning_rate": 2.243485443941025e-05, "loss": 9.5539, "step": 50870 }, { "epoch": 0.2540887412919174, "grad_norm": 0.09836923331022263, "learning_rate": 2.24333525244687e-05, "loss": 9.5509, "step": 50880 }, { "epoch": 0.25413868011685686, "grad_norm": 0.09668246656656265, "learning_rate": 2.243185060952715e-05, "loss": 9.5561, "step": 50890 }, { "epoch": 0.2541886189417963, "grad_norm": 0.09654278308153152, "learning_rate": 2.2430348694585596e-05, "loss": 9.5565, "step": 50900 }, { "epoch": 0.25423855776673576, "grad_norm": 0.09737430512905121, "learning_rate": 2.2428846779644046e-05, "loss": 9.5577, "step": 50910 }, { "epoch": 0.2542884965916752, "grad_norm": 0.09896323084831238, "learning_rate": 2.2427344864702497e-05, "loss": 9.5504, "step": 50920 }, { "epoch": 0.25433843541661466, "grad_norm": 0.09133461862802505, "learning_rate": 2.2425842949760947e-05, "loss": 9.5474, "step": 50930 }, { "epoch": 0.2543883742415541, "grad_norm": 0.09348398447036743, "learning_rate": 2.2424341034819397e-05, "loss": 9.5559, "step": 50940 }, { "epoch": 0.25443831306649356, "grad_norm": 0.09214718639850616, "learning_rate": 2.2422839119877844e-05, "loss": 9.5534, "step": 50950 }, { "epoch": 0.254488251891433, "grad_norm": 0.09419059008359909, "learning_rate": 2.2421337204936294e-05, "loss": 9.5525, "step": 50960 }, { "epoch": 0.25453819071637246, "grad_norm": 0.10455143451690674, "learning_rate": 2.2419835289994744e-05, "loss": 9.5567, "step": 50970 }, { "epoch": 0.2545881295413119, "grad_norm": 0.09650573879480362, "learning_rate": 2.2418333375053194e-05, "loss": 9.5585, "step": 50980 }, { "epoch": 0.25463806836625136, "grad_norm": 0.09631022065877914, "learning_rate": 2.2416831460111644e-05, "loss": 9.5525, "step": 50990 }, { "epoch": 0.2546880071911908, "grad_norm": 0.09867773205041885, "learning_rate": 2.241532954517009e-05, "loss": 9.5604, "step": 51000 }, { "epoch": 0.25473794601613026, "grad_norm": 0.09457637369632721, "learning_rate": 2.241382763022854e-05, "loss": 9.5552, "step": 51010 }, { "epoch": 0.2547878848410697, "grad_norm": 0.09107904881238937, "learning_rate": 2.241232571528699e-05, "loss": 9.5587, "step": 51020 }, { "epoch": 0.25483782366600916, "grad_norm": 0.0893888846039772, "learning_rate": 2.2410823800345442e-05, "loss": 9.5496, "step": 51030 }, { "epoch": 0.2548877624909486, "grad_norm": 0.09376450628042221, "learning_rate": 2.2409321885403892e-05, "loss": 9.5533, "step": 51040 }, { "epoch": 0.25493770131588805, "grad_norm": 0.09373288601636887, "learning_rate": 2.240781997046234e-05, "loss": 9.5508, "step": 51050 }, { "epoch": 0.2549876401408275, "grad_norm": 0.08769457787275314, "learning_rate": 2.240631805552079e-05, "loss": 9.5622, "step": 51060 }, { "epoch": 0.25503757896576695, "grad_norm": 0.09537973999977112, "learning_rate": 2.240481614057924e-05, "loss": 9.5541, "step": 51070 }, { "epoch": 0.2550875177907064, "grad_norm": 0.09325198084115982, "learning_rate": 2.240331422563769e-05, "loss": 9.5471, "step": 51080 }, { "epoch": 0.25513745661564585, "grad_norm": 0.08798488229513168, "learning_rate": 2.240181231069614e-05, "loss": 9.5516, "step": 51090 }, { "epoch": 0.2551873954405853, "grad_norm": 0.09604234993457794, "learning_rate": 2.2400310395754586e-05, "loss": 9.5472, "step": 51100 }, { "epoch": 0.25523733426552475, "grad_norm": 0.0939023569226265, "learning_rate": 2.2398808480813036e-05, "loss": 9.5409, "step": 51110 }, { "epoch": 0.2552872730904642, "grad_norm": 0.09348843991756439, "learning_rate": 2.2397306565871487e-05, "loss": 9.5484, "step": 51120 }, { "epoch": 0.25533721191540365, "grad_norm": 0.0899486169219017, "learning_rate": 2.2395804650929937e-05, "loss": 9.547, "step": 51130 }, { "epoch": 0.25538715074034307, "grad_norm": 0.0945720225572586, "learning_rate": 2.2394302735988387e-05, "loss": 9.5554, "step": 51140 }, { "epoch": 0.25543708956528255, "grad_norm": 0.0940125435590744, "learning_rate": 2.2392800821046834e-05, "loss": 9.5527, "step": 51150 }, { "epoch": 0.25548702839022197, "grad_norm": 0.09849490225315094, "learning_rate": 2.2391298906105287e-05, "loss": 9.5505, "step": 51160 }, { "epoch": 0.25553696721516145, "grad_norm": 0.08866614103317261, "learning_rate": 2.2389796991163734e-05, "loss": 9.5502, "step": 51170 }, { "epoch": 0.25558690604010087, "grad_norm": 0.09326382726430893, "learning_rate": 2.2388295076222184e-05, "loss": 9.5456, "step": 51180 }, { "epoch": 0.25563684486504035, "grad_norm": 0.09560631960630417, "learning_rate": 2.2386793161280634e-05, "loss": 9.544, "step": 51190 }, { "epoch": 0.25568678368997977, "grad_norm": 0.09622514992952347, "learning_rate": 2.238529124633908e-05, "loss": 9.5561, "step": 51200 }, { "epoch": 0.25573672251491925, "grad_norm": 0.09464744478464127, "learning_rate": 2.2383789331397535e-05, "loss": 9.5411, "step": 51210 }, { "epoch": 0.25578666133985867, "grad_norm": 0.09231311827898026, "learning_rate": 2.238228741645598e-05, "loss": 9.5537, "step": 51220 }, { "epoch": 0.25583660016479814, "grad_norm": 0.09209968149662018, "learning_rate": 2.2380785501514432e-05, "loss": 9.5526, "step": 51230 }, { "epoch": 0.25588653898973757, "grad_norm": 0.09474463015794754, "learning_rate": 2.2379283586572882e-05, "loss": 9.5419, "step": 51240 }, { "epoch": 0.25593647781467704, "grad_norm": 0.09012725204229355, "learning_rate": 2.237778167163133e-05, "loss": 9.5522, "step": 51250 }, { "epoch": 0.25598641663961647, "grad_norm": 0.09033194929361343, "learning_rate": 2.2376279756689782e-05, "loss": 9.5544, "step": 51260 }, { "epoch": 0.25603635546455594, "grad_norm": 0.0948658287525177, "learning_rate": 2.237477784174823e-05, "loss": 9.546, "step": 51270 }, { "epoch": 0.25608629428949536, "grad_norm": 0.09429352730512619, "learning_rate": 2.237327592680668e-05, "loss": 9.5531, "step": 51280 }, { "epoch": 0.25613623311443484, "grad_norm": 0.09330032765865326, "learning_rate": 2.237177401186513e-05, "loss": 9.5449, "step": 51290 }, { "epoch": 0.25618617193937426, "grad_norm": 0.08929108828306198, "learning_rate": 2.2370272096923576e-05, "loss": 9.551, "step": 51300 }, { "epoch": 0.25623611076431374, "grad_norm": 0.0942007303237915, "learning_rate": 2.236877018198203e-05, "loss": 9.5506, "step": 51310 }, { "epoch": 0.25628604958925316, "grad_norm": 0.09233163297176361, "learning_rate": 2.2367268267040477e-05, "loss": 9.5422, "step": 51320 }, { "epoch": 0.25633598841419264, "grad_norm": 0.09945209324359894, "learning_rate": 2.2365766352098927e-05, "loss": 9.539, "step": 51330 }, { "epoch": 0.25638592723913206, "grad_norm": 0.09467910975217819, "learning_rate": 2.2364264437157377e-05, "loss": 9.551, "step": 51340 }, { "epoch": 0.25643586606407154, "grad_norm": 0.09829029440879822, "learning_rate": 2.2362762522215824e-05, "loss": 9.5447, "step": 51350 }, { "epoch": 0.25648580488901096, "grad_norm": 0.09287157654762268, "learning_rate": 2.2361260607274277e-05, "loss": 9.5401, "step": 51360 }, { "epoch": 0.25653574371395044, "grad_norm": 0.09296029806137085, "learning_rate": 2.2359758692332724e-05, "loss": 9.5441, "step": 51370 }, { "epoch": 0.25658568253888986, "grad_norm": 0.09422246366739273, "learning_rate": 2.2358256777391174e-05, "loss": 9.5485, "step": 51380 }, { "epoch": 0.25663562136382934, "grad_norm": 0.0900813490152359, "learning_rate": 2.2356754862449624e-05, "loss": 9.5456, "step": 51390 }, { "epoch": 0.25668556018876876, "grad_norm": 0.09812017530202866, "learning_rate": 2.235525294750807e-05, "loss": 9.5497, "step": 51400 }, { "epoch": 0.25673549901370823, "grad_norm": 0.09745459258556366, "learning_rate": 2.2353751032566525e-05, "loss": 9.5479, "step": 51410 }, { "epoch": 0.25678543783864766, "grad_norm": 0.09394165873527527, "learning_rate": 2.235224911762497e-05, "loss": 9.5428, "step": 51420 }, { "epoch": 0.2568353766635871, "grad_norm": 0.09442657977342606, "learning_rate": 2.2350747202683422e-05, "loss": 9.5507, "step": 51430 }, { "epoch": 0.25688531548852656, "grad_norm": 0.10503870993852615, "learning_rate": 2.2349245287741872e-05, "loss": 9.5499, "step": 51440 }, { "epoch": 0.256935254313466, "grad_norm": 0.09587554633617401, "learning_rate": 2.234774337280032e-05, "loss": 9.5433, "step": 51450 }, { "epoch": 0.25698519313840545, "grad_norm": 0.09741827845573425, "learning_rate": 2.2346241457858772e-05, "loss": 9.5378, "step": 51460 }, { "epoch": 0.2570351319633449, "grad_norm": 0.09930811822414398, "learning_rate": 2.234473954291722e-05, "loss": 9.5404, "step": 51470 }, { "epoch": 0.25708507078828435, "grad_norm": 0.09290921688079834, "learning_rate": 2.2343237627975673e-05, "loss": 9.5358, "step": 51480 }, { "epoch": 0.2571350096132238, "grad_norm": 0.09306775778532028, "learning_rate": 2.234173571303412e-05, "loss": 9.5465, "step": 51490 }, { "epoch": 0.25718494843816325, "grad_norm": 0.09969160705804825, "learning_rate": 2.2340233798092566e-05, "loss": 9.5441, "step": 51500 }, { "epoch": 0.2572348872631027, "grad_norm": 0.0934043824672699, "learning_rate": 2.233873188315102e-05, "loss": 9.5415, "step": 51510 }, { "epoch": 0.25728482608804215, "grad_norm": 0.10059630125761032, "learning_rate": 2.2337229968209467e-05, "loss": 9.5438, "step": 51520 }, { "epoch": 0.2573347649129816, "grad_norm": 0.09175658226013184, "learning_rate": 2.233572805326792e-05, "loss": 9.5439, "step": 51530 }, { "epoch": 0.25738470373792105, "grad_norm": 0.09227610379457474, "learning_rate": 2.2334226138326367e-05, "loss": 9.545, "step": 51540 }, { "epoch": 0.25743464256286047, "grad_norm": 0.09750757366418839, "learning_rate": 2.2332724223384814e-05, "loss": 9.5387, "step": 51550 }, { "epoch": 0.25748458138779995, "grad_norm": 0.09199635684490204, "learning_rate": 2.2331222308443267e-05, "loss": 9.5451, "step": 51560 }, { "epoch": 0.25753452021273937, "grad_norm": 0.09396953880786896, "learning_rate": 2.2329720393501714e-05, "loss": 9.5419, "step": 51570 }, { "epoch": 0.25758445903767885, "grad_norm": 0.0948280543088913, "learning_rate": 2.2328218478560168e-05, "loss": 9.543, "step": 51580 }, { "epoch": 0.25763439786261827, "grad_norm": 0.09518975019454956, "learning_rate": 2.2326716563618615e-05, "loss": 9.5468, "step": 51590 }, { "epoch": 0.25768433668755775, "grad_norm": 0.0924508273601532, "learning_rate": 2.232521464867706e-05, "loss": 9.5395, "step": 51600 }, { "epoch": 0.25773427551249717, "grad_norm": 0.09529732912778854, "learning_rate": 2.2323712733735515e-05, "loss": 9.54, "step": 51610 }, { "epoch": 0.25778421433743665, "grad_norm": 0.09561322629451752, "learning_rate": 2.232221081879396e-05, "loss": 9.5452, "step": 51620 }, { "epoch": 0.25783415316237607, "grad_norm": 0.0942932739853859, "learning_rate": 2.2320708903852415e-05, "loss": 9.5309, "step": 51630 }, { "epoch": 0.25788409198731554, "grad_norm": 0.09694642573595047, "learning_rate": 2.2319206988910862e-05, "loss": 9.5398, "step": 51640 }, { "epoch": 0.25793403081225497, "grad_norm": 0.09523008018732071, "learning_rate": 2.231770507396931e-05, "loss": 9.536, "step": 51650 }, { "epoch": 0.25798396963719444, "grad_norm": 0.09009785205125809, "learning_rate": 2.2316203159027762e-05, "loss": 9.5383, "step": 51660 }, { "epoch": 0.25803390846213387, "grad_norm": 0.09198308736085892, "learning_rate": 2.231470124408621e-05, "loss": 9.5422, "step": 51670 }, { "epoch": 0.25808384728707334, "grad_norm": 0.0955272763967514, "learning_rate": 2.2313199329144663e-05, "loss": 9.5352, "step": 51680 }, { "epoch": 0.25813378611201276, "grad_norm": 0.0900355726480484, "learning_rate": 2.231169741420311e-05, "loss": 9.5293, "step": 51690 }, { "epoch": 0.25818372493695224, "grad_norm": 0.0917731374502182, "learning_rate": 2.2310195499261556e-05, "loss": 9.5424, "step": 51700 }, { "epoch": 0.25823366376189166, "grad_norm": 0.09225501120090485, "learning_rate": 2.230869358432001e-05, "loss": 9.5442, "step": 51710 }, { "epoch": 0.25828360258683114, "grad_norm": 0.09712720662355423, "learning_rate": 2.2307191669378457e-05, "loss": 9.5388, "step": 51720 }, { "epoch": 0.25833354141177056, "grad_norm": 0.09137700498104095, "learning_rate": 2.230568975443691e-05, "loss": 9.5402, "step": 51730 }, { "epoch": 0.25838348023671004, "grad_norm": 0.09159164875745773, "learning_rate": 2.2304187839495357e-05, "loss": 9.5458, "step": 51740 }, { "epoch": 0.25843341906164946, "grad_norm": 0.09769763052463531, "learning_rate": 2.2302685924553804e-05, "loss": 9.5476, "step": 51750 }, { "epoch": 0.25848335788658894, "grad_norm": 0.09766492992639542, "learning_rate": 2.2301184009612257e-05, "loss": 9.5393, "step": 51760 }, { "epoch": 0.25853329671152836, "grad_norm": 0.09677687287330627, "learning_rate": 2.2299682094670704e-05, "loss": 9.534, "step": 51770 }, { "epoch": 0.25858323553646784, "grad_norm": 0.09470915794372559, "learning_rate": 2.2298180179729158e-05, "loss": 9.5395, "step": 51780 }, { "epoch": 0.25863317436140726, "grad_norm": 0.08983705192804337, "learning_rate": 2.2296678264787605e-05, "loss": 9.5329, "step": 51790 }, { "epoch": 0.25868311318634674, "grad_norm": 0.10104523599147797, "learning_rate": 2.2295176349846055e-05, "loss": 9.5324, "step": 51800 }, { "epoch": 0.25873305201128616, "grad_norm": 0.09755957871675491, "learning_rate": 2.2293674434904505e-05, "loss": 9.5345, "step": 51810 }, { "epoch": 0.25878299083622563, "grad_norm": 0.09510654956102371, "learning_rate": 2.229217251996295e-05, "loss": 9.5396, "step": 51820 }, { "epoch": 0.25883292966116506, "grad_norm": 0.10025627911090851, "learning_rate": 2.2290670605021405e-05, "loss": 9.541, "step": 51830 }, { "epoch": 0.25888286848610453, "grad_norm": 0.0947519913315773, "learning_rate": 2.2289168690079852e-05, "loss": 9.5382, "step": 51840 }, { "epoch": 0.25893280731104396, "grad_norm": 0.09344780445098877, "learning_rate": 2.2287666775138302e-05, "loss": 9.5367, "step": 51850 }, { "epoch": 0.25898274613598343, "grad_norm": 0.09429247677326202, "learning_rate": 2.2286164860196752e-05, "loss": 9.535, "step": 51860 }, { "epoch": 0.25903268496092285, "grad_norm": 0.09451945871114731, "learning_rate": 2.22846629452552e-05, "loss": 9.5271, "step": 51870 }, { "epoch": 0.25908262378586233, "grad_norm": 0.09508265554904938, "learning_rate": 2.2283161030313653e-05, "loss": 9.5307, "step": 51880 }, { "epoch": 0.25913256261080175, "grad_norm": 0.09736274182796478, "learning_rate": 2.22816591153721e-05, "loss": 9.5394, "step": 51890 }, { "epoch": 0.25918250143574123, "grad_norm": 0.09334583580493927, "learning_rate": 2.228015720043055e-05, "loss": 9.5279, "step": 51900 }, { "epoch": 0.25923244026068065, "grad_norm": 0.0936061292886734, "learning_rate": 2.2278655285489e-05, "loss": 9.5434, "step": 51910 }, { "epoch": 0.25928237908562013, "grad_norm": 0.09247171133756638, "learning_rate": 2.2277153370547447e-05, "loss": 9.5407, "step": 51920 }, { "epoch": 0.25933231791055955, "grad_norm": 0.09118923544883728, "learning_rate": 2.22756514556059e-05, "loss": 9.533, "step": 51930 }, { "epoch": 0.25938225673549903, "grad_norm": 0.09702643752098083, "learning_rate": 2.2274149540664347e-05, "loss": 9.5396, "step": 51940 }, { "epoch": 0.25943219556043845, "grad_norm": 0.0896030142903328, "learning_rate": 2.2272647625722797e-05, "loss": 9.5384, "step": 51950 }, { "epoch": 0.2594821343853779, "grad_norm": 0.09771367162466049, "learning_rate": 2.2271145710781247e-05, "loss": 9.531, "step": 51960 }, { "epoch": 0.25953207321031735, "grad_norm": 0.0948578342795372, "learning_rate": 2.2269643795839694e-05, "loss": 9.5386, "step": 51970 }, { "epoch": 0.2595820120352568, "grad_norm": 0.10098248720169067, "learning_rate": 2.2268141880898148e-05, "loss": 9.5335, "step": 51980 }, { "epoch": 0.25963195086019625, "grad_norm": 0.08970336616039276, "learning_rate": 2.2266639965956595e-05, "loss": 9.5386, "step": 51990 }, { "epoch": 0.2596818896851357, "grad_norm": 0.09408001601696014, "learning_rate": 2.2265138051015045e-05, "loss": 9.5355, "step": 52000 }, { "epoch": 0.25973182851007515, "grad_norm": 0.08999434113502502, "learning_rate": 2.2263636136073495e-05, "loss": 9.5373, "step": 52010 }, { "epoch": 0.2597817673350146, "grad_norm": 0.09277333319187164, "learning_rate": 2.226213422113194e-05, "loss": 9.5405, "step": 52020 }, { "epoch": 0.25983170615995405, "grad_norm": 0.09205369651317596, "learning_rate": 2.2260632306190395e-05, "loss": 9.5254, "step": 52030 }, { "epoch": 0.2598816449848935, "grad_norm": 0.09553728252649307, "learning_rate": 2.2259130391248842e-05, "loss": 9.5262, "step": 52040 }, { "epoch": 0.25993158380983294, "grad_norm": 0.08813092112541199, "learning_rate": 2.2257628476307292e-05, "loss": 9.536, "step": 52050 }, { "epoch": 0.2599815226347724, "grad_norm": 0.09862591326236725, "learning_rate": 2.2256126561365742e-05, "loss": 9.537, "step": 52060 }, { "epoch": 0.26003146145971184, "grad_norm": 0.09814270585775375, "learning_rate": 2.225462464642419e-05, "loss": 9.535, "step": 52070 }, { "epoch": 0.2600814002846513, "grad_norm": 0.08804214745759964, "learning_rate": 2.2253122731482643e-05, "loss": 9.5421, "step": 52080 }, { "epoch": 0.26013133910959074, "grad_norm": 0.09738943725824356, "learning_rate": 2.225162081654109e-05, "loss": 9.5307, "step": 52090 }, { "epoch": 0.2601812779345302, "grad_norm": 0.09532274305820465, "learning_rate": 2.225011890159954e-05, "loss": 9.541, "step": 52100 }, { "epoch": 0.26023121675946964, "grad_norm": 0.09409879148006439, "learning_rate": 2.224861698665799e-05, "loss": 9.5364, "step": 52110 }, { "epoch": 0.2602811555844091, "grad_norm": 0.09654781222343445, "learning_rate": 2.224711507171644e-05, "loss": 9.5443, "step": 52120 }, { "epoch": 0.26033109440934854, "grad_norm": 0.09525051712989807, "learning_rate": 2.224561315677489e-05, "loss": 9.5404, "step": 52130 }, { "epoch": 0.260381033234288, "grad_norm": 0.09068022668361664, "learning_rate": 2.2244111241833337e-05, "loss": 9.529, "step": 52140 }, { "epoch": 0.26043097205922744, "grad_norm": 0.09324584156274796, "learning_rate": 2.2242609326891787e-05, "loss": 9.5402, "step": 52150 }, { "epoch": 0.2604809108841669, "grad_norm": 0.0990164652466774, "learning_rate": 2.2241107411950237e-05, "loss": 9.5354, "step": 52160 }, { "epoch": 0.26053084970910634, "grad_norm": 0.09122282266616821, "learning_rate": 2.2239605497008688e-05, "loss": 9.538, "step": 52170 }, { "epoch": 0.2605807885340458, "grad_norm": 0.09253876656293869, "learning_rate": 2.2238103582067138e-05, "loss": 9.5287, "step": 52180 }, { "epoch": 0.26063072735898524, "grad_norm": 0.09256315976381302, "learning_rate": 2.2236601667125585e-05, "loss": 9.5346, "step": 52190 }, { "epoch": 0.2606806661839247, "grad_norm": 0.09512501209974289, "learning_rate": 2.2235099752184035e-05, "loss": 9.533, "step": 52200 }, { "epoch": 0.26073060500886414, "grad_norm": 0.09339733421802521, "learning_rate": 2.2233597837242485e-05, "loss": 9.5352, "step": 52210 }, { "epoch": 0.2607805438338036, "grad_norm": 0.10032928735017776, "learning_rate": 2.2232095922300935e-05, "loss": 9.5266, "step": 52220 }, { "epoch": 0.26083048265874303, "grad_norm": 0.09287399053573608, "learning_rate": 2.2230594007359385e-05, "loss": 9.5329, "step": 52230 }, { "epoch": 0.2608804214836825, "grad_norm": 0.09904525429010391, "learning_rate": 2.2229092092417832e-05, "loss": 9.534, "step": 52240 }, { "epoch": 0.26093036030862193, "grad_norm": 0.10111285001039505, "learning_rate": 2.2227590177476282e-05, "loss": 9.5349, "step": 52250 }, { "epoch": 0.2609802991335614, "grad_norm": 0.10239645838737488, "learning_rate": 2.2226088262534732e-05, "loss": 9.5331, "step": 52260 }, { "epoch": 0.26103023795850083, "grad_norm": 0.09370391070842743, "learning_rate": 2.2224586347593183e-05, "loss": 9.5251, "step": 52270 }, { "epoch": 0.2610801767834403, "grad_norm": 0.09311920404434204, "learning_rate": 2.2223084432651633e-05, "loss": 9.5298, "step": 52280 }, { "epoch": 0.26113011560837973, "grad_norm": 0.0964302271604538, "learning_rate": 2.222158251771008e-05, "loss": 9.5224, "step": 52290 }, { "epoch": 0.2611800544333192, "grad_norm": 0.09276770055294037, "learning_rate": 2.222008060276853e-05, "loss": 9.5257, "step": 52300 }, { "epoch": 0.26122999325825863, "grad_norm": 0.09495028853416443, "learning_rate": 2.221857868782698e-05, "loss": 9.5282, "step": 52310 }, { "epoch": 0.2612799320831981, "grad_norm": 0.09358720481395721, "learning_rate": 2.221707677288543e-05, "loss": 9.5337, "step": 52320 }, { "epoch": 0.26132987090813753, "grad_norm": 0.09454384446144104, "learning_rate": 2.221557485794388e-05, "loss": 9.5262, "step": 52330 }, { "epoch": 0.261379809733077, "grad_norm": 0.09213569760322571, "learning_rate": 2.2214072943002327e-05, "loss": 9.5294, "step": 52340 }, { "epoch": 0.26142974855801643, "grad_norm": 0.08967851847410202, "learning_rate": 2.2212571028060777e-05, "loss": 9.5267, "step": 52350 }, { "epoch": 0.2614796873829559, "grad_norm": 0.09236017614603043, "learning_rate": 2.2211069113119227e-05, "loss": 9.5257, "step": 52360 }, { "epoch": 0.2615296262078953, "grad_norm": 0.09722200036048889, "learning_rate": 2.2209567198177678e-05, "loss": 9.526, "step": 52370 }, { "epoch": 0.2615795650328348, "grad_norm": 0.09535818547010422, "learning_rate": 2.2208065283236128e-05, "loss": 9.5367, "step": 52380 }, { "epoch": 0.2616295038577742, "grad_norm": 0.09433511644601822, "learning_rate": 2.2206563368294575e-05, "loss": 9.5386, "step": 52390 }, { "epoch": 0.2616794426827137, "grad_norm": 0.09581523388624191, "learning_rate": 2.2205061453353025e-05, "loss": 9.5319, "step": 52400 }, { "epoch": 0.2617293815076531, "grad_norm": 0.0966779887676239, "learning_rate": 2.2203559538411475e-05, "loss": 9.5217, "step": 52410 }, { "epoch": 0.26177932033259255, "grad_norm": 0.09320307523012161, "learning_rate": 2.2202057623469925e-05, "loss": 9.5237, "step": 52420 }, { "epoch": 0.261829259157532, "grad_norm": 0.09114668518304825, "learning_rate": 2.2200555708528375e-05, "loss": 9.53, "step": 52430 }, { "epoch": 0.26187919798247145, "grad_norm": 0.09709704667329788, "learning_rate": 2.2199053793586825e-05, "loss": 9.5339, "step": 52440 }, { "epoch": 0.2619291368074109, "grad_norm": 0.09141276776790619, "learning_rate": 2.2197551878645272e-05, "loss": 9.52, "step": 52450 }, { "epoch": 0.26197907563235034, "grad_norm": 0.09864918142557144, "learning_rate": 2.2196049963703722e-05, "loss": 9.525, "step": 52460 }, { "epoch": 0.2620290144572898, "grad_norm": 0.09360401332378387, "learning_rate": 2.2194548048762173e-05, "loss": 9.5368, "step": 52470 }, { "epoch": 0.26207895328222924, "grad_norm": 0.09955708682537079, "learning_rate": 2.2193046133820623e-05, "loss": 9.5265, "step": 52480 }, { "epoch": 0.2621288921071687, "grad_norm": 0.09114803373813629, "learning_rate": 2.2191544218879073e-05, "loss": 9.5272, "step": 52490 }, { "epoch": 0.26217883093210814, "grad_norm": 0.09551452845335007, "learning_rate": 2.219004230393752e-05, "loss": 9.5356, "step": 52500 }, { "epoch": 0.2622287697570476, "grad_norm": 0.09264279901981354, "learning_rate": 2.218854038899597e-05, "loss": 9.5254, "step": 52510 }, { "epoch": 0.26227870858198704, "grad_norm": 0.09141562134027481, "learning_rate": 2.218703847405442e-05, "loss": 9.5245, "step": 52520 }, { "epoch": 0.2623286474069265, "grad_norm": 0.0920877605676651, "learning_rate": 2.218553655911287e-05, "loss": 9.5334, "step": 52530 }, { "epoch": 0.26237858623186594, "grad_norm": 0.09428731352090836, "learning_rate": 2.218403464417132e-05, "loss": 9.5331, "step": 52540 }, { "epoch": 0.2624285250568054, "grad_norm": 0.09918078780174255, "learning_rate": 2.2182532729229767e-05, "loss": 9.515, "step": 52550 }, { "epoch": 0.26247846388174484, "grad_norm": 0.08825034648180008, "learning_rate": 2.2181030814288217e-05, "loss": 9.5283, "step": 52560 }, { "epoch": 0.2625284027066843, "grad_norm": 0.09419302642345428, "learning_rate": 2.2179528899346668e-05, "loss": 9.5242, "step": 52570 }, { "epoch": 0.26257834153162374, "grad_norm": 0.09222215414047241, "learning_rate": 2.2178026984405118e-05, "loss": 9.5165, "step": 52580 }, { "epoch": 0.2626282803565632, "grad_norm": 0.0944628119468689, "learning_rate": 2.2176525069463568e-05, "loss": 9.519, "step": 52590 }, { "epoch": 0.26267821918150264, "grad_norm": 0.09280452877283096, "learning_rate": 2.2175023154522015e-05, "loss": 9.5191, "step": 52600 }, { "epoch": 0.2627281580064421, "grad_norm": 0.09857330471277237, "learning_rate": 2.2173521239580465e-05, "loss": 9.5195, "step": 52610 }, { "epoch": 0.26277809683138154, "grad_norm": 0.10611597448587418, "learning_rate": 2.2172019324638915e-05, "loss": 9.5201, "step": 52620 }, { "epoch": 0.262828035656321, "grad_norm": 0.09467349946498871, "learning_rate": 2.2170517409697365e-05, "loss": 9.5355, "step": 52630 }, { "epoch": 0.26287797448126043, "grad_norm": 0.0893920361995697, "learning_rate": 2.2169015494755815e-05, "loss": 9.5229, "step": 52640 }, { "epoch": 0.2629279133061999, "grad_norm": 0.09524576365947723, "learning_rate": 2.2167513579814262e-05, "loss": 9.5212, "step": 52650 }, { "epoch": 0.26297785213113933, "grad_norm": 0.09462094306945801, "learning_rate": 2.2166011664872712e-05, "loss": 9.5331, "step": 52660 }, { "epoch": 0.2630277909560788, "grad_norm": 0.1007109135389328, "learning_rate": 2.2164509749931163e-05, "loss": 9.5255, "step": 52670 }, { "epoch": 0.26307772978101823, "grad_norm": 0.09446932375431061, "learning_rate": 2.2163007834989613e-05, "loss": 9.5272, "step": 52680 }, { "epoch": 0.2631276686059577, "grad_norm": 0.08853420615196228, "learning_rate": 2.2161505920048063e-05, "loss": 9.5298, "step": 52690 }, { "epoch": 0.26317760743089713, "grad_norm": 0.09556553512811661, "learning_rate": 2.216000400510651e-05, "loss": 9.5255, "step": 52700 }, { "epoch": 0.2632275462558366, "grad_norm": 0.09378375113010406, "learning_rate": 2.215850209016496e-05, "loss": 9.5139, "step": 52710 }, { "epoch": 0.26327748508077603, "grad_norm": 0.09631894528865814, "learning_rate": 2.215700017522341e-05, "loss": 9.5168, "step": 52720 }, { "epoch": 0.2633274239057155, "grad_norm": 0.09347805380821228, "learning_rate": 2.215549826028186e-05, "loss": 9.5312, "step": 52730 }, { "epoch": 0.26337736273065493, "grad_norm": 0.09734681993722916, "learning_rate": 2.215399634534031e-05, "loss": 9.5144, "step": 52740 }, { "epoch": 0.2634273015555944, "grad_norm": 0.08855658769607544, "learning_rate": 2.2152494430398757e-05, "loss": 9.5155, "step": 52750 }, { "epoch": 0.26347724038053383, "grad_norm": 0.09233860671520233, "learning_rate": 2.215099251545721e-05, "loss": 9.5132, "step": 52760 }, { "epoch": 0.2635271792054733, "grad_norm": 0.0949973613023758, "learning_rate": 2.2149490600515658e-05, "loss": 9.5156, "step": 52770 }, { "epoch": 0.2635771180304127, "grad_norm": 0.10060823708772659, "learning_rate": 2.2147988685574108e-05, "loss": 9.5138, "step": 52780 }, { "epoch": 0.2636270568553522, "grad_norm": 0.09591439366340637, "learning_rate": 2.2146486770632558e-05, "loss": 9.5292, "step": 52790 }, { "epoch": 0.2636769956802916, "grad_norm": 0.09454584121704102, "learning_rate": 2.2144984855691005e-05, "loss": 9.5177, "step": 52800 }, { "epoch": 0.2637269345052311, "grad_norm": 0.09631575644016266, "learning_rate": 2.214348294074946e-05, "loss": 9.5153, "step": 52810 }, { "epoch": 0.2637768733301705, "grad_norm": 0.09541364014148712, "learning_rate": 2.2141981025807905e-05, "loss": 9.5259, "step": 52820 }, { "epoch": 0.26382681215511, "grad_norm": 0.09758414328098297, "learning_rate": 2.2140479110866355e-05, "loss": 9.5273, "step": 52830 }, { "epoch": 0.2638767509800494, "grad_norm": 0.09002380818128586, "learning_rate": 2.2138977195924805e-05, "loss": 9.5144, "step": 52840 }, { "epoch": 0.2639266898049889, "grad_norm": 0.09718296676874161, "learning_rate": 2.2137475280983252e-05, "loss": 9.515, "step": 52850 }, { "epoch": 0.2639766286299283, "grad_norm": 0.09190072864294052, "learning_rate": 2.2135973366041706e-05, "loss": 9.5239, "step": 52860 }, { "epoch": 0.2640265674548678, "grad_norm": 0.0970335304737091, "learning_rate": 2.2134471451100153e-05, "loss": 9.5219, "step": 52870 }, { "epoch": 0.2640765062798072, "grad_norm": 0.09237813204526901, "learning_rate": 2.2132969536158603e-05, "loss": 9.5324, "step": 52880 }, { "epoch": 0.2641264451047467, "grad_norm": 0.09487534314393997, "learning_rate": 2.2131467621217053e-05, "loss": 9.5275, "step": 52890 }, { "epoch": 0.2641763839296861, "grad_norm": 0.09346019476652145, "learning_rate": 2.21299657062755e-05, "loss": 9.521, "step": 52900 }, { "epoch": 0.2642263227546256, "grad_norm": 0.09257887303829193, "learning_rate": 2.2128463791333953e-05, "loss": 9.5117, "step": 52910 }, { "epoch": 0.264276261579565, "grad_norm": 0.09638823568820953, "learning_rate": 2.21269618763924e-05, "loss": 9.5236, "step": 52920 }, { "epoch": 0.2643262004045045, "grad_norm": 0.09802195429801941, "learning_rate": 2.212545996145085e-05, "loss": 9.5187, "step": 52930 }, { "epoch": 0.2643761392294439, "grad_norm": 0.09367150813341141, "learning_rate": 2.21239580465093e-05, "loss": 9.5275, "step": 52940 }, { "epoch": 0.2644260780543834, "grad_norm": 0.09576943516731262, "learning_rate": 2.2122456131567747e-05, "loss": 9.5152, "step": 52950 }, { "epoch": 0.2644760168793228, "grad_norm": 0.10221412777900696, "learning_rate": 2.21209542166262e-05, "loss": 9.5211, "step": 52960 }, { "epoch": 0.2645259557042623, "grad_norm": 0.08980675041675568, "learning_rate": 2.2119452301684648e-05, "loss": 9.5274, "step": 52970 }, { "epoch": 0.2645758945292017, "grad_norm": 0.0930297002196312, "learning_rate": 2.2117950386743098e-05, "loss": 9.5114, "step": 52980 }, { "epoch": 0.2646258333541412, "grad_norm": 0.09479690343141556, "learning_rate": 2.2116448471801548e-05, "loss": 9.5105, "step": 52990 }, { "epoch": 0.2646757721790806, "grad_norm": 0.0916314572095871, "learning_rate": 2.2114946556859995e-05, "loss": 9.5178, "step": 53000 }, { "epoch": 0.2647257110040201, "grad_norm": 0.09060303866863251, "learning_rate": 2.211344464191845e-05, "loss": 9.5146, "step": 53010 }, { "epoch": 0.2647756498289595, "grad_norm": 0.09311266988515854, "learning_rate": 2.2111942726976895e-05, "loss": 9.5126, "step": 53020 }, { "epoch": 0.264825588653899, "grad_norm": 0.09283456206321716, "learning_rate": 2.2110440812035345e-05, "loss": 9.5175, "step": 53030 }, { "epoch": 0.2648755274788384, "grad_norm": 0.09691060334444046, "learning_rate": 2.2108938897093796e-05, "loss": 9.5163, "step": 53040 }, { "epoch": 0.2649254663037779, "grad_norm": 0.09444165974855423, "learning_rate": 2.2107436982152242e-05, "loss": 9.5194, "step": 53050 }, { "epoch": 0.2649754051287173, "grad_norm": 0.0917515829205513, "learning_rate": 2.2105935067210696e-05, "loss": 9.522, "step": 53060 }, { "epoch": 0.2650253439536568, "grad_norm": 0.093546062707901, "learning_rate": 2.2104433152269143e-05, "loss": 9.5124, "step": 53070 }, { "epoch": 0.2650752827785962, "grad_norm": 0.09106338769197464, "learning_rate": 2.2102931237327593e-05, "loss": 9.5194, "step": 53080 }, { "epoch": 0.2651252216035357, "grad_norm": 0.09387896209955215, "learning_rate": 2.2101429322386043e-05, "loss": 9.5174, "step": 53090 }, { "epoch": 0.2651751604284751, "grad_norm": 0.09550568461418152, "learning_rate": 2.209992740744449e-05, "loss": 9.5169, "step": 53100 }, { "epoch": 0.2652250992534146, "grad_norm": 0.09244140982627869, "learning_rate": 2.2098425492502943e-05, "loss": 9.513, "step": 53110 }, { "epoch": 0.265275038078354, "grad_norm": 0.09430118650197983, "learning_rate": 2.209692357756139e-05, "loss": 9.5206, "step": 53120 }, { "epoch": 0.2653249769032935, "grad_norm": 0.10021936148405075, "learning_rate": 2.2095421662619844e-05, "loss": 9.5203, "step": 53130 }, { "epoch": 0.2653749157282329, "grad_norm": 0.09453583508729935, "learning_rate": 2.209391974767829e-05, "loss": 9.5158, "step": 53140 }, { "epoch": 0.2654248545531724, "grad_norm": 0.09471450746059418, "learning_rate": 2.2092417832736737e-05, "loss": 9.5179, "step": 53150 }, { "epoch": 0.2654747933781118, "grad_norm": 0.0972927138209343, "learning_rate": 2.209091591779519e-05, "loss": 9.5145, "step": 53160 }, { "epoch": 0.2655247322030513, "grad_norm": 0.09311800450086594, "learning_rate": 2.2089414002853638e-05, "loss": 9.5106, "step": 53170 }, { "epoch": 0.2655746710279907, "grad_norm": 0.09145248681306839, "learning_rate": 2.208791208791209e-05, "loss": 9.5156, "step": 53180 }, { "epoch": 0.2656246098529302, "grad_norm": 0.09487327933311462, "learning_rate": 2.2086410172970538e-05, "loss": 9.5138, "step": 53190 }, { "epoch": 0.2656745486778696, "grad_norm": 0.09175003319978714, "learning_rate": 2.2084908258028985e-05, "loss": 9.5251, "step": 53200 }, { "epoch": 0.2657244875028091, "grad_norm": 0.09299546480178833, "learning_rate": 2.208340634308744e-05, "loss": 9.5187, "step": 53210 }, { "epoch": 0.2657744263277485, "grad_norm": 0.09463711827993393, "learning_rate": 2.2081904428145885e-05, "loss": 9.5196, "step": 53220 }, { "epoch": 0.265824365152688, "grad_norm": 0.09696915000677109, "learning_rate": 2.208040251320434e-05, "loss": 9.5094, "step": 53230 }, { "epoch": 0.2658743039776274, "grad_norm": 0.1047021821141243, "learning_rate": 2.2078900598262786e-05, "loss": 9.5022, "step": 53240 }, { "epoch": 0.2659242428025669, "grad_norm": 0.09355685114860535, "learning_rate": 2.2077398683321232e-05, "loss": 9.5123, "step": 53250 }, { "epoch": 0.2659741816275063, "grad_norm": 0.09120495617389679, "learning_rate": 2.2075896768379686e-05, "loss": 9.5105, "step": 53260 }, { "epoch": 0.2660241204524458, "grad_norm": 0.08929228782653809, "learning_rate": 2.2074394853438133e-05, "loss": 9.5146, "step": 53270 }, { "epoch": 0.2660740592773852, "grad_norm": 0.09182194620370865, "learning_rate": 2.2072892938496586e-05, "loss": 9.5202, "step": 53280 }, { "epoch": 0.2661239981023247, "grad_norm": 0.0908316969871521, "learning_rate": 2.2071391023555033e-05, "loss": 9.515, "step": 53290 }, { "epoch": 0.2661739369272641, "grad_norm": 0.09303124248981476, "learning_rate": 2.206988910861348e-05, "loss": 9.5074, "step": 53300 }, { "epoch": 0.2662238757522036, "grad_norm": 0.09067052602767944, "learning_rate": 2.2068387193671933e-05, "loss": 9.5062, "step": 53310 }, { "epoch": 0.266273814577143, "grad_norm": 0.09253434091806412, "learning_rate": 2.206688527873038e-05, "loss": 9.5053, "step": 53320 }, { "epoch": 0.2663237534020825, "grad_norm": 0.09222152829170227, "learning_rate": 2.2065383363788834e-05, "loss": 9.5116, "step": 53330 }, { "epoch": 0.2663736922270219, "grad_norm": 0.09831732511520386, "learning_rate": 2.206388144884728e-05, "loss": 9.5086, "step": 53340 }, { "epoch": 0.2664236310519614, "grad_norm": 0.08993234485387802, "learning_rate": 2.2062379533905727e-05, "loss": 9.5213, "step": 53350 }, { "epoch": 0.2664735698769008, "grad_norm": 0.09357697516679764, "learning_rate": 2.206087761896418e-05, "loss": 9.5129, "step": 53360 }, { "epoch": 0.2665235087018403, "grad_norm": 0.09632600098848343, "learning_rate": 2.2059375704022628e-05, "loss": 9.5079, "step": 53370 }, { "epoch": 0.2665734475267797, "grad_norm": 0.09463771432638168, "learning_rate": 2.205787378908108e-05, "loss": 9.5108, "step": 53380 }, { "epoch": 0.26662338635171917, "grad_norm": 0.09664889425039291, "learning_rate": 2.2056371874139528e-05, "loss": 9.5136, "step": 53390 }, { "epoch": 0.2666733251766586, "grad_norm": 0.0904850959777832, "learning_rate": 2.2054869959197975e-05, "loss": 9.5074, "step": 53400 }, { "epoch": 0.266723264001598, "grad_norm": 0.0935087651014328, "learning_rate": 2.205336804425643e-05, "loss": 9.5163, "step": 53410 }, { "epoch": 0.2667732028265375, "grad_norm": 0.09283655881881714, "learning_rate": 2.2051866129314875e-05, "loss": 9.517, "step": 53420 }, { "epoch": 0.2668231416514769, "grad_norm": 0.09471392631530762, "learning_rate": 2.205036421437333e-05, "loss": 9.5124, "step": 53430 }, { "epoch": 0.2668730804764164, "grad_norm": 0.10181424766778946, "learning_rate": 2.2048862299431776e-05, "loss": 9.4963, "step": 53440 }, { "epoch": 0.2669230193013558, "grad_norm": 0.09600277245044708, "learning_rate": 2.2047360384490226e-05, "loss": 9.5115, "step": 53450 }, { "epoch": 0.2669729581262953, "grad_norm": 0.10142569988965988, "learning_rate": 2.2045858469548676e-05, "loss": 9.5058, "step": 53460 }, { "epoch": 0.2670228969512347, "grad_norm": 0.09453926235437393, "learning_rate": 2.2044356554607123e-05, "loss": 9.5034, "step": 53470 }, { "epoch": 0.2670728357761742, "grad_norm": 0.09854920208454132, "learning_rate": 2.2042854639665576e-05, "loss": 9.5109, "step": 53480 }, { "epoch": 0.2671227746011136, "grad_norm": 0.09525490552186966, "learning_rate": 2.2041352724724023e-05, "loss": 9.5093, "step": 53490 }, { "epoch": 0.2671727134260531, "grad_norm": 0.10124125331640244, "learning_rate": 2.2039850809782473e-05, "loss": 9.5103, "step": 53500 }, { "epoch": 0.2672226522509925, "grad_norm": 0.09206513315439224, "learning_rate": 2.2038348894840923e-05, "loss": 9.5106, "step": 53510 }, { "epoch": 0.267272591075932, "grad_norm": 0.09180595725774765, "learning_rate": 2.203684697989937e-05, "loss": 9.5057, "step": 53520 }, { "epoch": 0.2673225299008714, "grad_norm": 0.09658707678318024, "learning_rate": 2.2035345064957824e-05, "loss": 9.5024, "step": 53530 }, { "epoch": 0.2673724687258109, "grad_norm": 0.08750133216381073, "learning_rate": 2.203384315001627e-05, "loss": 9.5091, "step": 53540 }, { "epoch": 0.2674224075507503, "grad_norm": 0.09664824604988098, "learning_rate": 2.203234123507472e-05, "loss": 9.5102, "step": 53550 }, { "epoch": 0.2674723463756898, "grad_norm": 0.09974256902933121, "learning_rate": 2.203083932013317e-05, "loss": 9.5205, "step": 53560 }, { "epoch": 0.2675222852006292, "grad_norm": 0.08979537338018417, "learning_rate": 2.2029337405191618e-05, "loss": 9.5085, "step": 53570 }, { "epoch": 0.2675722240255687, "grad_norm": 0.09149536490440369, "learning_rate": 2.202783549025007e-05, "loss": 9.5113, "step": 53580 }, { "epoch": 0.2676221628505081, "grad_norm": 0.09277680516242981, "learning_rate": 2.2026333575308518e-05, "loss": 9.5072, "step": 53590 }, { "epoch": 0.2676721016754476, "grad_norm": 0.09466489404439926, "learning_rate": 2.2024831660366968e-05, "loss": 9.5135, "step": 53600 }, { "epoch": 0.267722040500387, "grad_norm": 0.10129041969776154, "learning_rate": 2.202332974542542e-05, "loss": 9.5097, "step": 53610 }, { "epoch": 0.2677719793253265, "grad_norm": 0.09101355820894241, "learning_rate": 2.2021827830483865e-05, "loss": 9.5139, "step": 53620 }, { "epoch": 0.2678219181502659, "grad_norm": 0.09977221488952637, "learning_rate": 2.202032591554232e-05, "loss": 9.5064, "step": 53630 }, { "epoch": 0.2678718569752054, "grad_norm": 0.09725436568260193, "learning_rate": 2.2018824000600766e-05, "loss": 9.505, "step": 53640 }, { "epoch": 0.2679217958001448, "grad_norm": 0.09925449639558792, "learning_rate": 2.2017322085659216e-05, "loss": 9.5171, "step": 53650 }, { "epoch": 0.2679717346250843, "grad_norm": 0.09515231102705002, "learning_rate": 2.2015820170717666e-05, "loss": 9.5013, "step": 53660 }, { "epoch": 0.2680216734500237, "grad_norm": 0.09482384473085403, "learning_rate": 2.2014318255776113e-05, "loss": 9.4974, "step": 53670 }, { "epoch": 0.2680716122749632, "grad_norm": 0.09810610860586166, "learning_rate": 2.2012816340834566e-05, "loss": 9.5044, "step": 53680 }, { "epoch": 0.2681215510999026, "grad_norm": 0.09548275917768478, "learning_rate": 2.2011314425893013e-05, "loss": 9.5023, "step": 53690 }, { "epoch": 0.2681714899248421, "grad_norm": 0.10010819137096405, "learning_rate": 2.2009812510951463e-05, "loss": 9.5103, "step": 53700 }, { "epoch": 0.2682214287497815, "grad_norm": 0.09645957499742508, "learning_rate": 2.2008310596009913e-05, "loss": 9.4937, "step": 53710 }, { "epoch": 0.268271367574721, "grad_norm": 0.09398103505373001, "learning_rate": 2.200680868106836e-05, "loss": 9.496, "step": 53720 }, { "epoch": 0.2683213063996604, "grad_norm": 0.09286051243543625, "learning_rate": 2.2005306766126814e-05, "loss": 9.5036, "step": 53730 }, { "epoch": 0.2683712452245999, "grad_norm": 0.09161993116140366, "learning_rate": 2.200380485118526e-05, "loss": 9.5114, "step": 53740 }, { "epoch": 0.2684211840495393, "grad_norm": 0.09624673426151276, "learning_rate": 2.200230293624371e-05, "loss": 9.5007, "step": 53750 }, { "epoch": 0.2684711228744788, "grad_norm": 0.08799513429403305, "learning_rate": 2.200080102130216e-05, "loss": 9.5013, "step": 53760 }, { "epoch": 0.2685210616994182, "grad_norm": 0.09283116459846497, "learning_rate": 2.199929910636061e-05, "loss": 9.5102, "step": 53770 }, { "epoch": 0.2685710005243577, "grad_norm": 0.09318029880523682, "learning_rate": 2.199779719141906e-05, "loss": 9.509, "step": 53780 }, { "epoch": 0.2686209393492971, "grad_norm": 0.09814724326133728, "learning_rate": 2.1996295276477508e-05, "loss": 9.502, "step": 53790 }, { "epoch": 0.26867087817423657, "grad_norm": 0.08983851969242096, "learning_rate": 2.1994793361535958e-05, "loss": 9.504, "step": 53800 }, { "epoch": 0.268720816999176, "grad_norm": 0.09457878768444061, "learning_rate": 2.199329144659441e-05, "loss": 9.4922, "step": 53810 }, { "epoch": 0.26877075582411547, "grad_norm": 0.09290546178817749, "learning_rate": 2.199178953165286e-05, "loss": 9.497, "step": 53820 }, { "epoch": 0.2688206946490549, "grad_norm": 0.09579872339963913, "learning_rate": 2.199028761671131e-05, "loss": 9.5007, "step": 53830 }, { "epoch": 0.26887063347399437, "grad_norm": 0.09526251256465912, "learning_rate": 2.1988785701769756e-05, "loss": 9.4973, "step": 53840 }, { "epoch": 0.2689205722989338, "grad_norm": 0.09075421094894409, "learning_rate": 2.1987283786828206e-05, "loss": 9.4985, "step": 53850 }, { "epoch": 0.26897051112387327, "grad_norm": 0.09878144413232803, "learning_rate": 2.1985781871886656e-05, "loss": 9.4944, "step": 53860 }, { "epoch": 0.2690204499488127, "grad_norm": 0.092621810734272, "learning_rate": 2.1984279956945106e-05, "loss": 9.4981, "step": 53870 }, { "epoch": 0.26907038877375217, "grad_norm": 0.0957510843873024, "learning_rate": 2.1982778042003556e-05, "loss": 9.5005, "step": 53880 }, { "epoch": 0.2691203275986916, "grad_norm": 0.0936402752995491, "learning_rate": 2.1981276127062003e-05, "loss": 9.4948, "step": 53890 }, { "epoch": 0.26917026642363107, "grad_norm": 0.09587475657463074, "learning_rate": 2.1979774212120453e-05, "loss": 9.5018, "step": 53900 }, { "epoch": 0.2692202052485705, "grad_norm": 0.09456077963113785, "learning_rate": 2.1978272297178903e-05, "loss": 9.496, "step": 53910 }, { "epoch": 0.26927014407350996, "grad_norm": 0.10105657577514648, "learning_rate": 2.1976770382237354e-05, "loss": 9.4946, "step": 53920 }, { "epoch": 0.2693200828984494, "grad_norm": 0.09980333596467972, "learning_rate": 2.1975268467295804e-05, "loss": 9.4955, "step": 53930 }, { "epoch": 0.26937002172338886, "grad_norm": 0.09290049970149994, "learning_rate": 2.197376655235425e-05, "loss": 9.5054, "step": 53940 }, { "epoch": 0.2694199605483283, "grad_norm": 0.09242033958435059, "learning_rate": 2.19722646374127e-05, "loss": 9.4997, "step": 53950 }, { "epoch": 0.26946989937326776, "grad_norm": 0.09912257641553879, "learning_rate": 2.197076272247115e-05, "loss": 9.4969, "step": 53960 }, { "epoch": 0.2695198381982072, "grad_norm": 0.10186777263879776, "learning_rate": 2.19692608075296e-05, "loss": 9.4975, "step": 53970 }, { "epoch": 0.26956977702314666, "grad_norm": 0.09263597428798676, "learning_rate": 2.196775889258805e-05, "loss": 9.4902, "step": 53980 }, { "epoch": 0.2696197158480861, "grad_norm": 0.09329408407211304, "learning_rate": 2.1966256977646498e-05, "loss": 9.4953, "step": 53990 }, { "epoch": 0.26966965467302556, "grad_norm": 0.09728638082742691, "learning_rate": 2.1964755062704948e-05, "loss": 9.5026, "step": 54000 }, { "epoch": 0.269719593497965, "grad_norm": 0.0942746102809906, "learning_rate": 2.19632531477634e-05, "loss": 9.5005, "step": 54010 }, { "epoch": 0.26976953232290446, "grad_norm": 0.08841151744127274, "learning_rate": 2.196175123282185e-05, "loss": 9.501, "step": 54020 }, { "epoch": 0.2698194711478439, "grad_norm": 0.09076721221208572, "learning_rate": 2.19602493178803e-05, "loss": 9.4855, "step": 54030 }, { "epoch": 0.26986940997278336, "grad_norm": 0.09586405754089355, "learning_rate": 2.1958747402938746e-05, "loss": 9.5019, "step": 54040 }, { "epoch": 0.2699193487977228, "grad_norm": 0.09588458389043808, "learning_rate": 2.1957245487997196e-05, "loss": 9.4985, "step": 54050 }, { "epoch": 0.26996928762266226, "grad_norm": 0.09645280987024307, "learning_rate": 2.1955743573055646e-05, "loss": 9.5033, "step": 54060 }, { "epoch": 0.2700192264476017, "grad_norm": 0.0946224108338356, "learning_rate": 2.1954241658114096e-05, "loss": 9.507, "step": 54070 }, { "epoch": 0.27006916527254116, "grad_norm": 0.09653602540493011, "learning_rate": 2.1952739743172546e-05, "loss": 9.5072, "step": 54080 }, { "epoch": 0.2701191040974806, "grad_norm": 0.09602764248847961, "learning_rate": 2.1951237828230996e-05, "loss": 9.4974, "step": 54090 }, { "epoch": 0.27016904292242006, "grad_norm": 0.09846575558185577, "learning_rate": 2.1949735913289443e-05, "loss": 9.4918, "step": 54100 }, { "epoch": 0.2702189817473595, "grad_norm": 0.09469395130872726, "learning_rate": 2.1948233998347893e-05, "loss": 9.4976, "step": 54110 }, { "epoch": 0.27026892057229895, "grad_norm": 0.09206046909093857, "learning_rate": 2.1946732083406344e-05, "loss": 9.494, "step": 54120 }, { "epoch": 0.2703188593972384, "grad_norm": 0.10043230652809143, "learning_rate": 2.1945230168464794e-05, "loss": 9.4947, "step": 54130 }, { "epoch": 0.27036879822217785, "grad_norm": 0.09006008505821228, "learning_rate": 2.1943728253523244e-05, "loss": 9.5057, "step": 54140 }, { "epoch": 0.2704187370471173, "grad_norm": 0.09778618067502975, "learning_rate": 2.194222633858169e-05, "loss": 9.5036, "step": 54150 }, { "epoch": 0.27046867587205675, "grad_norm": 0.09430201351642609, "learning_rate": 2.194072442364014e-05, "loss": 9.5066, "step": 54160 }, { "epoch": 0.2705186146969962, "grad_norm": 0.09432802349328995, "learning_rate": 2.193922250869859e-05, "loss": 9.4916, "step": 54170 }, { "epoch": 0.27056855352193565, "grad_norm": 0.09633664041757584, "learning_rate": 2.193772059375704e-05, "loss": 9.4962, "step": 54180 }, { "epoch": 0.2706184923468751, "grad_norm": 0.09365957230329514, "learning_rate": 2.193621867881549e-05, "loss": 9.5004, "step": 54190 }, { "epoch": 0.27066843117181455, "grad_norm": 0.0957445502281189, "learning_rate": 2.1934716763873938e-05, "loss": 9.5006, "step": 54200 }, { "epoch": 0.27071836999675397, "grad_norm": 0.09612950682640076, "learning_rate": 2.193321484893239e-05, "loss": 9.4985, "step": 54210 }, { "epoch": 0.27076830882169345, "grad_norm": 0.09785423427820206, "learning_rate": 2.193171293399084e-05, "loss": 9.4971, "step": 54220 }, { "epoch": 0.27081824764663287, "grad_norm": 0.09333615750074387, "learning_rate": 2.193021101904929e-05, "loss": 9.4973, "step": 54230 }, { "epoch": 0.27086818647157235, "grad_norm": 0.10054484754800797, "learning_rate": 2.192870910410774e-05, "loss": 9.5062, "step": 54240 }, { "epoch": 0.27091812529651177, "grad_norm": 0.09158135205507278, "learning_rate": 2.1927207189166186e-05, "loss": 9.5018, "step": 54250 }, { "epoch": 0.27096806412145125, "grad_norm": 0.09675116837024689, "learning_rate": 2.1925705274224636e-05, "loss": 9.5022, "step": 54260 }, { "epoch": 0.27101800294639067, "grad_norm": 0.09301751852035522, "learning_rate": 2.1924203359283086e-05, "loss": 9.4876, "step": 54270 }, { "epoch": 0.27106794177133015, "grad_norm": 0.09641415625810623, "learning_rate": 2.1922701444341536e-05, "loss": 9.489, "step": 54280 }, { "epoch": 0.27111788059626957, "grad_norm": 0.09570831805467606, "learning_rate": 2.1921199529399986e-05, "loss": 9.4929, "step": 54290 }, { "epoch": 0.27116781942120904, "grad_norm": 0.09328965097665787, "learning_rate": 2.1919697614458433e-05, "loss": 9.4963, "step": 54300 }, { "epoch": 0.27121775824614847, "grad_norm": 0.09763366729021072, "learning_rate": 2.1918195699516883e-05, "loss": 9.4935, "step": 54310 }, { "epoch": 0.27126769707108794, "grad_norm": 0.097484290599823, "learning_rate": 2.1916693784575334e-05, "loss": 9.4983, "step": 54320 }, { "epoch": 0.27131763589602736, "grad_norm": 0.09273017942905426, "learning_rate": 2.1915191869633784e-05, "loss": 9.5023, "step": 54330 }, { "epoch": 0.27136757472096684, "grad_norm": 0.08936069160699844, "learning_rate": 2.1913689954692234e-05, "loss": 9.4918, "step": 54340 }, { "epoch": 0.27141751354590626, "grad_norm": 0.08930017799139023, "learning_rate": 2.191218803975068e-05, "loss": 9.4937, "step": 54350 }, { "epoch": 0.27146745237084574, "grad_norm": 0.0880388543009758, "learning_rate": 2.191068612480913e-05, "loss": 9.4962, "step": 54360 }, { "epoch": 0.27151739119578516, "grad_norm": 0.10103661566972733, "learning_rate": 2.190918420986758e-05, "loss": 9.4915, "step": 54370 }, { "epoch": 0.27156733002072464, "grad_norm": 0.09336966276168823, "learning_rate": 2.190768229492603e-05, "loss": 9.487, "step": 54380 }, { "epoch": 0.27161726884566406, "grad_norm": 0.09124721586704254, "learning_rate": 2.190618037998448e-05, "loss": 9.4872, "step": 54390 }, { "epoch": 0.2716672076706035, "grad_norm": 0.09562219679355621, "learning_rate": 2.1904678465042928e-05, "loss": 9.4953, "step": 54400 }, { "epoch": 0.27171714649554296, "grad_norm": 0.0949045866727829, "learning_rate": 2.1903176550101382e-05, "loss": 9.505, "step": 54410 }, { "epoch": 0.2717670853204824, "grad_norm": 0.09756708145141602, "learning_rate": 2.190167463515983e-05, "loss": 9.5009, "step": 54420 }, { "epoch": 0.27181702414542186, "grad_norm": 0.08797673135995865, "learning_rate": 2.190017272021828e-05, "loss": 9.4954, "step": 54430 }, { "epoch": 0.2718669629703613, "grad_norm": 0.09841335564851761, "learning_rate": 2.189867080527673e-05, "loss": 9.4911, "step": 54440 }, { "epoch": 0.27191690179530076, "grad_norm": 0.09503602236509323, "learning_rate": 2.1897168890335176e-05, "loss": 9.5034, "step": 54450 }, { "epoch": 0.2719668406202402, "grad_norm": 0.09067325294017792, "learning_rate": 2.189566697539363e-05, "loss": 9.4863, "step": 54460 }, { "epoch": 0.27201677944517966, "grad_norm": 0.100099578499794, "learning_rate": 2.1894165060452076e-05, "loss": 9.5065, "step": 54470 }, { "epoch": 0.2720667182701191, "grad_norm": 0.09303244203329086, "learning_rate": 2.1892663145510526e-05, "loss": 9.4933, "step": 54480 }, { "epoch": 0.27211665709505856, "grad_norm": 0.09559056162834167, "learning_rate": 2.1891161230568977e-05, "loss": 9.492, "step": 54490 }, { "epoch": 0.272166595919998, "grad_norm": 0.09210564941167831, "learning_rate": 2.1889659315627423e-05, "loss": 9.5017, "step": 54500 }, { "epoch": 0.27221653474493746, "grad_norm": 0.09366162121295929, "learning_rate": 2.1888157400685877e-05, "loss": 9.493, "step": 54510 }, { "epoch": 0.2722664735698769, "grad_norm": 0.09506445378065109, "learning_rate": 2.1886655485744324e-05, "loss": 9.4811, "step": 54520 }, { "epoch": 0.27231641239481635, "grad_norm": 0.10406965017318726, "learning_rate": 2.1885153570802774e-05, "loss": 9.4821, "step": 54530 }, { "epoch": 0.2723663512197558, "grad_norm": 0.08874404430389404, "learning_rate": 2.1883651655861224e-05, "loss": 9.5002, "step": 54540 }, { "epoch": 0.27241629004469525, "grad_norm": 0.09706084430217743, "learning_rate": 2.188214974091967e-05, "loss": 9.4876, "step": 54550 }, { "epoch": 0.2724662288696347, "grad_norm": 0.09322942048311234, "learning_rate": 2.1880647825978124e-05, "loss": 9.4887, "step": 54560 }, { "epoch": 0.27251616769457415, "grad_norm": 0.09634638577699661, "learning_rate": 2.187914591103657e-05, "loss": 9.4933, "step": 54570 }, { "epoch": 0.2725661065195136, "grad_norm": 0.09559839218854904, "learning_rate": 2.187764399609502e-05, "loss": 9.4836, "step": 54580 }, { "epoch": 0.27261604534445305, "grad_norm": 0.08715943992137909, "learning_rate": 2.187614208115347e-05, "loss": 9.4903, "step": 54590 }, { "epoch": 0.2726659841693925, "grad_norm": 0.09356392920017242, "learning_rate": 2.1874640166211918e-05, "loss": 9.4909, "step": 54600 }, { "epoch": 0.27271592299433195, "grad_norm": 0.09131593257188797, "learning_rate": 2.1873138251270372e-05, "loss": 9.4938, "step": 54610 }, { "epoch": 0.27276586181927137, "grad_norm": 0.09553367644548416, "learning_rate": 2.187163633632882e-05, "loss": 9.4918, "step": 54620 }, { "epoch": 0.27281580064421085, "grad_norm": 0.09892634302377701, "learning_rate": 2.187013442138727e-05, "loss": 9.484, "step": 54630 }, { "epoch": 0.27286573946915027, "grad_norm": 0.09537822008132935, "learning_rate": 2.186863250644572e-05, "loss": 9.4901, "step": 54640 }, { "epoch": 0.27291567829408975, "grad_norm": 0.09602360427379608, "learning_rate": 2.1867130591504166e-05, "loss": 9.4821, "step": 54650 }, { "epoch": 0.27296561711902917, "grad_norm": 0.09913076460361481, "learning_rate": 2.186562867656262e-05, "loss": 9.488, "step": 54660 }, { "epoch": 0.27301555594396865, "grad_norm": 0.09501135349273682, "learning_rate": 2.1864126761621066e-05, "loss": 9.4864, "step": 54670 }, { "epoch": 0.27306549476890807, "grad_norm": 0.0974724143743515, "learning_rate": 2.1862624846679516e-05, "loss": 9.4942, "step": 54680 }, { "epoch": 0.27311543359384755, "grad_norm": 0.09023966640233994, "learning_rate": 2.1861122931737967e-05, "loss": 9.4859, "step": 54690 }, { "epoch": 0.27316537241878697, "grad_norm": 0.09034571051597595, "learning_rate": 2.1859621016796417e-05, "loss": 9.4907, "step": 54700 }, { "epoch": 0.27321531124372644, "grad_norm": 0.09617186337709427, "learning_rate": 2.1858119101854867e-05, "loss": 9.483, "step": 54710 }, { "epoch": 0.27326525006866587, "grad_norm": 0.09283477067947388, "learning_rate": 2.1856617186913314e-05, "loss": 9.4723, "step": 54720 }, { "epoch": 0.27331518889360534, "grad_norm": 0.0881299078464508, "learning_rate": 2.1855115271971767e-05, "loss": 9.4951, "step": 54730 }, { "epoch": 0.27336512771854476, "grad_norm": 0.09953461587429047, "learning_rate": 2.1853613357030214e-05, "loss": 9.4853, "step": 54740 }, { "epoch": 0.27341506654348424, "grad_norm": 0.09831681102514267, "learning_rate": 2.1852111442088664e-05, "loss": 9.4849, "step": 54750 }, { "epoch": 0.27346500536842366, "grad_norm": 0.0924922525882721, "learning_rate": 2.1850609527147114e-05, "loss": 9.4929, "step": 54760 }, { "epoch": 0.27351494419336314, "grad_norm": 0.09693264961242676, "learning_rate": 2.184910761220556e-05, "loss": 9.4778, "step": 54770 }, { "epoch": 0.27356488301830256, "grad_norm": 0.0940731093287468, "learning_rate": 2.1847605697264015e-05, "loss": 9.4909, "step": 54780 }, { "epoch": 0.27361482184324204, "grad_norm": 0.09551883488893509, "learning_rate": 2.184610378232246e-05, "loss": 9.485, "step": 54790 }, { "epoch": 0.27366476066818146, "grad_norm": 0.09278986603021622, "learning_rate": 2.1844601867380912e-05, "loss": 9.4857, "step": 54800 }, { "epoch": 0.27371469949312094, "grad_norm": 0.09064249694347382, "learning_rate": 2.1843099952439362e-05, "loss": 9.4826, "step": 54810 }, { "epoch": 0.27376463831806036, "grad_norm": 0.0937790721654892, "learning_rate": 2.184159803749781e-05, "loss": 9.4923, "step": 54820 }, { "epoch": 0.27381457714299984, "grad_norm": 0.0964457243680954, "learning_rate": 2.1840096122556262e-05, "loss": 9.4783, "step": 54830 }, { "epoch": 0.27386451596793926, "grad_norm": 0.08797242492437363, "learning_rate": 2.183859420761471e-05, "loss": 9.481, "step": 54840 }, { "epoch": 0.27391445479287874, "grad_norm": 0.09727727621793747, "learning_rate": 2.183709229267316e-05, "loss": 9.4801, "step": 54850 }, { "epoch": 0.27396439361781816, "grad_norm": 0.0918155238032341, "learning_rate": 2.183559037773161e-05, "loss": 9.4905, "step": 54860 }, { "epoch": 0.27401433244275764, "grad_norm": 0.09824883192777634, "learning_rate": 2.1834088462790056e-05, "loss": 9.4761, "step": 54870 }, { "epoch": 0.27406427126769706, "grad_norm": 0.09386686235666275, "learning_rate": 2.183258654784851e-05, "loss": 9.4937, "step": 54880 }, { "epoch": 0.27411421009263653, "grad_norm": 0.09294228255748749, "learning_rate": 2.1831084632906957e-05, "loss": 9.4793, "step": 54890 }, { "epoch": 0.27416414891757596, "grad_norm": 0.10264691710472107, "learning_rate": 2.1829582717965407e-05, "loss": 9.4796, "step": 54900 }, { "epoch": 0.27421408774251543, "grad_norm": 0.09530775249004364, "learning_rate": 2.1828080803023857e-05, "loss": 9.479, "step": 54910 }, { "epoch": 0.27426402656745485, "grad_norm": 0.0969926044344902, "learning_rate": 2.1826578888082304e-05, "loss": 9.4762, "step": 54920 }, { "epoch": 0.27431396539239433, "grad_norm": 0.0955069363117218, "learning_rate": 2.1825076973140757e-05, "loss": 9.4781, "step": 54930 }, { "epoch": 0.27436390421733375, "grad_norm": 0.09909159690141678, "learning_rate": 2.1823575058199204e-05, "loss": 9.4861, "step": 54940 }, { "epoch": 0.27441384304227323, "grad_norm": 0.09283483028411865, "learning_rate": 2.1822073143257654e-05, "loss": 9.4774, "step": 54950 }, { "epoch": 0.27446378186721265, "grad_norm": 0.09053994715213776, "learning_rate": 2.1820571228316104e-05, "loss": 9.4813, "step": 54960 }, { "epoch": 0.27451372069215213, "grad_norm": 0.0921054258942604, "learning_rate": 2.181906931337455e-05, "loss": 9.4889, "step": 54970 }, { "epoch": 0.27456365951709155, "grad_norm": 0.09481295198202133, "learning_rate": 2.1817567398433005e-05, "loss": 9.485, "step": 54980 }, { "epoch": 0.27461359834203103, "grad_norm": 0.10501012206077576, "learning_rate": 2.181606548349145e-05, "loss": 9.4831, "step": 54990 }, { "epoch": 0.27466353716697045, "grad_norm": 0.09528646618127823, "learning_rate": 2.1814563568549902e-05, "loss": 9.4872, "step": 55000 }, { "epoch": 0.27471347599190993, "grad_norm": 0.09487199038267136, "learning_rate": 2.1813061653608352e-05, "loss": 9.4929, "step": 55010 }, { "epoch": 0.27476341481684935, "grad_norm": 0.09505639970302582, "learning_rate": 2.18115597386668e-05, "loss": 9.4858, "step": 55020 }, { "epoch": 0.2748133536417888, "grad_norm": 0.0927814468741417, "learning_rate": 2.1810057823725252e-05, "loss": 9.4791, "step": 55030 }, { "epoch": 0.27486329246672825, "grad_norm": 0.09519551694393158, "learning_rate": 2.18085559087837e-05, "loss": 9.4805, "step": 55040 }, { "epoch": 0.2749132312916677, "grad_norm": 0.09184595942497253, "learning_rate": 2.1807053993842153e-05, "loss": 9.4717, "step": 55050 }, { "epoch": 0.27496317011660715, "grad_norm": 0.09657662361860275, "learning_rate": 2.18055520789006e-05, "loss": 9.4875, "step": 55060 }, { "epoch": 0.2750131089415466, "grad_norm": 0.0927683562040329, "learning_rate": 2.1804050163959046e-05, "loss": 9.4826, "step": 55070 }, { "epoch": 0.27506304776648605, "grad_norm": 0.09400233626365662, "learning_rate": 2.18025482490175e-05, "loss": 9.4751, "step": 55080 }, { "epoch": 0.2751129865914255, "grad_norm": 0.09439916908740997, "learning_rate": 2.1801046334075947e-05, "loss": 9.4739, "step": 55090 }, { "epoch": 0.27516292541636495, "grad_norm": 0.09644033014774323, "learning_rate": 2.17995444191344e-05, "loss": 9.4795, "step": 55100 }, { "epoch": 0.2752128642413044, "grad_norm": 0.09258447587490082, "learning_rate": 2.1798042504192847e-05, "loss": 9.4774, "step": 55110 }, { "epoch": 0.27526280306624384, "grad_norm": 0.09144683927297592, "learning_rate": 2.1796540589251294e-05, "loss": 9.4767, "step": 55120 }, { "epoch": 0.2753127418911833, "grad_norm": 0.09092001616954803, "learning_rate": 2.1795038674309747e-05, "loss": 9.4839, "step": 55130 }, { "epoch": 0.27536268071612274, "grad_norm": 0.0951116681098938, "learning_rate": 2.1793536759368194e-05, "loss": 9.4774, "step": 55140 }, { "epoch": 0.2754126195410622, "grad_norm": 0.09364614635705948, "learning_rate": 2.1792034844426648e-05, "loss": 9.4739, "step": 55150 }, { "epoch": 0.27546255836600164, "grad_norm": 0.09528619796037674, "learning_rate": 2.1790532929485094e-05, "loss": 9.4745, "step": 55160 }, { "epoch": 0.2755124971909411, "grad_norm": 0.09560272097587585, "learning_rate": 2.178903101454354e-05, "loss": 9.4794, "step": 55170 }, { "epoch": 0.27556243601588054, "grad_norm": 0.09209144115447998, "learning_rate": 2.1787529099601995e-05, "loss": 9.4788, "step": 55180 }, { "epoch": 0.27561237484082, "grad_norm": 0.09365902096033096, "learning_rate": 2.178602718466044e-05, "loss": 9.472, "step": 55190 }, { "epoch": 0.27566231366575944, "grad_norm": 0.09534572809934616, "learning_rate": 2.1784525269718895e-05, "loss": 9.4737, "step": 55200 }, { "epoch": 0.2757122524906989, "grad_norm": 0.09372628480195999, "learning_rate": 2.1783023354777342e-05, "loss": 9.4766, "step": 55210 }, { "epoch": 0.27576219131563834, "grad_norm": 0.09953497350215912, "learning_rate": 2.178152143983579e-05, "loss": 9.4827, "step": 55220 }, { "epoch": 0.2758121301405778, "grad_norm": 0.0936177521944046, "learning_rate": 2.1780019524894242e-05, "loss": 9.4838, "step": 55230 }, { "epoch": 0.27586206896551724, "grad_norm": 0.0956781804561615, "learning_rate": 2.177851760995269e-05, "loss": 9.4818, "step": 55240 }, { "epoch": 0.2759120077904567, "grad_norm": 0.09287876635789871, "learning_rate": 2.1777015695011143e-05, "loss": 9.4868, "step": 55250 }, { "epoch": 0.27596194661539614, "grad_norm": 0.09168554097414017, "learning_rate": 2.177551378006959e-05, "loss": 9.4796, "step": 55260 }, { "epoch": 0.2760118854403356, "grad_norm": 0.09445545822381973, "learning_rate": 2.1774011865128036e-05, "loss": 9.4829, "step": 55270 }, { "epoch": 0.27606182426527504, "grad_norm": 0.09545770287513733, "learning_rate": 2.177250995018649e-05, "loss": 9.4689, "step": 55280 }, { "epoch": 0.2761117630902145, "grad_norm": 0.09333658963441849, "learning_rate": 2.1771008035244937e-05, "loss": 9.4856, "step": 55290 }, { "epoch": 0.27616170191515393, "grad_norm": 0.09187939018011093, "learning_rate": 2.176950612030339e-05, "loss": 9.4839, "step": 55300 }, { "epoch": 0.2762116407400934, "grad_norm": 0.09599092602729797, "learning_rate": 2.1768004205361837e-05, "loss": 9.4787, "step": 55310 }, { "epoch": 0.27626157956503283, "grad_norm": 0.09836266934871674, "learning_rate": 2.1766502290420284e-05, "loss": 9.4733, "step": 55320 }, { "epoch": 0.2763115183899723, "grad_norm": 0.08872494846582413, "learning_rate": 2.1765000375478737e-05, "loss": 9.4781, "step": 55330 }, { "epoch": 0.27636145721491173, "grad_norm": 0.09891574829816818, "learning_rate": 2.1763498460537184e-05, "loss": 9.4845, "step": 55340 }, { "epoch": 0.2764113960398512, "grad_norm": 0.09147507697343826, "learning_rate": 2.1761996545595638e-05, "loss": 9.4784, "step": 55350 }, { "epoch": 0.27646133486479063, "grad_norm": 0.09677154570817947, "learning_rate": 2.1760494630654084e-05, "loss": 9.4795, "step": 55360 }, { "epoch": 0.2765112736897301, "grad_norm": 0.09255880117416382, "learning_rate": 2.1758992715712535e-05, "loss": 9.4825, "step": 55370 }, { "epoch": 0.27656121251466953, "grad_norm": 0.09383849054574966, "learning_rate": 2.1757490800770985e-05, "loss": 9.4684, "step": 55380 }, { "epoch": 0.27661115133960895, "grad_norm": 0.09055565297603607, "learning_rate": 2.175598888582943e-05, "loss": 9.4714, "step": 55390 }, { "epoch": 0.27666109016454843, "grad_norm": 0.09910639375448227, "learning_rate": 2.1754486970887885e-05, "loss": 9.4788, "step": 55400 }, { "epoch": 0.27671102898948785, "grad_norm": 0.09024091809988022, "learning_rate": 2.1752985055946332e-05, "loss": 9.4775, "step": 55410 }, { "epoch": 0.2767609678144273, "grad_norm": 0.09249179810285568, "learning_rate": 2.1751483141004782e-05, "loss": 9.4655, "step": 55420 }, { "epoch": 0.27681090663936675, "grad_norm": 0.09385978430509567, "learning_rate": 2.1749981226063232e-05, "loss": 9.4792, "step": 55430 }, { "epoch": 0.2768608454643062, "grad_norm": 0.09004578739404678, "learning_rate": 2.174847931112168e-05, "loss": 9.4759, "step": 55440 }, { "epoch": 0.27691078428924565, "grad_norm": 0.09427126497030258, "learning_rate": 2.1746977396180133e-05, "loss": 9.4672, "step": 55450 }, { "epoch": 0.2769607231141851, "grad_norm": 0.09121444821357727, "learning_rate": 2.174547548123858e-05, "loss": 9.4653, "step": 55460 }, { "epoch": 0.27701066193912455, "grad_norm": 0.0954887717962265, "learning_rate": 2.174397356629703e-05, "loss": 9.47, "step": 55470 }, { "epoch": 0.277060600764064, "grad_norm": 0.09227491915225983, "learning_rate": 2.174247165135548e-05, "loss": 9.4773, "step": 55480 }, { "epoch": 0.27711053958900345, "grad_norm": 0.09483451396226883, "learning_rate": 2.1740969736413927e-05, "loss": 9.4778, "step": 55490 }, { "epoch": 0.2771604784139429, "grad_norm": 0.09309407323598862, "learning_rate": 2.173946782147238e-05, "loss": 9.4802, "step": 55500 }, { "epoch": 0.27721041723888235, "grad_norm": 0.09302882105112076, "learning_rate": 2.1737965906530827e-05, "loss": 9.4772, "step": 55510 }, { "epoch": 0.2772603560638218, "grad_norm": 0.09708143770694733, "learning_rate": 2.1736463991589277e-05, "loss": 9.4642, "step": 55520 }, { "epoch": 0.27731029488876124, "grad_norm": 0.09857852011919022, "learning_rate": 2.1734962076647727e-05, "loss": 9.4724, "step": 55530 }, { "epoch": 0.2773602337137007, "grad_norm": 0.08941424638032913, "learning_rate": 2.1733460161706174e-05, "loss": 9.4653, "step": 55540 }, { "epoch": 0.27741017253864014, "grad_norm": 0.09356237202882767, "learning_rate": 2.1731958246764628e-05, "loss": 9.4737, "step": 55550 }, { "epoch": 0.2774601113635796, "grad_norm": 0.09721656143665314, "learning_rate": 2.1730456331823074e-05, "loss": 9.4649, "step": 55560 }, { "epoch": 0.27751005018851904, "grad_norm": 0.0922091156244278, "learning_rate": 2.1728954416881525e-05, "loss": 9.4746, "step": 55570 }, { "epoch": 0.2775599890134585, "grad_norm": 0.09237723052501678, "learning_rate": 2.1727452501939975e-05, "loss": 9.4742, "step": 55580 }, { "epoch": 0.27760992783839794, "grad_norm": 0.09540941566228867, "learning_rate": 2.172595058699842e-05, "loss": 9.4836, "step": 55590 }, { "epoch": 0.2776598666633374, "grad_norm": 0.09385842829942703, "learning_rate": 2.1724448672056875e-05, "loss": 9.4653, "step": 55600 }, { "epoch": 0.27770980548827684, "grad_norm": 0.09224699437618256, "learning_rate": 2.1722946757115322e-05, "loss": 9.4769, "step": 55610 }, { "epoch": 0.2777597443132163, "grad_norm": 0.08767042309045792, "learning_rate": 2.1721444842173772e-05, "loss": 9.4674, "step": 55620 }, { "epoch": 0.27780968313815574, "grad_norm": 0.09636397659778595, "learning_rate": 2.1719942927232222e-05, "loss": 9.4676, "step": 55630 }, { "epoch": 0.2778596219630952, "grad_norm": 0.09190484136343002, "learning_rate": 2.171844101229067e-05, "loss": 9.4758, "step": 55640 }, { "epoch": 0.27790956078803464, "grad_norm": 0.09460058063268661, "learning_rate": 2.1716939097349123e-05, "loss": 9.4664, "step": 55650 }, { "epoch": 0.2779594996129741, "grad_norm": 0.0935700535774231, "learning_rate": 2.171543718240757e-05, "loss": 9.4654, "step": 55660 }, { "epoch": 0.27800943843791354, "grad_norm": 0.09616656601428986, "learning_rate": 2.171393526746602e-05, "loss": 9.4798, "step": 55670 }, { "epoch": 0.278059377262853, "grad_norm": 0.09719350188970566, "learning_rate": 2.171243335252447e-05, "loss": 9.4636, "step": 55680 }, { "epoch": 0.27810931608779244, "grad_norm": 0.09644226729869843, "learning_rate": 2.171093143758292e-05, "loss": 9.471, "step": 55690 }, { "epoch": 0.2781592549127319, "grad_norm": 0.09436850249767303, "learning_rate": 2.170942952264137e-05, "loss": 9.4831, "step": 55700 }, { "epoch": 0.27820919373767133, "grad_norm": 0.09048687666654587, "learning_rate": 2.1707927607699817e-05, "loss": 9.4731, "step": 55710 }, { "epoch": 0.2782591325626108, "grad_norm": 0.10046453028917313, "learning_rate": 2.1706425692758267e-05, "loss": 9.4598, "step": 55720 }, { "epoch": 0.27830907138755023, "grad_norm": 0.09537816792726517, "learning_rate": 2.1704923777816717e-05, "loss": 9.4707, "step": 55730 }, { "epoch": 0.2783590102124897, "grad_norm": 0.09480351954698563, "learning_rate": 2.1703421862875167e-05, "loss": 9.478, "step": 55740 }, { "epoch": 0.27840894903742913, "grad_norm": 0.09742586314678192, "learning_rate": 2.1701919947933618e-05, "loss": 9.4817, "step": 55750 }, { "epoch": 0.2784588878623686, "grad_norm": 0.1001049056649208, "learning_rate": 2.1700418032992064e-05, "loss": 9.4723, "step": 55760 }, { "epoch": 0.27850882668730803, "grad_norm": 0.09015222638845444, "learning_rate": 2.1698916118050515e-05, "loss": 9.4722, "step": 55770 }, { "epoch": 0.2785587655122475, "grad_norm": 0.09560934454202652, "learning_rate": 2.1697414203108965e-05, "loss": 9.4617, "step": 55780 }, { "epoch": 0.27860870433718693, "grad_norm": 0.0906476080417633, "learning_rate": 2.1695912288167415e-05, "loss": 9.4799, "step": 55790 }, { "epoch": 0.2786586431621264, "grad_norm": 0.09162773191928864, "learning_rate": 2.1694410373225865e-05, "loss": 9.4484, "step": 55800 }, { "epoch": 0.27870858198706583, "grad_norm": 0.08697135001420975, "learning_rate": 2.1692908458284312e-05, "loss": 9.4698, "step": 55810 }, { "epoch": 0.2787585208120053, "grad_norm": 0.09465092420578003, "learning_rate": 2.1691406543342762e-05, "loss": 9.4725, "step": 55820 }, { "epoch": 0.2788084596369447, "grad_norm": 0.09739962965250015, "learning_rate": 2.1689904628401212e-05, "loss": 9.4746, "step": 55830 }, { "epoch": 0.2788583984618842, "grad_norm": 0.09323502331972122, "learning_rate": 2.1688402713459662e-05, "loss": 9.4643, "step": 55840 }, { "epoch": 0.2789083372868236, "grad_norm": 0.09480603039264679, "learning_rate": 2.1686900798518113e-05, "loss": 9.4724, "step": 55850 }, { "epoch": 0.2789582761117631, "grad_norm": 0.09424508363008499, "learning_rate": 2.168539888357656e-05, "loss": 9.4578, "step": 55860 }, { "epoch": 0.2790082149367025, "grad_norm": 0.10481972247362137, "learning_rate": 2.168389696863501e-05, "loss": 9.4665, "step": 55870 }, { "epoch": 0.279058153761642, "grad_norm": 0.09700514376163483, "learning_rate": 2.168239505369346e-05, "loss": 9.4596, "step": 55880 }, { "epoch": 0.2791080925865814, "grad_norm": 0.09476059675216675, "learning_rate": 2.168089313875191e-05, "loss": 9.4671, "step": 55890 }, { "epoch": 0.2791580314115209, "grad_norm": 0.09556054323911667, "learning_rate": 2.167939122381036e-05, "loss": 9.4661, "step": 55900 }, { "epoch": 0.2792079702364603, "grad_norm": 0.09373265504837036, "learning_rate": 2.1677889308868807e-05, "loss": 9.4751, "step": 55910 }, { "epoch": 0.2792579090613998, "grad_norm": 0.10191362351179123, "learning_rate": 2.1676387393927257e-05, "loss": 9.4675, "step": 55920 }, { "epoch": 0.2793078478863392, "grad_norm": 0.09132136404514313, "learning_rate": 2.1674885478985707e-05, "loss": 9.4671, "step": 55930 }, { "epoch": 0.2793577867112787, "grad_norm": 0.08985847234725952, "learning_rate": 2.1673383564044157e-05, "loss": 9.4687, "step": 55940 }, { "epoch": 0.2794077255362181, "grad_norm": 0.10665588080883026, "learning_rate": 2.1671881649102608e-05, "loss": 9.4769, "step": 55950 }, { "epoch": 0.2794576643611576, "grad_norm": 0.10030438750982285, "learning_rate": 2.1670379734161054e-05, "loss": 9.4622, "step": 55960 }, { "epoch": 0.279507603186097, "grad_norm": 0.09504380822181702, "learning_rate": 2.1668877819219505e-05, "loss": 9.468, "step": 55970 }, { "epoch": 0.2795575420110365, "grad_norm": 0.09228894859552383, "learning_rate": 2.1667375904277955e-05, "loss": 9.4615, "step": 55980 }, { "epoch": 0.2796074808359759, "grad_norm": 0.09327695518732071, "learning_rate": 2.1665873989336405e-05, "loss": 9.4575, "step": 55990 }, { "epoch": 0.2796574196609154, "grad_norm": 0.09014037996530533, "learning_rate": 2.1664372074394855e-05, "loss": 9.4682, "step": 56000 }, { "epoch": 0.2797073584858548, "grad_norm": 0.09762946516275406, "learning_rate": 2.1662870159453302e-05, "loss": 9.466, "step": 56010 }, { "epoch": 0.2797572973107943, "grad_norm": 0.08341635018587112, "learning_rate": 2.1661368244511752e-05, "loss": 9.4691, "step": 56020 }, { "epoch": 0.2798072361357337, "grad_norm": 0.09523497521877289, "learning_rate": 2.1659866329570202e-05, "loss": 9.4618, "step": 56030 }, { "epoch": 0.2798571749606732, "grad_norm": 0.10125718265771866, "learning_rate": 2.1658364414628653e-05, "loss": 9.4623, "step": 56040 }, { "epoch": 0.2799071137856126, "grad_norm": 0.10129445791244507, "learning_rate": 2.1656862499687103e-05, "loss": 9.458, "step": 56050 }, { "epoch": 0.2799570526105521, "grad_norm": 0.08802901953458786, "learning_rate": 2.1655360584745553e-05, "loss": 9.465, "step": 56060 }, { "epoch": 0.2800069914354915, "grad_norm": 0.09462311863899231, "learning_rate": 2.1653858669804e-05, "loss": 9.4684, "step": 56070 }, { "epoch": 0.280056930260431, "grad_norm": 0.09879063069820404, "learning_rate": 2.165235675486245e-05, "loss": 9.4562, "step": 56080 }, { "epoch": 0.2801068690853704, "grad_norm": 0.09174858778715134, "learning_rate": 2.16508548399209e-05, "loss": 9.4649, "step": 56090 }, { "epoch": 0.2801568079103099, "grad_norm": 0.09843375533819199, "learning_rate": 2.164935292497935e-05, "loss": 9.4666, "step": 56100 }, { "epoch": 0.2802067467352493, "grad_norm": 0.09890784323215485, "learning_rate": 2.16478510100378e-05, "loss": 9.464, "step": 56110 }, { "epoch": 0.2802566855601888, "grad_norm": 0.09890832751989365, "learning_rate": 2.1646349095096247e-05, "loss": 9.47, "step": 56120 }, { "epoch": 0.2803066243851282, "grad_norm": 0.09250981360673904, "learning_rate": 2.1644847180154697e-05, "loss": 9.4576, "step": 56130 }, { "epoch": 0.2803565632100677, "grad_norm": 0.09066881984472275, "learning_rate": 2.1643345265213148e-05, "loss": 9.4633, "step": 56140 }, { "epoch": 0.2804065020350071, "grad_norm": 0.09215233474969864, "learning_rate": 2.1641843350271598e-05, "loss": 9.4647, "step": 56150 }, { "epoch": 0.2804564408599466, "grad_norm": 0.09696812182664871, "learning_rate": 2.1640341435330048e-05, "loss": 9.4715, "step": 56160 }, { "epoch": 0.280506379684886, "grad_norm": 0.09409933537244797, "learning_rate": 2.1638839520388495e-05, "loss": 9.4697, "step": 56170 }, { "epoch": 0.2805563185098255, "grad_norm": 0.09850755333900452, "learning_rate": 2.1637337605446945e-05, "loss": 9.4676, "step": 56180 }, { "epoch": 0.2806062573347649, "grad_norm": 0.09551995247602463, "learning_rate": 2.1635835690505395e-05, "loss": 9.4628, "step": 56190 }, { "epoch": 0.2806561961597044, "grad_norm": 0.08868438005447388, "learning_rate": 2.1634333775563845e-05, "loss": 9.4624, "step": 56200 }, { "epoch": 0.2807061349846438, "grad_norm": 0.09749383479356766, "learning_rate": 2.1632831860622295e-05, "loss": 9.4605, "step": 56210 }, { "epoch": 0.2807560738095833, "grad_norm": 0.09395034611225128, "learning_rate": 2.1631329945680742e-05, "loss": 9.4619, "step": 56220 }, { "epoch": 0.2808060126345227, "grad_norm": 0.09494924545288086, "learning_rate": 2.1629828030739192e-05, "loss": 9.4666, "step": 56230 }, { "epoch": 0.2808559514594622, "grad_norm": 0.09371868520975113, "learning_rate": 2.1628326115797643e-05, "loss": 9.4607, "step": 56240 }, { "epoch": 0.2809058902844016, "grad_norm": 0.0938858613371849, "learning_rate": 2.1626824200856093e-05, "loss": 9.4649, "step": 56250 }, { "epoch": 0.2809558291093411, "grad_norm": 0.09127645194530487, "learning_rate": 2.1625322285914543e-05, "loss": 9.4614, "step": 56260 }, { "epoch": 0.2810057679342805, "grad_norm": 0.09199647605419159, "learning_rate": 2.162382037097299e-05, "loss": 9.4671, "step": 56270 }, { "epoch": 0.28105570675922, "grad_norm": 0.09332824498414993, "learning_rate": 2.162231845603144e-05, "loss": 9.4663, "step": 56280 }, { "epoch": 0.2811056455841594, "grad_norm": 0.09723930805921555, "learning_rate": 2.162081654108989e-05, "loss": 9.459, "step": 56290 }, { "epoch": 0.2811555844090989, "grad_norm": 0.09130632877349854, "learning_rate": 2.161931462614834e-05, "loss": 9.4603, "step": 56300 }, { "epoch": 0.2812055232340383, "grad_norm": 0.09806390851736069, "learning_rate": 2.161781271120679e-05, "loss": 9.463, "step": 56310 }, { "epoch": 0.2812554620589778, "grad_norm": 0.09688523411750793, "learning_rate": 2.1616310796265237e-05, "loss": 9.459, "step": 56320 }, { "epoch": 0.2813054008839172, "grad_norm": 0.09443571418523788, "learning_rate": 2.1614808881323687e-05, "loss": 9.4629, "step": 56330 }, { "epoch": 0.2813553397088567, "grad_norm": 0.09263693541288376, "learning_rate": 2.1613306966382138e-05, "loss": 9.4674, "step": 56340 }, { "epoch": 0.2814052785337961, "grad_norm": 0.08993376046419144, "learning_rate": 2.1611805051440588e-05, "loss": 9.462, "step": 56350 }, { "epoch": 0.2814552173587356, "grad_norm": 0.09446647763252258, "learning_rate": 2.1610303136499038e-05, "loss": 9.4597, "step": 56360 }, { "epoch": 0.281505156183675, "grad_norm": 0.09347580373287201, "learning_rate": 2.1608801221557485e-05, "loss": 9.4585, "step": 56370 }, { "epoch": 0.2815550950086144, "grad_norm": 0.09771845489740372, "learning_rate": 2.1607299306615938e-05, "loss": 9.4685, "step": 56380 }, { "epoch": 0.2816050338335539, "grad_norm": 0.09477143734693527, "learning_rate": 2.1605797391674385e-05, "loss": 9.4516, "step": 56390 }, { "epoch": 0.2816549726584933, "grad_norm": 0.09481296688318253, "learning_rate": 2.1604295476732835e-05, "loss": 9.4627, "step": 56400 }, { "epoch": 0.2817049114834328, "grad_norm": 0.09837774932384491, "learning_rate": 2.1602793561791285e-05, "loss": 9.4678, "step": 56410 }, { "epoch": 0.2817548503083722, "grad_norm": 0.09405245631933212, "learning_rate": 2.1601291646849732e-05, "loss": 9.4549, "step": 56420 }, { "epoch": 0.2818047891333117, "grad_norm": 0.09251265972852707, "learning_rate": 2.1599789731908186e-05, "loss": 9.466, "step": 56430 }, { "epoch": 0.2818547279582511, "grad_norm": 0.0892677903175354, "learning_rate": 2.1598287816966633e-05, "loss": 9.4575, "step": 56440 }, { "epoch": 0.2819046667831906, "grad_norm": 0.09336142987012863, "learning_rate": 2.1596785902025083e-05, "loss": 9.4523, "step": 56450 }, { "epoch": 0.28195460560813, "grad_norm": 0.09530828148126602, "learning_rate": 2.1595283987083533e-05, "loss": 9.4588, "step": 56460 }, { "epoch": 0.2820045444330695, "grad_norm": 0.09306281059980392, "learning_rate": 2.159378207214198e-05, "loss": 9.4528, "step": 56470 }, { "epoch": 0.2820544832580089, "grad_norm": 0.0939321219921112, "learning_rate": 2.1592280157200433e-05, "loss": 9.4545, "step": 56480 }, { "epoch": 0.2821044220829484, "grad_norm": 0.1002638041973114, "learning_rate": 2.159077824225888e-05, "loss": 9.4582, "step": 56490 }, { "epoch": 0.2821543609078878, "grad_norm": 0.09518595784902573, "learning_rate": 2.158927632731733e-05, "loss": 9.4531, "step": 56500 }, { "epoch": 0.2822042997328273, "grad_norm": 0.0943618193268776, "learning_rate": 2.158777441237578e-05, "loss": 9.4467, "step": 56510 }, { "epoch": 0.2822542385577667, "grad_norm": 0.09317539632320404, "learning_rate": 2.1586272497434227e-05, "loss": 9.4538, "step": 56520 }, { "epoch": 0.2823041773827062, "grad_norm": 0.09565674513578415, "learning_rate": 2.158477058249268e-05, "loss": 9.457, "step": 56530 }, { "epoch": 0.2823541162076456, "grad_norm": 0.09357752650976181, "learning_rate": 2.1583268667551128e-05, "loss": 9.4566, "step": 56540 }, { "epoch": 0.2824040550325851, "grad_norm": 0.09761801362037659, "learning_rate": 2.1581766752609578e-05, "loss": 9.46, "step": 56550 }, { "epoch": 0.2824539938575245, "grad_norm": 0.08818177133798599, "learning_rate": 2.1580264837668028e-05, "loss": 9.453, "step": 56560 }, { "epoch": 0.282503932682464, "grad_norm": 0.09338049590587616, "learning_rate": 2.1578762922726475e-05, "loss": 9.4508, "step": 56570 }, { "epoch": 0.2825538715074034, "grad_norm": 0.09481517225503922, "learning_rate": 2.1577261007784928e-05, "loss": 9.4517, "step": 56580 }, { "epoch": 0.2826038103323429, "grad_norm": 0.0910092368721962, "learning_rate": 2.1575759092843375e-05, "loss": 9.4635, "step": 56590 }, { "epoch": 0.2826537491572823, "grad_norm": 0.0879504382610321, "learning_rate": 2.1574257177901825e-05, "loss": 9.4676, "step": 56600 }, { "epoch": 0.2827036879822218, "grad_norm": 0.09749528020620346, "learning_rate": 2.1572755262960275e-05, "loss": 9.4548, "step": 56610 }, { "epoch": 0.2827536268071612, "grad_norm": 0.09335489571094513, "learning_rate": 2.1571253348018722e-05, "loss": 9.459, "step": 56620 }, { "epoch": 0.2828035656321007, "grad_norm": 0.09115227311849594, "learning_rate": 2.1569751433077176e-05, "loss": 9.4516, "step": 56630 }, { "epoch": 0.2828535044570401, "grad_norm": 0.09703268110752106, "learning_rate": 2.1568249518135623e-05, "loss": 9.4488, "step": 56640 }, { "epoch": 0.2829034432819796, "grad_norm": 0.08919961750507355, "learning_rate": 2.1566747603194073e-05, "loss": 9.4534, "step": 56650 }, { "epoch": 0.282953382106919, "grad_norm": 0.09611677378416061, "learning_rate": 2.1565245688252523e-05, "loss": 9.4558, "step": 56660 }, { "epoch": 0.2830033209318585, "grad_norm": 0.09488637745380402, "learning_rate": 2.156374377331097e-05, "loss": 9.4519, "step": 56670 }, { "epoch": 0.2830532597567979, "grad_norm": 0.09528210014104843, "learning_rate": 2.1562241858369423e-05, "loss": 9.4565, "step": 56680 }, { "epoch": 0.2831031985817374, "grad_norm": 0.0950738713145256, "learning_rate": 2.156073994342787e-05, "loss": 9.4551, "step": 56690 }, { "epoch": 0.2831531374066768, "grad_norm": 0.09509290009737015, "learning_rate": 2.1559238028486324e-05, "loss": 9.4597, "step": 56700 }, { "epoch": 0.2832030762316163, "grad_norm": 0.09353553503751755, "learning_rate": 2.155773611354477e-05, "loss": 9.443, "step": 56710 }, { "epoch": 0.2832530150565557, "grad_norm": 0.09463495761156082, "learning_rate": 2.1556234198603217e-05, "loss": 9.454, "step": 56720 }, { "epoch": 0.2833029538814952, "grad_norm": 0.0871269553899765, "learning_rate": 2.155473228366167e-05, "loss": 9.4553, "step": 56730 }, { "epoch": 0.2833528927064346, "grad_norm": 0.10174693167209625, "learning_rate": 2.1553230368720118e-05, "loss": 9.4618, "step": 56740 }, { "epoch": 0.2834028315313741, "grad_norm": 0.09311925619840622, "learning_rate": 2.155172845377857e-05, "loss": 9.4507, "step": 56750 }, { "epoch": 0.2834527703563135, "grad_norm": 0.09324943274259567, "learning_rate": 2.1550226538837018e-05, "loss": 9.4522, "step": 56760 }, { "epoch": 0.283502709181253, "grad_norm": 0.0928729698061943, "learning_rate": 2.1548724623895465e-05, "loss": 9.4504, "step": 56770 }, { "epoch": 0.2835526480061924, "grad_norm": 0.09641097486019135, "learning_rate": 2.1547222708953918e-05, "loss": 9.4463, "step": 56780 }, { "epoch": 0.2836025868311319, "grad_norm": 0.09734059870243073, "learning_rate": 2.1545720794012365e-05, "loss": 9.4444, "step": 56790 }, { "epoch": 0.2836525256560713, "grad_norm": 0.09037147462368011, "learning_rate": 2.154421887907082e-05, "loss": 9.4487, "step": 56800 }, { "epoch": 0.2837024644810108, "grad_norm": 0.09848001599311829, "learning_rate": 2.1542716964129265e-05, "loss": 9.4588, "step": 56810 }, { "epoch": 0.2837524033059502, "grad_norm": 0.092610202729702, "learning_rate": 2.1541215049187712e-05, "loss": 9.453, "step": 56820 }, { "epoch": 0.2838023421308897, "grad_norm": 0.09564469754695892, "learning_rate": 2.1539713134246166e-05, "loss": 9.4589, "step": 56830 }, { "epoch": 0.2838522809558291, "grad_norm": 0.08878234773874283, "learning_rate": 2.1538211219304613e-05, "loss": 9.4545, "step": 56840 }, { "epoch": 0.28390221978076857, "grad_norm": 0.09450232982635498, "learning_rate": 2.1536709304363066e-05, "loss": 9.4541, "step": 56850 }, { "epoch": 0.283952158605708, "grad_norm": 0.09449663758277893, "learning_rate": 2.1535207389421513e-05, "loss": 9.4419, "step": 56860 }, { "epoch": 0.28400209743064747, "grad_norm": 0.09098829329013824, "learning_rate": 2.153370547447996e-05, "loss": 9.452, "step": 56870 }, { "epoch": 0.2840520362555869, "grad_norm": 0.09370938688516617, "learning_rate": 2.1532203559538413e-05, "loss": 9.45, "step": 56880 }, { "epoch": 0.28410197508052637, "grad_norm": 0.09251664578914642, "learning_rate": 2.153070164459686e-05, "loss": 9.453, "step": 56890 }, { "epoch": 0.2841519139054658, "grad_norm": 0.0982016995549202, "learning_rate": 2.1529199729655314e-05, "loss": 9.4479, "step": 56900 }, { "epoch": 0.28420185273040527, "grad_norm": 0.0964377224445343, "learning_rate": 2.152769781471376e-05, "loss": 9.4496, "step": 56910 }, { "epoch": 0.2842517915553447, "grad_norm": 0.09132152795791626, "learning_rate": 2.1526195899772207e-05, "loss": 9.4547, "step": 56920 }, { "epoch": 0.28430173038028417, "grad_norm": 0.09878633171319962, "learning_rate": 2.152469398483066e-05, "loss": 9.4533, "step": 56930 }, { "epoch": 0.2843516692052236, "grad_norm": 0.09209530800580978, "learning_rate": 2.1523192069889108e-05, "loss": 9.4452, "step": 56940 }, { "epoch": 0.28440160803016307, "grad_norm": 0.09105639159679413, "learning_rate": 2.152169015494756e-05, "loss": 9.4402, "step": 56950 }, { "epoch": 0.2844515468551025, "grad_norm": 0.09028221666812897, "learning_rate": 2.1520188240006008e-05, "loss": 9.4486, "step": 56960 }, { "epoch": 0.28450148568004197, "grad_norm": 0.09749405086040497, "learning_rate": 2.1518686325064455e-05, "loss": 9.452, "step": 56970 }, { "epoch": 0.2845514245049814, "grad_norm": 0.0886070504784584, "learning_rate": 2.1517184410122908e-05, "loss": 9.4518, "step": 56980 }, { "epoch": 0.28460136332992086, "grad_norm": 0.09163407981395721, "learning_rate": 2.1515682495181355e-05, "loss": 9.4533, "step": 56990 }, { "epoch": 0.2846513021548603, "grad_norm": 0.09811826795339584, "learning_rate": 2.151418058023981e-05, "loss": 9.4422, "step": 57000 }, { "epoch": 0.28470124097979976, "grad_norm": 0.08839601278305054, "learning_rate": 2.1512678665298255e-05, "loss": 9.4462, "step": 57010 }, { "epoch": 0.2847511798047392, "grad_norm": 0.09699650853872299, "learning_rate": 2.1511176750356706e-05, "loss": 9.4547, "step": 57020 }, { "epoch": 0.28480111862967866, "grad_norm": 0.0974980965256691, "learning_rate": 2.1509674835415156e-05, "loss": 9.4466, "step": 57030 }, { "epoch": 0.2848510574546181, "grad_norm": 0.09446827322244644, "learning_rate": 2.1508172920473603e-05, "loss": 9.4488, "step": 57040 }, { "epoch": 0.28490099627955756, "grad_norm": 0.09437047690153122, "learning_rate": 2.1506671005532056e-05, "loss": 9.4458, "step": 57050 }, { "epoch": 0.284950935104497, "grad_norm": 0.09088893234729767, "learning_rate": 2.1505169090590503e-05, "loss": 9.4443, "step": 57060 }, { "epoch": 0.28500087392943646, "grad_norm": 0.0951780378818512, "learning_rate": 2.1503667175648953e-05, "loss": 9.4552, "step": 57070 }, { "epoch": 0.2850508127543759, "grad_norm": 0.08948690444231033, "learning_rate": 2.1502165260707403e-05, "loss": 9.4513, "step": 57080 }, { "epoch": 0.28510075157931536, "grad_norm": 0.09669423848390579, "learning_rate": 2.150066334576585e-05, "loss": 9.4471, "step": 57090 }, { "epoch": 0.2851506904042548, "grad_norm": 0.09193644672632217, "learning_rate": 2.1499161430824304e-05, "loss": 9.4458, "step": 57100 }, { "epoch": 0.28520062922919426, "grad_norm": 0.10340452194213867, "learning_rate": 2.149765951588275e-05, "loss": 9.4497, "step": 57110 }, { "epoch": 0.2852505680541337, "grad_norm": 0.09467227011919022, "learning_rate": 2.14961576009412e-05, "loss": 9.4555, "step": 57120 }, { "epoch": 0.28530050687907316, "grad_norm": 0.09359690546989441, "learning_rate": 2.149465568599965e-05, "loss": 9.4468, "step": 57130 }, { "epoch": 0.2853504457040126, "grad_norm": 0.08810552209615707, "learning_rate": 2.1493153771058098e-05, "loss": 9.4406, "step": 57140 }, { "epoch": 0.28540038452895206, "grad_norm": 0.09267257899045944, "learning_rate": 2.149165185611655e-05, "loss": 9.4496, "step": 57150 }, { "epoch": 0.2854503233538915, "grad_norm": 0.09369373321533203, "learning_rate": 2.1490149941174998e-05, "loss": 9.4524, "step": 57160 }, { "epoch": 0.28550026217883095, "grad_norm": 0.09611758589744568, "learning_rate": 2.1488648026233448e-05, "loss": 9.446, "step": 57170 }, { "epoch": 0.2855502010037704, "grad_norm": 0.09398515522480011, "learning_rate": 2.1487146111291898e-05, "loss": 9.4492, "step": 57180 }, { "epoch": 0.28560013982870985, "grad_norm": 0.08984824270009995, "learning_rate": 2.1485644196350345e-05, "loss": 9.4438, "step": 57190 }, { "epoch": 0.2856500786536493, "grad_norm": 0.09207414090633392, "learning_rate": 2.14841422814088e-05, "loss": 9.4522, "step": 57200 }, { "epoch": 0.28570001747858875, "grad_norm": 0.09611576050519943, "learning_rate": 2.1482640366467245e-05, "loss": 9.4511, "step": 57210 }, { "epoch": 0.2857499563035282, "grad_norm": 0.09785609692335129, "learning_rate": 2.1481138451525696e-05, "loss": 9.4475, "step": 57220 }, { "epoch": 0.28579989512846765, "grad_norm": 0.09928670525550842, "learning_rate": 2.1479636536584146e-05, "loss": 9.4436, "step": 57230 }, { "epoch": 0.2858498339534071, "grad_norm": 0.09629528224468231, "learning_rate": 2.1478134621642593e-05, "loss": 9.4383, "step": 57240 }, { "epoch": 0.28589977277834655, "grad_norm": 0.09739939123392105, "learning_rate": 2.1476632706701046e-05, "loss": 9.447, "step": 57250 }, { "epoch": 0.28594971160328597, "grad_norm": 0.09917205572128296, "learning_rate": 2.1475130791759493e-05, "loss": 9.4335, "step": 57260 }, { "epoch": 0.28599965042822545, "grad_norm": 0.09072194248437881, "learning_rate": 2.1473628876817943e-05, "loss": 9.4544, "step": 57270 }, { "epoch": 0.28604958925316487, "grad_norm": 0.09361054748296738, "learning_rate": 2.1472126961876393e-05, "loss": 9.4429, "step": 57280 }, { "epoch": 0.28609952807810435, "grad_norm": 0.09945287555456161, "learning_rate": 2.147062504693484e-05, "loss": 9.4512, "step": 57290 }, { "epoch": 0.28614946690304377, "grad_norm": 0.09822694212198257, "learning_rate": 2.1469123131993294e-05, "loss": 9.434, "step": 57300 }, { "epoch": 0.28619940572798325, "grad_norm": 0.09281609952449799, "learning_rate": 2.146762121705174e-05, "loss": 9.4456, "step": 57310 }, { "epoch": 0.28624934455292267, "grad_norm": 0.09337762743234634, "learning_rate": 2.146611930211019e-05, "loss": 9.4507, "step": 57320 }, { "epoch": 0.28629928337786215, "grad_norm": 0.09160379320383072, "learning_rate": 2.146461738716864e-05, "loss": 9.4441, "step": 57330 }, { "epoch": 0.28634922220280157, "grad_norm": 0.09638303518295288, "learning_rate": 2.146311547222709e-05, "loss": 9.45, "step": 57340 }, { "epoch": 0.28639916102774104, "grad_norm": 0.0943082943558693, "learning_rate": 2.146161355728554e-05, "loss": 9.4481, "step": 57350 }, { "epoch": 0.28644909985268047, "grad_norm": 0.09442918747663498, "learning_rate": 2.1460111642343988e-05, "loss": 9.4396, "step": 57360 }, { "epoch": 0.2864990386776199, "grad_norm": 0.10041014105081558, "learning_rate": 2.1458609727402438e-05, "loss": 9.4372, "step": 57370 }, { "epoch": 0.28654897750255937, "grad_norm": 0.0975830927491188, "learning_rate": 2.145710781246089e-05, "loss": 9.4574, "step": 57380 }, { "epoch": 0.2865989163274988, "grad_norm": 0.0888514295220375, "learning_rate": 2.145560589751934e-05, "loss": 9.4463, "step": 57390 }, { "epoch": 0.28664885515243826, "grad_norm": 0.09187906235456467, "learning_rate": 2.145410398257779e-05, "loss": 9.4551, "step": 57400 }, { "epoch": 0.2866987939773777, "grad_norm": 0.09454265236854553, "learning_rate": 2.1452602067636235e-05, "loss": 9.4473, "step": 57410 }, { "epoch": 0.28674873280231716, "grad_norm": 0.09518423676490784, "learning_rate": 2.1451100152694686e-05, "loss": 9.4421, "step": 57420 }, { "epoch": 0.2867986716272566, "grad_norm": 0.09494660049676895, "learning_rate": 2.1449598237753136e-05, "loss": 9.4452, "step": 57430 }, { "epoch": 0.28684861045219606, "grad_norm": 0.09462330490350723, "learning_rate": 2.1448096322811586e-05, "loss": 9.444, "step": 57440 }, { "epoch": 0.2868985492771355, "grad_norm": 0.09740941226482391, "learning_rate": 2.1446594407870036e-05, "loss": 9.4363, "step": 57450 }, { "epoch": 0.28694848810207496, "grad_norm": 0.09297572076320648, "learning_rate": 2.1445092492928483e-05, "loss": 9.4484, "step": 57460 }, { "epoch": 0.2869984269270144, "grad_norm": 0.09299388527870178, "learning_rate": 2.1443590577986933e-05, "loss": 9.4431, "step": 57470 }, { "epoch": 0.28704836575195386, "grad_norm": 0.09611527621746063, "learning_rate": 2.1442088663045383e-05, "loss": 9.4422, "step": 57480 }, { "epoch": 0.2870983045768933, "grad_norm": 0.09383988380432129, "learning_rate": 2.1440586748103833e-05, "loss": 9.4434, "step": 57490 }, { "epoch": 0.28714824340183276, "grad_norm": 0.09201846271753311, "learning_rate": 2.1439084833162284e-05, "loss": 9.4427, "step": 57500 }, { "epoch": 0.2871981822267722, "grad_norm": 0.09528767317533493, "learning_rate": 2.143758291822073e-05, "loss": 9.4362, "step": 57510 }, { "epoch": 0.28724812105171166, "grad_norm": 0.09210474044084549, "learning_rate": 2.143608100327918e-05, "loss": 9.4403, "step": 57520 }, { "epoch": 0.2872980598766511, "grad_norm": 0.09716343879699707, "learning_rate": 2.143457908833763e-05, "loss": 9.4486, "step": 57530 }, { "epoch": 0.28734799870159056, "grad_norm": 0.0949615091085434, "learning_rate": 2.143307717339608e-05, "loss": 9.4341, "step": 57540 }, { "epoch": 0.28739793752653, "grad_norm": 0.09865114837884903, "learning_rate": 2.143157525845453e-05, "loss": 9.4345, "step": 57550 }, { "epoch": 0.28744787635146946, "grad_norm": 0.09301295876502991, "learning_rate": 2.1430073343512978e-05, "loss": 9.4398, "step": 57560 }, { "epoch": 0.2874978151764089, "grad_norm": 0.09104243665933609, "learning_rate": 2.1428571428571428e-05, "loss": 9.4445, "step": 57570 }, { "epoch": 0.28754775400134835, "grad_norm": 0.09278678148984909, "learning_rate": 2.142706951362988e-05, "loss": 9.4513, "step": 57580 }, { "epoch": 0.2875976928262878, "grad_norm": 0.0937771275639534, "learning_rate": 2.142556759868833e-05, "loss": 9.4393, "step": 57590 }, { "epoch": 0.28764763165122725, "grad_norm": 0.09479419887065887, "learning_rate": 2.142406568374678e-05, "loss": 9.4373, "step": 57600 }, { "epoch": 0.2876975704761667, "grad_norm": 0.09332539886236191, "learning_rate": 2.1422563768805225e-05, "loss": 9.443, "step": 57610 }, { "epoch": 0.28774750930110615, "grad_norm": 0.09496680647134781, "learning_rate": 2.1421061853863676e-05, "loss": 9.4405, "step": 57620 }, { "epoch": 0.2877974481260456, "grad_norm": 0.09168408811092377, "learning_rate": 2.1419559938922126e-05, "loss": 9.4475, "step": 57630 }, { "epoch": 0.28784738695098505, "grad_norm": 0.08694005757570267, "learning_rate": 2.1418058023980576e-05, "loss": 9.4491, "step": 57640 }, { "epoch": 0.2878973257759245, "grad_norm": 0.0956626832485199, "learning_rate": 2.1416556109039026e-05, "loss": 9.4481, "step": 57650 }, { "epoch": 0.28794726460086395, "grad_norm": 0.09360313415527344, "learning_rate": 2.1415054194097476e-05, "loss": 9.438, "step": 57660 }, { "epoch": 0.28799720342580337, "grad_norm": 0.09131594747304916, "learning_rate": 2.1413552279155923e-05, "loss": 9.4273, "step": 57670 }, { "epoch": 0.28804714225074285, "grad_norm": 0.09347652643918991, "learning_rate": 2.1412050364214373e-05, "loss": 9.4416, "step": 57680 }, { "epoch": 0.28809708107568227, "grad_norm": 0.09475225955247879, "learning_rate": 2.1410548449272824e-05, "loss": 9.431, "step": 57690 }, { "epoch": 0.28814701990062175, "grad_norm": 0.0879056304693222, "learning_rate": 2.1409046534331274e-05, "loss": 9.4484, "step": 57700 }, { "epoch": 0.28819695872556117, "grad_norm": 0.090447798371315, "learning_rate": 2.1407544619389724e-05, "loss": 9.4465, "step": 57710 }, { "epoch": 0.28824689755050065, "grad_norm": 0.09707989543676376, "learning_rate": 2.140604270444817e-05, "loss": 9.4368, "step": 57720 }, { "epoch": 0.28829683637544007, "grad_norm": 0.09571398794651031, "learning_rate": 2.140454078950662e-05, "loss": 9.4407, "step": 57730 }, { "epoch": 0.28834677520037955, "grad_norm": 0.09500686079263687, "learning_rate": 2.140303887456507e-05, "loss": 9.4291, "step": 57740 }, { "epoch": 0.28839671402531897, "grad_norm": 0.09296731650829315, "learning_rate": 2.140153695962352e-05, "loss": 9.4408, "step": 57750 }, { "epoch": 0.28844665285025844, "grad_norm": 0.0914110615849495, "learning_rate": 2.140003504468197e-05, "loss": 9.4334, "step": 57760 }, { "epoch": 0.28849659167519787, "grad_norm": 0.09195576608181, "learning_rate": 2.1398533129740418e-05, "loss": 9.4371, "step": 57770 }, { "epoch": 0.28854653050013734, "grad_norm": 0.09556440263986588, "learning_rate": 2.139703121479887e-05, "loss": 9.4361, "step": 57780 }, { "epoch": 0.28859646932507677, "grad_norm": 0.09003462642431259, "learning_rate": 2.139552929985732e-05, "loss": 9.4242, "step": 57790 }, { "epoch": 0.28864640815001624, "grad_norm": 0.09391631186008453, "learning_rate": 2.139402738491577e-05, "loss": 9.4349, "step": 57800 }, { "epoch": 0.28869634697495566, "grad_norm": 0.09652207046747208, "learning_rate": 2.139252546997422e-05, "loss": 9.4379, "step": 57810 }, { "epoch": 0.28874628579989514, "grad_norm": 0.09347474575042725, "learning_rate": 2.1391023555032666e-05, "loss": 9.4349, "step": 57820 }, { "epoch": 0.28879622462483456, "grad_norm": 0.09264619648456573, "learning_rate": 2.1389521640091116e-05, "loss": 9.4356, "step": 57830 }, { "epoch": 0.28884616344977404, "grad_norm": 0.09425893425941467, "learning_rate": 2.1388019725149566e-05, "loss": 9.4268, "step": 57840 }, { "epoch": 0.28889610227471346, "grad_norm": 0.09370515495538712, "learning_rate": 2.1386517810208016e-05, "loss": 9.4335, "step": 57850 }, { "epoch": 0.28894604109965294, "grad_norm": 0.09193460643291473, "learning_rate": 2.1385015895266466e-05, "loss": 9.4294, "step": 57860 }, { "epoch": 0.28899597992459236, "grad_norm": 0.09023167192935944, "learning_rate": 2.1383513980324913e-05, "loss": 9.4308, "step": 57870 }, { "epoch": 0.28904591874953184, "grad_norm": 0.09350664168596268, "learning_rate": 2.1382012065383363e-05, "loss": 9.4267, "step": 57880 }, { "epoch": 0.28909585757447126, "grad_norm": 0.09755279123783112, "learning_rate": 2.1380510150441814e-05, "loss": 9.4382, "step": 57890 }, { "epoch": 0.28914579639941074, "grad_norm": 0.09248659759759903, "learning_rate": 2.1379008235500264e-05, "loss": 9.4328, "step": 57900 }, { "epoch": 0.28919573522435016, "grad_norm": 0.09720529615879059, "learning_rate": 2.1377506320558714e-05, "loss": 9.4405, "step": 57910 }, { "epoch": 0.28924567404928964, "grad_norm": 0.09519968181848526, "learning_rate": 2.137600440561716e-05, "loss": 9.4223, "step": 57920 }, { "epoch": 0.28929561287422906, "grad_norm": 0.09368513524532318, "learning_rate": 2.137450249067561e-05, "loss": 9.4369, "step": 57930 }, { "epoch": 0.28934555169916854, "grad_norm": 0.09568150341510773, "learning_rate": 2.137300057573406e-05, "loss": 9.4303, "step": 57940 }, { "epoch": 0.28939549052410796, "grad_norm": 0.09773694723844528, "learning_rate": 2.137149866079251e-05, "loss": 9.4328, "step": 57950 }, { "epoch": 0.28944542934904743, "grad_norm": 0.0943894013762474, "learning_rate": 2.136999674585096e-05, "loss": 9.4307, "step": 57960 }, { "epoch": 0.28949536817398686, "grad_norm": 0.09780600666999817, "learning_rate": 2.1368494830909408e-05, "loss": 9.4274, "step": 57970 }, { "epoch": 0.28954530699892633, "grad_norm": 0.09167556464672089, "learning_rate": 2.1366992915967862e-05, "loss": 9.4372, "step": 57980 }, { "epoch": 0.28959524582386575, "grad_norm": 0.08926273137331009, "learning_rate": 2.136549100102631e-05, "loss": 9.4382, "step": 57990 }, { "epoch": 0.28964518464880523, "grad_norm": 0.09246543794870377, "learning_rate": 2.136398908608476e-05, "loss": 9.4295, "step": 58000 }, { "epoch": 0.28969512347374465, "grad_norm": 0.10091344267129898, "learning_rate": 2.136248717114321e-05, "loss": 9.4334, "step": 58010 }, { "epoch": 0.28974506229868413, "grad_norm": 0.10019103437662125, "learning_rate": 2.1360985256201656e-05, "loss": 9.443, "step": 58020 }, { "epoch": 0.28979500112362355, "grad_norm": 0.09648888558149338, "learning_rate": 2.135948334126011e-05, "loss": 9.4307, "step": 58030 }, { "epoch": 0.28984493994856303, "grad_norm": 0.09710298478603363, "learning_rate": 2.1357981426318556e-05, "loss": 9.4356, "step": 58040 }, { "epoch": 0.28989487877350245, "grad_norm": 0.0928901955485344, "learning_rate": 2.1356479511377006e-05, "loss": 9.4327, "step": 58050 }, { "epoch": 0.28994481759844193, "grad_norm": 0.09765011072158813, "learning_rate": 2.1354977596435456e-05, "loss": 9.4317, "step": 58060 }, { "epoch": 0.28999475642338135, "grad_norm": 0.08833613991737366, "learning_rate": 2.1353475681493903e-05, "loss": 9.4366, "step": 58070 }, { "epoch": 0.2900446952483208, "grad_norm": 0.09767667204141617, "learning_rate": 2.1351973766552357e-05, "loss": 9.4237, "step": 58080 }, { "epoch": 0.29009463407326025, "grad_norm": 0.09400112926959991, "learning_rate": 2.1350471851610804e-05, "loss": 9.4363, "step": 58090 }, { "epoch": 0.2901445728981997, "grad_norm": 0.09494834393262863, "learning_rate": 2.1348969936669254e-05, "loss": 9.4317, "step": 58100 }, { "epoch": 0.29019451172313915, "grad_norm": 0.08743976801633835, "learning_rate": 2.1347468021727704e-05, "loss": 9.4312, "step": 58110 }, { "epoch": 0.2902444505480786, "grad_norm": 0.0986538678407669, "learning_rate": 2.134596610678615e-05, "loss": 9.4404, "step": 58120 }, { "epoch": 0.29029438937301805, "grad_norm": 0.09177923202514648, "learning_rate": 2.1344464191844604e-05, "loss": 9.4274, "step": 58130 }, { "epoch": 0.2903443281979575, "grad_norm": 0.09830587357282639, "learning_rate": 2.134296227690305e-05, "loss": 9.4314, "step": 58140 }, { "epoch": 0.29039426702289695, "grad_norm": 0.09088832139968872, "learning_rate": 2.13414603619615e-05, "loss": 9.4367, "step": 58150 }, { "epoch": 0.2904442058478364, "grad_norm": 0.0943973958492279, "learning_rate": 2.133995844701995e-05, "loss": 9.4372, "step": 58160 }, { "epoch": 0.29049414467277584, "grad_norm": 0.09387973695993423, "learning_rate": 2.1338456532078398e-05, "loss": 9.4323, "step": 58170 }, { "epoch": 0.2905440834977153, "grad_norm": 0.0891706719994545, "learning_rate": 2.1336954617136852e-05, "loss": 9.4364, "step": 58180 }, { "epoch": 0.29059402232265474, "grad_norm": 0.09315983951091766, "learning_rate": 2.13354527021953e-05, "loss": 9.428, "step": 58190 }, { "epoch": 0.2906439611475942, "grad_norm": 0.09957679361104965, "learning_rate": 2.133395078725375e-05, "loss": 9.4258, "step": 58200 }, { "epoch": 0.29069389997253364, "grad_norm": 0.0999840795993805, "learning_rate": 2.13324488723122e-05, "loss": 9.4291, "step": 58210 }, { "epoch": 0.2907438387974731, "grad_norm": 0.09671057015657425, "learning_rate": 2.1330946957370646e-05, "loss": 9.4328, "step": 58220 }, { "epoch": 0.29079377762241254, "grad_norm": 0.0913921594619751, "learning_rate": 2.13294450424291e-05, "loss": 9.4308, "step": 58230 }, { "epoch": 0.290843716447352, "grad_norm": 0.09564314782619476, "learning_rate": 2.1327943127487546e-05, "loss": 9.4269, "step": 58240 }, { "epoch": 0.29089365527229144, "grad_norm": 0.09441643208265305, "learning_rate": 2.1326441212545996e-05, "loss": 9.4385, "step": 58250 }, { "epoch": 0.2909435940972309, "grad_norm": 0.0950765311717987, "learning_rate": 2.1324939297604446e-05, "loss": 9.4394, "step": 58260 }, { "epoch": 0.29099353292217034, "grad_norm": 0.09390682727098465, "learning_rate": 2.1323437382662893e-05, "loss": 9.4312, "step": 58270 }, { "epoch": 0.2910434717471098, "grad_norm": 0.09104769676923752, "learning_rate": 2.1321935467721347e-05, "loss": 9.4247, "step": 58280 }, { "epoch": 0.29109341057204924, "grad_norm": 0.10046746581792831, "learning_rate": 2.1320433552779794e-05, "loss": 9.4248, "step": 58290 }, { "epoch": 0.2911433493969887, "grad_norm": 0.09288429468870163, "learning_rate": 2.1318931637838247e-05, "loss": 9.4221, "step": 58300 }, { "epoch": 0.29119328822192814, "grad_norm": 0.09124504029750824, "learning_rate": 2.1317429722896694e-05, "loss": 9.4278, "step": 58310 }, { "epoch": 0.2912432270468676, "grad_norm": 0.09842488169670105, "learning_rate": 2.131592780795514e-05, "loss": 9.4219, "step": 58320 }, { "epoch": 0.29129316587180704, "grad_norm": 0.09622178226709366, "learning_rate": 2.1314425893013594e-05, "loss": 9.4214, "step": 58330 }, { "epoch": 0.2913431046967465, "grad_norm": 0.09460314363241196, "learning_rate": 2.131292397807204e-05, "loss": 9.4139, "step": 58340 }, { "epoch": 0.29139304352168593, "grad_norm": 0.10045454651117325, "learning_rate": 2.1311422063130495e-05, "loss": 9.4236, "step": 58350 }, { "epoch": 0.29144298234662536, "grad_norm": 0.09059098362922668, "learning_rate": 2.130992014818894e-05, "loss": 9.4255, "step": 58360 }, { "epoch": 0.29149292117156483, "grad_norm": 0.09203209728002548, "learning_rate": 2.1308418233247388e-05, "loss": 9.4297, "step": 58370 }, { "epoch": 0.29154285999650426, "grad_norm": 0.08972401171922684, "learning_rate": 2.1306916318305842e-05, "loss": 9.4402, "step": 58380 }, { "epoch": 0.29159279882144373, "grad_norm": 0.09233536571264267, "learning_rate": 2.130541440336429e-05, "loss": 9.4283, "step": 58390 }, { "epoch": 0.29164273764638315, "grad_norm": 0.10041224211454391, "learning_rate": 2.1303912488422742e-05, "loss": 9.425, "step": 58400 }, { "epoch": 0.29169267647132263, "grad_norm": 0.09314385056495667, "learning_rate": 2.130241057348119e-05, "loss": 9.4202, "step": 58410 }, { "epoch": 0.29174261529626205, "grad_norm": 0.09177546203136444, "learning_rate": 2.1300908658539636e-05, "loss": 9.4319, "step": 58420 }, { "epoch": 0.29179255412120153, "grad_norm": 0.09403910487890244, "learning_rate": 2.129940674359809e-05, "loss": 9.4367, "step": 58430 }, { "epoch": 0.29184249294614095, "grad_norm": 0.09180597215890884, "learning_rate": 2.1297904828656536e-05, "loss": 9.4304, "step": 58440 }, { "epoch": 0.29189243177108043, "grad_norm": 0.09582697600126266, "learning_rate": 2.129640291371499e-05, "loss": 9.4268, "step": 58450 }, { "epoch": 0.29194237059601985, "grad_norm": 0.09342509508132935, "learning_rate": 2.1294900998773436e-05, "loss": 9.4324, "step": 58460 }, { "epoch": 0.29199230942095933, "grad_norm": 0.09267037361860275, "learning_rate": 2.1293399083831883e-05, "loss": 9.4224, "step": 58470 }, { "epoch": 0.29204224824589875, "grad_norm": 0.10199777036905289, "learning_rate": 2.1291897168890337e-05, "loss": 9.4253, "step": 58480 }, { "epoch": 0.2920921870708382, "grad_norm": 0.08985645323991776, "learning_rate": 2.1290395253948784e-05, "loss": 9.4197, "step": 58490 }, { "epoch": 0.29214212589577765, "grad_norm": 0.09782855212688446, "learning_rate": 2.1288893339007237e-05, "loss": 9.4231, "step": 58500 }, { "epoch": 0.2921920647207171, "grad_norm": 0.09603475779294968, "learning_rate": 2.1287391424065684e-05, "loss": 9.4237, "step": 58510 }, { "epoch": 0.29224200354565655, "grad_norm": 0.0944107323884964, "learning_rate": 2.128588950912413e-05, "loss": 9.4173, "step": 58520 }, { "epoch": 0.292291942370596, "grad_norm": 0.0947156697511673, "learning_rate": 2.1284387594182584e-05, "loss": 9.4352, "step": 58530 }, { "epoch": 0.29234188119553545, "grad_norm": 0.093475840985775, "learning_rate": 2.128288567924103e-05, "loss": 9.4267, "step": 58540 }, { "epoch": 0.2923918200204749, "grad_norm": 0.1016530990600586, "learning_rate": 2.1281383764299485e-05, "loss": 9.4312, "step": 58550 }, { "epoch": 0.29244175884541435, "grad_norm": 0.09788955748081207, "learning_rate": 2.127988184935793e-05, "loss": 9.4174, "step": 58560 }, { "epoch": 0.2924916976703538, "grad_norm": 0.09802679717540741, "learning_rate": 2.1278379934416378e-05, "loss": 9.428, "step": 58570 }, { "epoch": 0.29254163649529324, "grad_norm": 0.09107000380754471, "learning_rate": 2.1276878019474832e-05, "loss": 9.4274, "step": 58580 }, { "epoch": 0.2925915753202327, "grad_norm": 0.09072667360305786, "learning_rate": 2.127537610453328e-05, "loss": 9.4244, "step": 58590 }, { "epoch": 0.29264151414517214, "grad_norm": 0.097968690097332, "learning_rate": 2.1273874189591732e-05, "loss": 9.4158, "step": 58600 }, { "epoch": 0.2926914529701116, "grad_norm": 0.09575551748275757, "learning_rate": 2.127237227465018e-05, "loss": 9.4244, "step": 58610 }, { "epoch": 0.29274139179505104, "grad_norm": 0.09668948501348495, "learning_rate": 2.127087035970863e-05, "loss": 9.4156, "step": 58620 }, { "epoch": 0.2927913306199905, "grad_norm": 0.09409730136394501, "learning_rate": 2.126936844476708e-05, "loss": 9.4266, "step": 58630 }, { "epoch": 0.29284126944492994, "grad_norm": 0.09310100227594376, "learning_rate": 2.1267866529825526e-05, "loss": 9.4158, "step": 58640 }, { "epoch": 0.2928912082698694, "grad_norm": 0.09272795915603638, "learning_rate": 2.126636461488398e-05, "loss": 9.4275, "step": 58650 }, { "epoch": 0.29294114709480884, "grad_norm": 0.09329953789710999, "learning_rate": 2.1264862699942426e-05, "loss": 9.425, "step": 58660 }, { "epoch": 0.2929910859197483, "grad_norm": 0.08982928097248077, "learning_rate": 2.1263360785000877e-05, "loss": 9.4224, "step": 58670 }, { "epoch": 0.29304102474468774, "grad_norm": 0.09044983983039856, "learning_rate": 2.1261858870059327e-05, "loss": 9.4227, "step": 58680 }, { "epoch": 0.2930909635696272, "grad_norm": 0.09245163947343826, "learning_rate": 2.1260356955117774e-05, "loss": 9.424, "step": 58690 }, { "epoch": 0.29314090239456664, "grad_norm": 0.09064756333827972, "learning_rate": 2.1258855040176227e-05, "loss": 9.4198, "step": 58700 }, { "epoch": 0.2931908412195061, "grad_norm": 0.08946257829666138, "learning_rate": 2.1257353125234674e-05, "loss": 9.4247, "step": 58710 }, { "epoch": 0.29324078004444554, "grad_norm": 0.09985718131065369, "learning_rate": 2.1255851210293124e-05, "loss": 9.4138, "step": 58720 }, { "epoch": 0.293290718869385, "grad_norm": 0.0930742546916008, "learning_rate": 2.1254349295351574e-05, "loss": 9.4201, "step": 58730 }, { "epoch": 0.29334065769432444, "grad_norm": 0.08928178995847702, "learning_rate": 2.125284738041002e-05, "loss": 9.4215, "step": 58740 }, { "epoch": 0.2933905965192639, "grad_norm": 0.09685919433832169, "learning_rate": 2.1251345465468475e-05, "loss": 9.4265, "step": 58750 }, { "epoch": 0.29344053534420333, "grad_norm": 0.09459210932254791, "learning_rate": 2.124984355052692e-05, "loss": 9.411, "step": 58760 }, { "epoch": 0.2934904741691428, "grad_norm": 0.09364203363656998, "learning_rate": 2.124834163558537e-05, "loss": 9.425, "step": 58770 }, { "epoch": 0.29354041299408223, "grad_norm": 0.09589599817991257, "learning_rate": 2.1246839720643822e-05, "loss": 9.4135, "step": 58780 }, { "epoch": 0.2935903518190217, "grad_norm": 0.08911266922950745, "learning_rate": 2.124533780570227e-05, "loss": 9.4207, "step": 58790 }, { "epoch": 0.29364029064396113, "grad_norm": 0.09034483134746552, "learning_rate": 2.1243835890760722e-05, "loss": 9.4197, "step": 58800 }, { "epoch": 0.2936902294689006, "grad_norm": 0.0975160002708435, "learning_rate": 2.124233397581917e-05, "loss": 9.4134, "step": 58810 }, { "epoch": 0.29374016829384003, "grad_norm": 0.09520293772220612, "learning_rate": 2.124083206087762e-05, "loss": 9.4159, "step": 58820 }, { "epoch": 0.2937901071187795, "grad_norm": 0.09131459891796112, "learning_rate": 2.123933014593607e-05, "loss": 9.4216, "step": 58830 }, { "epoch": 0.29384004594371893, "grad_norm": 0.10248485952615738, "learning_rate": 2.1237828230994516e-05, "loss": 9.4095, "step": 58840 }, { "epoch": 0.2938899847686584, "grad_norm": 0.09531592577695847, "learning_rate": 2.123632631605297e-05, "loss": 9.4285, "step": 58850 }, { "epoch": 0.29393992359359783, "grad_norm": 0.09358898550271988, "learning_rate": 2.1234824401111416e-05, "loss": 9.4086, "step": 58860 }, { "epoch": 0.2939898624185373, "grad_norm": 0.09685607254505157, "learning_rate": 2.1233322486169867e-05, "loss": 9.4227, "step": 58870 }, { "epoch": 0.29403980124347673, "grad_norm": 0.09395089745521545, "learning_rate": 2.1231820571228317e-05, "loss": 9.4146, "step": 58880 }, { "epoch": 0.2940897400684162, "grad_norm": 0.0898551344871521, "learning_rate": 2.1230318656286764e-05, "loss": 9.4178, "step": 58890 }, { "epoch": 0.2941396788933556, "grad_norm": 0.0958523377776146, "learning_rate": 2.1228816741345217e-05, "loss": 9.4159, "step": 58900 }, { "epoch": 0.2941896177182951, "grad_norm": 0.093168243765831, "learning_rate": 2.1227314826403664e-05, "loss": 9.4083, "step": 58910 }, { "epoch": 0.2942395565432345, "grad_norm": 0.09986717253923416, "learning_rate": 2.1225812911462114e-05, "loss": 9.4089, "step": 58920 }, { "epoch": 0.294289495368174, "grad_norm": 0.0952669233083725, "learning_rate": 2.1224310996520564e-05, "loss": 9.4216, "step": 58930 }, { "epoch": 0.2943394341931134, "grad_norm": 0.0995723307132721, "learning_rate": 2.122280908157901e-05, "loss": 9.4183, "step": 58940 }, { "epoch": 0.2943893730180529, "grad_norm": 0.09398746490478516, "learning_rate": 2.1221307166637465e-05, "loss": 9.4149, "step": 58950 }, { "epoch": 0.2944393118429923, "grad_norm": 0.10161332786083221, "learning_rate": 2.121980525169591e-05, "loss": 9.4085, "step": 58960 }, { "epoch": 0.2944892506679318, "grad_norm": 0.09317755699157715, "learning_rate": 2.121830333675436e-05, "loss": 9.4227, "step": 58970 }, { "epoch": 0.2945391894928712, "grad_norm": 0.09553561359643936, "learning_rate": 2.1216801421812812e-05, "loss": 9.4188, "step": 58980 }, { "epoch": 0.2945891283178107, "grad_norm": 0.09423384815454483, "learning_rate": 2.1215299506871262e-05, "loss": 9.4171, "step": 58990 }, { "epoch": 0.2946390671427501, "grad_norm": 0.09159230440855026, "learning_rate": 2.1213797591929712e-05, "loss": 9.4175, "step": 59000 }, { "epoch": 0.2946890059676896, "grad_norm": 0.09593381732702255, "learning_rate": 2.121229567698816e-05, "loss": 9.4132, "step": 59010 }, { "epoch": 0.294738944792629, "grad_norm": 0.09449375420808792, "learning_rate": 2.121079376204661e-05, "loss": 9.422, "step": 59020 }, { "epoch": 0.2947888836175685, "grad_norm": 0.09151158481836319, "learning_rate": 2.120929184710506e-05, "loss": 9.4252, "step": 59030 }, { "epoch": 0.2948388224425079, "grad_norm": 0.09154769778251648, "learning_rate": 2.120778993216351e-05, "loss": 9.4157, "step": 59040 }, { "epoch": 0.2948887612674474, "grad_norm": 0.09065014123916626, "learning_rate": 2.120628801722196e-05, "loss": 9.4179, "step": 59050 }, { "epoch": 0.2949387000923868, "grad_norm": 0.09222662448883057, "learning_rate": 2.1204786102280406e-05, "loss": 9.4056, "step": 59060 }, { "epoch": 0.2949886389173263, "grad_norm": 0.09881994873285294, "learning_rate": 2.1203284187338857e-05, "loss": 9.4033, "step": 59070 }, { "epoch": 0.2950385777422657, "grad_norm": 0.10036629438400269, "learning_rate": 2.1201782272397307e-05, "loss": 9.4071, "step": 59080 }, { "epoch": 0.2950885165672052, "grad_norm": 0.09214989095926285, "learning_rate": 2.1200280357455757e-05, "loss": 9.4131, "step": 59090 }, { "epoch": 0.2951384553921446, "grad_norm": 0.09671517461538315, "learning_rate": 2.1198778442514207e-05, "loss": 9.407, "step": 59100 }, { "epoch": 0.2951883942170841, "grad_norm": 0.09238772839307785, "learning_rate": 2.1197276527572654e-05, "loss": 9.4105, "step": 59110 }, { "epoch": 0.2952383330420235, "grad_norm": 0.09682830423116684, "learning_rate": 2.1195774612631104e-05, "loss": 9.4144, "step": 59120 }, { "epoch": 0.295288271866963, "grad_norm": 0.09174001961946487, "learning_rate": 2.1194272697689554e-05, "loss": 9.409, "step": 59130 }, { "epoch": 0.2953382106919024, "grad_norm": 0.09227842837572098, "learning_rate": 2.1192770782748005e-05, "loss": 9.4068, "step": 59140 }, { "epoch": 0.2953881495168419, "grad_norm": 0.09464056044816971, "learning_rate": 2.1191268867806455e-05, "loss": 9.4129, "step": 59150 }, { "epoch": 0.2954380883417813, "grad_norm": 0.09324008971452713, "learning_rate": 2.11897669528649e-05, "loss": 9.4128, "step": 59160 }, { "epoch": 0.2954880271667208, "grad_norm": 0.093290314078331, "learning_rate": 2.118826503792335e-05, "loss": 9.4178, "step": 59170 }, { "epoch": 0.2955379659916602, "grad_norm": 0.09760567545890808, "learning_rate": 2.1186763122981802e-05, "loss": 9.4109, "step": 59180 }, { "epoch": 0.2955879048165997, "grad_norm": 0.09324108064174652, "learning_rate": 2.1185261208040252e-05, "loss": 9.4177, "step": 59190 }, { "epoch": 0.2956378436415391, "grad_norm": 0.09072353690862656, "learning_rate": 2.1183759293098702e-05, "loss": 9.4191, "step": 59200 }, { "epoch": 0.2956877824664786, "grad_norm": 0.09248016029596329, "learning_rate": 2.118225737815715e-05, "loss": 9.4159, "step": 59210 }, { "epoch": 0.295737721291418, "grad_norm": 0.08923139423131943, "learning_rate": 2.1180755463215603e-05, "loss": 9.4086, "step": 59220 }, { "epoch": 0.2957876601163575, "grad_norm": 0.09490073472261429, "learning_rate": 2.117925354827405e-05, "loss": 9.4123, "step": 59230 }, { "epoch": 0.2958375989412969, "grad_norm": 0.09313952177762985, "learning_rate": 2.11777516333325e-05, "loss": 9.4169, "step": 59240 }, { "epoch": 0.2958875377662364, "grad_norm": 0.0937030240893364, "learning_rate": 2.117624971839095e-05, "loss": 9.4101, "step": 59250 }, { "epoch": 0.2959374765911758, "grad_norm": 0.0976044312119484, "learning_rate": 2.1174747803449396e-05, "loss": 9.4097, "step": 59260 }, { "epoch": 0.2959874154161153, "grad_norm": 0.09942024946212769, "learning_rate": 2.117324588850785e-05, "loss": 9.4033, "step": 59270 }, { "epoch": 0.2960373542410547, "grad_norm": 0.09450352191925049, "learning_rate": 2.1171743973566297e-05, "loss": 9.4104, "step": 59280 }, { "epoch": 0.2960872930659942, "grad_norm": 0.09626131504774094, "learning_rate": 2.1170242058624747e-05, "loss": 9.4153, "step": 59290 }, { "epoch": 0.2961372318909336, "grad_norm": 0.09533768892288208, "learning_rate": 2.1168740143683197e-05, "loss": 9.4257, "step": 59300 }, { "epoch": 0.2961871707158731, "grad_norm": 0.09322089701890945, "learning_rate": 2.1167238228741647e-05, "loss": 9.4129, "step": 59310 }, { "epoch": 0.2962371095408125, "grad_norm": 0.08978445082902908, "learning_rate": 2.1165736313800098e-05, "loss": 9.4065, "step": 59320 }, { "epoch": 0.296287048365752, "grad_norm": 0.09518822282552719, "learning_rate": 2.1164234398858544e-05, "loss": 9.4149, "step": 59330 }, { "epoch": 0.2963369871906914, "grad_norm": 0.09288329631090164, "learning_rate": 2.1162732483916995e-05, "loss": 9.4026, "step": 59340 }, { "epoch": 0.2963869260156308, "grad_norm": 0.0915319174528122, "learning_rate": 2.1161230568975445e-05, "loss": 9.4154, "step": 59350 }, { "epoch": 0.2964368648405703, "grad_norm": 0.09252842515707016, "learning_rate": 2.1159728654033895e-05, "loss": 9.4147, "step": 59360 }, { "epoch": 0.2964868036655097, "grad_norm": 0.09411928057670593, "learning_rate": 2.1158226739092345e-05, "loss": 9.4116, "step": 59370 }, { "epoch": 0.2965367424904492, "grad_norm": 0.09231306612491608, "learning_rate": 2.1156724824150792e-05, "loss": 9.4012, "step": 59380 }, { "epoch": 0.2965866813153886, "grad_norm": 0.09552402049303055, "learning_rate": 2.1155222909209242e-05, "loss": 9.4092, "step": 59390 }, { "epoch": 0.2966366201403281, "grad_norm": 0.0954194962978363, "learning_rate": 2.1153720994267692e-05, "loss": 9.4077, "step": 59400 }, { "epoch": 0.2966865589652675, "grad_norm": 0.09317540377378464, "learning_rate": 2.1152219079326142e-05, "loss": 9.4094, "step": 59410 }, { "epoch": 0.296736497790207, "grad_norm": 0.09430140256881714, "learning_rate": 2.1150717164384593e-05, "loss": 9.4044, "step": 59420 }, { "epoch": 0.2967864366151464, "grad_norm": 0.09407494962215424, "learning_rate": 2.114921524944304e-05, "loss": 9.4147, "step": 59430 }, { "epoch": 0.2968363754400859, "grad_norm": 0.09957774728536606, "learning_rate": 2.114771333450149e-05, "loss": 9.4126, "step": 59440 }, { "epoch": 0.2968863142650253, "grad_norm": 0.09334376454353333, "learning_rate": 2.114621141955994e-05, "loss": 9.4016, "step": 59450 }, { "epoch": 0.2969362530899648, "grad_norm": 0.0957721695303917, "learning_rate": 2.114470950461839e-05, "loss": 9.4109, "step": 59460 }, { "epoch": 0.2969861919149042, "grad_norm": 0.09447581321001053, "learning_rate": 2.114320758967684e-05, "loss": 9.4193, "step": 59470 }, { "epoch": 0.2970361307398437, "grad_norm": 0.0941816195845604, "learning_rate": 2.1141705674735287e-05, "loss": 9.4047, "step": 59480 }, { "epoch": 0.2970860695647831, "grad_norm": 0.08960052579641342, "learning_rate": 2.1140203759793737e-05, "loss": 9.4161, "step": 59490 }, { "epoch": 0.2971360083897226, "grad_norm": 0.09240063279867172, "learning_rate": 2.1138701844852187e-05, "loss": 9.4053, "step": 59500 }, { "epoch": 0.297185947214662, "grad_norm": 0.08736561983823776, "learning_rate": 2.1137199929910637e-05, "loss": 9.4092, "step": 59510 }, { "epoch": 0.2972358860396015, "grad_norm": 0.09654782712459564, "learning_rate": 2.1135698014969088e-05, "loss": 9.4072, "step": 59520 }, { "epoch": 0.2972858248645409, "grad_norm": 0.09687676280736923, "learning_rate": 2.1134196100027534e-05, "loss": 9.409, "step": 59530 }, { "epoch": 0.2973357636894804, "grad_norm": 0.0934508666396141, "learning_rate": 2.1132694185085985e-05, "loss": 9.4048, "step": 59540 }, { "epoch": 0.2973857025144198, "grad_norm": 0.09141778200864792, "learning_rate": 2.1131192270144435e-05, "loss": 9.4101, "step": 59550 }, { "epoch": 0.2974356413393593, "grad_norm": 0.0905558317899704, "learning_rate": 2.1129690355202885e-05, "loss": 9.4071, "step": 59560 }, { "epoch": 0.2974855801642987, "grad_norm": 0.09105515480041504, "learning_rate": 2.1128188440261335e-05, "loss": 9.4076, "step": 59570 }, { "epoch": 0.2975355189892382, "grad_norm": 0.09343723952770233, "learning_rate": 2.1126686525319782e-05, "loss": 9.4212, "step": 59580 }, { "epoch": 0.2975854578141776, "grad_norm": 0.09228665381669998, "learning_rate": 2.1125184610378232e-05, "loss": 9.4151, "step": 59590 }, { "epoch": 0.2976353966391171, "grad_norm": 0.09571309387683868, "learning_rate": 2.1123682695436682e-05, "loss": 9.3969, "step": 59600 }, { "epoch": 0.2976853354640565, "grad_norm": 0.09124372899532318, "learning_rate": 2.1122180780495132e-05, "loss": 9.4104, "step": 59610 }, { "epoch": 0.297735274288996, "grad_norm": 0.09096134454011917, "learning_rate": 2.1120678865553583e-05, "loss": 9.4108, "step": 59620 }, { "epoch": 0.2977852131139354, "grad_norm": 0.09474842250347137, "learning_rate": 2.1119176950612033e-05, "loss": 9.4011, "step": 59630 }, { "epoch": 0.2978351519388749, "grad_norm": 0.09214451164007187, "learning_rate": 2.111767503567048e-05, "loss": 9.4138, "step": 59640 }, { "epoch": 0.2978850907638143, "grad_norm": 0.09250292927026749, "learning_rate": 2.111617312072893e-05, "loss": 9.4054, "step": 59650 }, { "epoch": 0.2979350295887538, "grad_norm": 0.09642202407121658, "learning_rate": 2.111467120578738e-05, "loss": 9.4088, "step": 59660 }, { "epoch": 0.2979849684136932, "grad_norm": 0.09199708700180054, "learning_rate": 2.111316929084583e-05, "loss": 9.4239, "step": 59670 }, { "epoch": 0.2980349072386327, "grad_norm": 0.09029726684093475, "learning_rate": 2.111166737590428e-05, "loss": 9.4053, "step": 59680 }, { "epoch": 0.2980848460635721, "grad_norm": 0.09503061324357986, "learning_rate": 2.1110165460962727e-05, "loss": 9.4035, "step": 59690 }, { "epoch": 0.2981347848885116, "grad_norm": 0.09824039041996002, "learning_rate": 2.1108663546021177e-05, "loss": 9.4013, "step": 59700 }, { "epoch": 0.298184723713451, "grad_norm": 0.09801938384771347, "learning_rate": 2.1107161631079627e-05, "loss": 9.3948, "step": 59710 }, { "epoch": 0.2982346625383905, "grad_norm": 0.10737218707799911, "learning_rate": 2.1105659716138078e-05, "loss": 9.3953, "step": 59720 }, { "epoch": 0.2982846013633299, "grad_norm": 0.09229201078414917, "learning_rate": 2.1104157801196528e-05, "loss": 9.4062, "step": 59730 }, { "epoch": 0.2983345401882694, "grad_norm": 0.08875761926174164, "learning_rate": 2.1102655886254975e-05, "loss": 9.4129, "step": 59740 }, { "epoch": 0.2983844790132088, "grad_norm": 0.09321201592683792, "learning_rate": 2.1101153971313425e-05, "loss": 9.412, "step": 59750 }, { "epoch": 0.2984344178381483, "grad_norm": 0.0906151756644249, "learning_rate": 2.1099652056371875e-05, "loss": 9.3997, "step": 59760 }, { "epoch": 0.2984843566630877, "grad_norm": 0.10253120213747025, "learning_rate": 2.1098150141430325e-05, "loss": 9.397, "step": 59770 }, { "epoch": 0.2985342954880272, "grad_norm": 0.08926044404506683, "learning_rate": 2.1096648226488775e-05, "loss": 9.4117, "step": 59780 }, { "epoch": 0.2985842343129666, "grad_norm": 0.09025093168020248, "learning_rate": 2.1095146311547222e-05, "loss": 9.4118, "step": 59790 }, { "epoch": 0.2986341731379061, "grad_norm": 0.09714997559785843, "learning_rate": 2.1093644396605672e-05, "loss": 9.4087, "step": 59800 }, { "epoch": 0.2986841119628455, "grad_norm": 0.09052036702632904, "learning_rate": 2.1092142481664122e-05, "loss": 9.4045, "step": 59810 }, { "epoch": 0.298734050787785, "grad_norm": 0.09755855053663254, "learning_rate": 2.1090640566722573e-05, "loss": 9.398, "step": 59820 }, { "epoch": 0.2987839896127244, "grad_norm": 0.09479790180921555, "learning_rate": 2.1089138651781023e-05, "loss": 9.404, "step": 59830 }, { "epoch": 0.2988339284376639, "grad_norm": 0.09147018939256668, "learning_rate": 2.108763673683947e-05, "loss": 9.3966, "step": 59840 }, { "epoch": 0.2988838672626033, "grad_norm": 0.0920906662940979, "learning_rate": 2.108613482189792e-05, "loss": 9.4109, "step": 59850 }, { "epoch": 0.2989338060875428, "grad_norm": 0.0949472039937973, "learning_rate": 2.108463290695637e-05, "loss": 9.3958, "step": 59860 }, { "epoch": 0.2989837449124822, "grad_norm": 0.0973886027932167, "learning_rate": 2.108313099201482e-05, "loss": 9.4021, "step": 59870 }, { "epoch": 0.2990336837374217, "grad_norm": 0.08694946020841599, "learning_rate": 2.108162907707327e-05, "loss": 9.405, "step": 59880 }, { "epoch": 0.2990836225623611, "grad_norm": 0.09538460522890091, "learning_rate": 2.1080127162131717e-05, "loss": 9.4057, "step": 59890 }, { "epoch": 0.2991335613873006, "grad_norm": 0.0949239507317543, "learning_rate": 2.1078625247190167e-05, "loss": 9.4029, "step": 59900 }, { "epoch": 0.29918350021224, "grad_norm": 0.09560471773147583, "learning_rate": 2.1077123332248617e-05, "loss": 9.3962, "step": 59910 }, { "epoch": 0.29923343903717947, "grad_norm": 0.09594585001468658, "learning_rate": 2.1075621417307068e-05, "loss": 9.4046, "step": 59920 }, { "epoch": 0.2992833778621189, "grad_norm": 0.09318960458040237, "learning_rate": 2.1074119502365518e-05, "loss": 9.407, "step": 59930 }, { "epoch": 0.29933331668705837, "grad_norm": 0.09141054004430771, "learning_rate": 2.1072617587423965e-05, "loss": 9.3944, "step": 59940 }, { "epoch": 0.2993832555119978, "grad_norm": 0.09421342611312866, "learning_rate": 2.1071115672482418e-05, "loss": 9.4033, "step": 59950 }, { "epoch": 0.29943319433693727, "grad_norm": 0.09517619758844376, "learning_rate": 2.1069613757540865e-05, "loss": 9.3988, "step": 59960 }, { "epoch": 0.2994831331618767, "grad_norm": 0.09366179257631302, "learning_rate": 2.1068111842599315e-05, "loss": 9.4144, "step": 59970 }, { "epoch": 0.29953307198681617, "grad_norm": 0.09368981420993805, "learning_rate": 2.1066609927657765e-05, "loss": 9.3938, "step": 59980 }, { "epoch": 0.2995830108117556, "grad_norm": 0.09122604131698608, "learning_rate": 2.1065108012716212e-05, "loss": 9.3962, "step": 59990 }, { "epoch": 0.29963294963669507, "grad_norm": 0.0922602191567421, "learning_rate": 2.1063606097774666e-05, "loss": 9.3997, "step": 60000 }, { "epoch": 0.2996828884616345, "grad_norm": 0.1016426831483841, "learning_rate": 2.1062104182833112e-05, "loss": 9.3922, "step": 60010 }, { "epoch": 0.29973282728657397, "grad_norm": 0.08960018306970596, "learning_rate": 2.1060602267891563e-05, "loss": 9.3926, "step": 60020 }, { "epoch": 0.2997827661115134, "grad_norm": 0.08921145647764206, "learning_rate": 2.1059100352950013e-05, "loss": 9.3912, "step": 60030 }, { "epoch": 0.29983270493645287, "grad_norm": 0.09575637429952621, "learning_rate": 2.105759843800846e-05, "loss": 9.3966, "step": 60040 }, { "epoch": 0.2998826437613923, "grad_norm": 0.09875485301017761, "learning_rate": 2.1056096523066913e-05, "loss": 9.3956, "step": 60050 }, { "epoch": 0.29993258258633176, "grad_norm": 0.09163234382867813, "learning_rate": 2.105459460812536e-05, "loss": 9.3946, "step": 60060 }, { "epoch": 0.2999825214112712, "grad_norm": 0.0965883880853653, "learning_rate": 2.105309269318381e-05, "loss": 9.3962, "step": 60070 }, { "epoch": 0.30003246023621066, "grad_norm": 0.09682179242372513, "learning_rate": 2.105159077824226e-05, "loss": 9.4011, "step": 60080 }, { "epoch": 0.3000823990611501, "grad_norm": 0.09401979297399521, "learning_rate": 2.1050088863300707e-05, "loss": 9.3984, "step": 60090 }, { "epoch": 0.30013233788608956, "grad_norm": 0.09467914700508118, "learning_rate": 2.104858694835916e-05, "loss": 9.3967, "step": 60100 }, { "epoch": 0.300182276711029, "grad_norm": 0.09743199497461319, "learning_rate": 2.1047085033417607e-05, "loss": 9.3941, "step": 60110 }, { "epoch": 0.30023221553596846, "grad_norm": 0.08682700246572495, "learning_rate": 2.1045583118476058e-05, "loss": 9.3914, "step": 60120 }, { "epoch": 0.3002821543609079, "grad_norm": 0.0950264036655426, "learning_rate": 2.1044081203534508e-05, "loss": 9.395, "step": 60130 }, { "epoch": 0.30033209318584736, "grad_norm": 0.09148851037025452, "learning_rate": 2.1042579288592955e-05, "loss": 9.3978, "step": 60140 }, { "epoch": 0.3003820320107868, "grad_norm": 0.09214703738689423, "learning_rate": 2.1041077373651408e-05, "loss": 9.3972, "step": 60150 }, { "epoch": 0.30043197083572626, "grad_norm": 0.0951230525970459, "learning_rate": 2.1039575458709855e-05, "loss": 9.4042, "step": 60160 }, { "epoch": 0.3004819096606657, "grad_norm": 0.09221307933330536, "learning_rate": 2.1038073543768305e-05, "loss": 9.4051, "step": 60170 }, { "epoch": 0.30053184848560516, "grad_norm": 0.09453621506690979, "learning_rate": 2.1036571628826755e-05, "loss": 9.3895, "step": 60180 }, { "epoch": 0.3005817873105446, "grad_norm": 0.09616675227880478, "learning_rate": 2.1035069713885202e-05, "loss": 9.4036, "step": 60190 }, { "epoch": 0.30063172613548406, "grad_norm": 0.0845404714345932, "learning_rate": 2.1033567798943656e-05, "loss": 9.4068, "step": 60200 }, { "epoch": 0.3006816649604235, "grad_norm": 0.092598095536232, "learning_rate": 2.1032065884002102e-05, "loss": 9.396, "step": 60210 }, { "epoch": 0.30073160378536296, "grad_norm": 0.09395390748977661, "learning_rate": 2.1030563969060553e-05, "loss": 9.4003, "step": 60220 }, { "epoch": 0.3007815426103024, "grad_norm": 0.09719889611005783, "learning_rate": 2.1029062054119003e-05, "loss": 9.383, "step": 60230 }, { "epoch": 0.30083148143524185, "grad_norm": 0.10100460797548294, "learning_rate": 2.102756013917745e-05, "loss": 9.398, "step": 60240 }, { "epoch": 0.3008814202601813, "grad_norm": 0.09339495748281479, "learning_rate": 2.1026058224235903e-05, "loss": 9.3972, "step": 60250 }, { "epoch": 0.30093135908512075, "grad_norm": 0.095674529671669, "learning_rate": 2.102455630929435e-05, "loss": 9.3979, "step": 60260 }, { "epoch": 0.3009812979100602, "grad_norm": 0.09552553296089172, "learning_rate": 2.1023054394352804e-05, "loss": 9.4002, "step": 60270 }, { "epoch": 0.30103123673499965, "grad_norm": 0.0926952138543129, "learning_rate": 2.102155247941125e-05, "loss": 9.4003, "step": 60280 }, { "epoch": 0.3010811755599391, "grad_norm": 0.09167633205652237, "learning_rate": 2.1020050564469697e-05, "loss": 9.3939, "step": 60290 }, { "epoch": 0.30113111438487855, "grad_norm": 0.10082550346851349, "learning_rate": 2.101854864952815e-05, "loss": 9.3882, "step": 60300 }, { "epoch": 0.301181053209818, "grad_norm": 0.09979651123285294, "learning_rate": 2.1017046734586597e-05, "loss": 9.3897, "step": 60310 }, { "epoch": 0.30123099203475745, "grad_norm": 0.09624478965997696, "learning_rate": 2.101554481964505e-05, "loss": 9.3969, "step": 60320 }, { "epoch": 0.30128093085969687, "grad_norm": 0.09281089156866074, "learning_rate": 2.1014042904703498e-05, "loss": 9.3837, "step": 60330 }, { "epoch": 0.3013308696846363, "grad_norm": 0.08858885616064072, "learning_rate": 2.1012540989761945e-05, "loss": 9.3936, "step": 60340 }, { "epoch": 0.30138080850957577, "grad_norm": 0.09177447855472565, "learning_rate": 2.1011039074820398e-05, "loss": 9.4041, "step": 60350 }, { "epoch": 0.3014307473345152, "grad_norm": 0.09817986190319061, "learning_rate": 2.1009537159878845e-05, "loss": 9.4026, "step": 60360 }, { "epoch": 0.30148068615945467, "grad_norm": 0.09388108551502228, "learning_rate": 2.10080352449373e-05, "loss": 9.3916, "step": 60370 }, { "epoch": 0.3015306249843941, "grad_norm": 0.09480094164609909, "learning_rate": 2.1006533329995745e-05, "loss": 9.3932, "step": 60380 }, { "epoch": 0.30158056380933357, "grad_norm": 0.09471967816352844, "learning_rate": 2.1005031415054192e-05, "loss": 9.3966, "step": 60390 }, { "epoch": 0.301630502634273, "grad_norm": 0.0964706614613533, "learning_rate": 2.1003529500112646e-05, "loss": 9.3854, "step": 60400 }, { "epoch": 0.30168044145921247, "grad_norm": 0.0946371778845787, "learning_rate": 2.1002027585171092e-05, "loss": 9.3927, "step": 60410 }, { "epoch": 0.3017303802841519, "grad_norm": 0.09347691386938095, "learning_rate": 2.1000525670229546e-05, "loss": 9.4019, "step": 60420 }, { "epoch": 0.30178031910909137, "grad_norm": 0.09268350899219513, "learning_rate": 2.0999023755287993e-05, "loss": 9.3979, "step": 60430 }, { "epoch": 0.3018302579340308, "grad_norm": 0.10059063881635666, "learning_rate": 2.099752184034644e-05, "loss": 9.3866, "step": 60440 }, { "epoch": 0.30188019675897027, "grad_norm": 0.08915655314922333, "learning_rate": 2.0996019925404893e-05, "loss": 9.3971, "step": 60450 }, { "epoch": 0.3019301355839097, "grad_norm": 0.09569905698299408, "learning_rate": 2.099451801046334e-05, "loss": 9.3943, "step": 60460 }, { "epoch": 0.30198007440884916, "grad_norm": 0.10031815618276596, "learning_rate": 2.0993016095521794e-05, "loss": 9.3903, "step": 60470 }, { "epoch": 0.3020300132337886, "grad_norm": 0.09447156637907028, "learning_rate": 2.099151418058024e-05, "loss": 9.3889, "step": 60480 }, { "epoch": 0.30207995205872806, "grad_norm": 0.09017782658338547, "learning_rate": 2.0990012265638687e-05, "loss": 9.3948, "step": 60490 }, { "epoch": 0.3021298908836675, "grad_norm": 0.09461423754692078, "learning_rate": 2.098851035069714e-05, "loss": 9.3869, "step": 60500 }, { "epoch": 0.30217982970860696, "grad_norm": 0.09656091034412384, "learning_rate": 2.0987008435755587e-05, "loss": 9.3937, "step": 60510 }, { "epoch": 0.3022297685335464, "grad_norm": 0.08991716802120209, "learning_rate": 2.098550652081404e-05, "loss": 9.3899, "step": 60520 }, { "epoch": 0.30227970735848586, "grad_norm": 0.09503340721130371, "learning_rate": 2.0984004605872488e-05, "loss": 9.3889, "step": 60530 }, { "epoch": 0.3023296461834253, "grad_norm": 0.08786161243915558, "learning_rate": 2.0982502690930935e-05, "loss": 9.3972, "step": 60540 }, { "epoch": 0.30237958500836476, "grad_norm": 0.09755858033895493, "learning_rate": 2.0981000775989388e-05, "loss": 9.4025, "step": 60550 }, { "epoch": 0.3024295238333042, "grad_norm": 0.09725630283355713, "learning_rate": 2.0979498861047835e-05, "loss": 9.3909, "step": 60560 }, { "epoch": 0.30247946265824366, "grad_norm": 0.08783844858407974, "learning_rate": 2.097799694610629e-05, "loss": 9.3987, "step": 60570 }, { "epoch": 0.3025294014831831, "grad_norm": 0.09798564016819, "learning_rate": 2.0976495031164735e-05, "loss": 9.4008, "step": 60580 }, { "epoch": 0.30257934030812256, "grad_norm": 0.09155137091875076, "learning_rate": 2.0974993116223185e-05, "loss": 9.3922, "step": 60590 }, { "epoch": 0.302629279133062, "grad_norm": 0.09524587541818619, "learning_rate": 2.0973491201281636e-05, "loss": 9.3868, "step": 60600 }, { "epoch": 0.30267921795800146, "grad_norm": 0.09223838150501251, "learning_rate": 2.0971989286340082e-05, "loss": 9.3943, "step": 60610 }, { "epoch": 0.3027291567829409, "grad_norm": 0.09258273243904114, "learning_rate": 2.0970487371398536e-05, "loss": 9.382, "step": 60620 }, { "epoch": 0.30277909560788036, "grad_norm": 0.09514257311820984, "learning_rate": 2.0968985456456983e-05, "loss": 9.3933, "step": 60630 }, { "epoch": 0.3028290344328198, "grad_norm": 0.08645520359277725, "learning_rate": 2.0967483541515433e-05, "loss": 9.3864, "step": 60640 }, { "epoch": 0.30287897325775925, "grad_norm": 0.09346000850200653, "learning_rate": 2.0965981626573883e-05, "loss": 9.4062, "step": 60650 }, { "epoch": 0.3029289120826987, "grad_norm": 0.09467698633670807, "learning_rate": 2.096447971163233e-05, "loss": 9.3819, "step": 60660 }, { "epoch": 0.30297885090763815, "grad_norm": 0.09375496953725815, "learning_rate": 2.0962977796690784e-05, "loss": 9.3892, "step": 60670 }, { "epoch": 0.3030287897325776, "grad_norm": 0.09323824197053909, "learning_rate": 2.096147588174923e-05, "loss": 9.3915, "step": 60680 }, { "epoch": 0.30307872855751705, "grad_norm": 0.0925745964050293, "learning_rate": 2.095997396680768e-05, "loss": 9.3835, "step": 60690 }, { "epoch": 0.3031286673824565, "grad_norm": 0.09299085289239883, "learning_rate": 2.095847205186613e-05, "loss": 9.3882, "step": 60700 }, { "epoch": 0.30317860620739595, "grad_norm": 0.09627874940633774, "learning_rate": 2.0956970136924577e-05, "loss": 9.3914, "step": 60710 }, { "epoch": 0.3032285450323354, "grad_norm": 0.09084891527891159, "learning_rate": 2.095546822198303e-05, "loss": 9.3905, "step": 60720 }, { "epoch": 0.30327848385727485, "grad_norm": 0.09070669114589691, "learning_rate": 2.0953966307041478e-05, "loss": 9.3851, "step": 60730 }, { "epoch": 0.30332842268221427, "grad_norm": 0.09068963676691055, "learning_rate": 2.0952464392099928e-05, "loss": 9.3869, "step": 60740 }, { "epoch": 0.30337836150715375, "grad_norm": 0.09145542979240417, "learning_rate": 2.0950962477158378e-05, "loss": 9.3823, "step": 60750 }, { "epoch": 0.30342830033209317, "grad_norm": 0.09390787780284882, "learning_rate": 2.0949460562216825e-05, "loss": 9.3873, "step": 60760 }, { "epoch": 0.30347823915703265, "grad_norm": 0.09399698674678802, "learning_rate": 2.094795864727528e-05, "loss": 9.3884, "step": 60770 }, { "epoch": 0.30352817798197207, "grad_norm": 0.09260044991970062, "learning_rate": 2.0946456732333725e-05, "loss": 9.3739, "step": 60780 }, { "epoch": 0.30357811680691155, "grad_norm": 0.09860619902610779, "learning_rate": 2.0944954817392176e-05, "loss": 9.4007, "step": 60790 }, { "epoch": 0.30362805563185097, "grad_norm": 0.0905844122171402, "learning_rate": 2.0943452902450626e-05, "loss": 9.3961, "step": 60800 }, { "epoch": 0.30367799445679045, "grad_norm": 0.09324529021978378, "learning_rate": 2.0941950987509072e-05, "loss": 9.3876, "step": 60810 }, { "epoch": 0.30372793328172987, "grad_norm": 0.08999308943748474, "learning_rate": 2.0940449072567526e-05, "loss": 9.3901, "step": 60820 }, { "epoch": 0.30377787210666934, "grad_norm": 0.09419707208871841, "learning_rate": 2.0938947157625973e-05, "loss": 9.3841, "step": 60830 }, { "epoch": 0.30382781093160877, "grad_norm": 0.09293625503778458, "learning_rate": 2.0937445242684423e-05, "loss": 9.3892, "step": 60840 }, { "epoch": 0.30387774975654824, "grad_norm": 0.09216861426830292, "learning_rate": 2.0935943327742873e-05, "loss": 9.3948, "step": 60850 }, { "epoch": 0.30392768858148766, "grad_norm": 0.0980692207813263, "learning_rate": 2.093444141280132e-05, "loss": 9.3888, "step": 60860 }, { "epoch": 0.30397762740642714, "grad_norm": 0.09464273601770401, "learning_rate": 2.0932939497859774e-05, "loss": 9.3793, "step": 60870 }, { "epoch": 0.30402756623136656, "grad_norm": 0.0960172563791275, "learning_rate": 2.093143758291822e-05, "loss": 9.3839, "step": 60880 }, { "epoch": 0.30407750505630604, "grad_norm": 0.09181035310029984, "learning_rate": 2.092993566797667e-05, "loss": 9.3835, "step": 60890 }, { "epoch": 0.30412744388124546, "grad_norm": 0.09236995130777359, "learning_rate": 2.092843375303512e-05, "loss": 9.3809, "step": 60900 }, { "epoch": 0.30417738270618494, "grad_norm": 0.08640799671411514, "learning_rate": 2.092693183809357e-05, "loss": 9.378, "step": 60910 }, { "epoch": 0.30422732153112436, "grad_norm": 0.0906277671456337, "learning_rate": 2.092542992315202e-05, "loss": 9.381, "step": 60920 }, { "epoch": 0.30427726035606384, "grad_norm": 0.10483743995428085, "learning_rate": 2.0923928008210468e-05, "loss": 9.3815, "step": 60930 }, { "epoch": 0.30432719918100326, "grad_norm": 0.08990181982517242, "learning_rate": 2.0922426093268918e-05, "loss": 9.3943, "step": 60940 }, { "epoch": 0.30437713800594274, "grad_norm": 0.09138570725917816, "learning_rate": 2.0920924178327368e-05, "loss": 9.3993, "step": 60950 }, { "epoch": 0.30442707683088216, "grad_norm": 0.0894051194190979, "learning_rate": 2.091942226338582e-05, "loss": 9.3783, "step": 60960 }, { "epoch": 0.30447701565582164, "grad_norm": 0.09751047194004059, "learning_rate": 2.091792034844427e-05, "loss": 9.3815, "step": 60970 }, { "epoch": 0.30452695448076106, "grad_norm": 0.09688021242618561, "learning_rate": 2.0916418433502715e-05, "loss": 9.3885, "step": 60980 }, { "epoch": 0.30457689330570054, "grad_norm": 0.09227102994918823, "learning_rate": 2.0914916518561166e-05, "loss": 9.3901, "step": 60990 }, { "epoch": 0.30462683213063996, "grad_norm": 0.09256913512945175, "learning_rate": 2.0913414603619616e-05, "loss": 9.3922, "step": 61000 }, { "epoch": 0.30467677095557943, "grad_norm": 0.08989210426807404, "learning_rate": 2.0911912688678066e-05, "loss": 9.3778, "step": 61010 }, { "epoch": 0.30472670978051886, "grad_norm": 0.09482806921005249, "learning_rate": 2.0910410773736516e-05, "loss": 9.3865, "step": 61020 }, { "epoch": 0.30477664860545833, "grad_norm": 0.09472500532865524, "learning_rate": 2.0908908858794963e-05, "loss": 9.3832, "step": 61030 }, { "epoch": 0.30482658743039776, "grad_norm": 0.09536511451005936, "learning_rate": 2.0907406943853413e-05, "loss": 9.3852, "step": 61040 }, { "epoch": 0.30487652625533723, "grad_norm": 0.0961252823472023, "learning_rate": 2.0905905028911863e-05, "loss": 9.3877, "step": 61050 }, { "epoch": 0.30492646508027665, "grad_norm": 0.0982387587428093, "learning_rate": 2.0904403113970313e-05, "loss": 9.3891, "step": 61060 }, { "epoch": 0.30497640390521613, "grad_norm": 0.09710310399532318, "learning_rate": 2.0902901199028764e-05, "loss": 9.3805, "step": 61070 }, { "epoch": 0.30502634273015555, "grad_norm": 0.09833624958992004, "learning_rate": 2.090139928408721e-05, "loss": 9.3872, "step": 61080 }, { "epoch": 0.30507628155509503, "grad_norm": 0.09045345336198807, "learning_rate": 2.089989736914566e-05, "loss": 9.3848, "step": 61090 }, { "epoch": 0.30512622038003445, "grad_norm": 0.0948140025138855, "learning_rate": 2.089839545420411e-05, "loss": 9.3753, "step": 61100 }, { "epoch": 0.30517615920497393, "grad_norm": 0.09809758514165878, "learning_rate": 2.089689353926256e-05, "loss": 9.3902, "step": 61110 }, { "epoch": 0.30522609802991335, "grad_norm": 0.09343090653419495, "learning_rate": 2.089539162432101e-05, "loss": 9.3775, "step": 61120 }, { "epoch": 0.30527603685485283, "grad_norm": 0.09006762504577637, "learning_rate": 2.0893889709379458e-05, "loss": 9.376, "step": 61130 }, { "epoch": 0.30532597567979225, "grad_norm": 0.09377524256706238, "learning_rate": 2.0892387794437908e-05, "loss": 9.3931, "step": 61140 }, { "epoch": 0.3053759145047317, "grad_norm": 0.09025995433330536, "learning_rate": 2.0890885879496358e-05, "loss": 9.3832, "step": 61150 }, { "epoch": 0.30542585332967115, "grad_norm": 0.09238019585609436, "learning_rate": 2.088938396455481e-05, "loss": 9.3832, "step": 61160 }, { "epoch": 0.3054757921546106, "grad_norm": 0.0937105119228363, "learning_rate": 2.088788204961326e-05, "loss": 9.3737, "step": 61170 }, { "epoch": 0.30552573097955005, "grad_norm": 0.10375673323869705, "learning_rate": 2.0886380134671705e-05, "loss": 9.3801, "step": 61180 }, { "epoch": 0.3055756698044895, "grad_norm": 0.09446416795253754, "learning_rate": 2.0884878219730156e-05, "loss": 9.3759, "step": 61190 }, { "epoch": 0.30562560862942895, "grad_norm": 0.10232505947351456, "learning_rate": 2.0883376304788606e-05, "loss": 9.3806, "step": 61200 }, { "epoch": 0.3056755474543684, "grad_norm": 0.09139604866504669, "learning_rate": 2.0881874389847056e-05, "loss": 9.385, "step": 61210 }, { "epoch": 0.30572548627930785, "grad_norm": 0.09424486756324768, "learning_rate": 2.0880372474905506e-05, "loss": 9.384, "step": 61220 }, { "epoch": 0.3057754251042473, "grad_norm": 0.10143927484750748, "learning_rate": 2.0878870559963956e-05, "loss": 9.3777, "step": 61230 }, { "epoch": 0.30582536392918674, "grad_norm": 0.09673741459846497, "learning_rate": 2.0877368645022403e-05, "loss": 9.3755, "step": 61240 }, { "epoch": 0.3058753027541262, "grad_norm": 0.08992431312799454, "learning_rate": 2.0875866730080853e-05, "loss": 9.3668, "step": 61250 }, { "epoch": 0.30592524157906564, "grad_norm": 0.0944616049528122, "learning_rate": 2.0874364815139303e-05, "loss": 9.3816, "step": 61260 }, { "epoch": 0.3059751804040051, "grad_norm": 0.09414882957935333, "learning_rate": 2.0872862900197754e-05, "loss": 9.3888, "step": 61270 }, { "epoch": 0.30602511922894454, "grad_norm": 0.0981215313076973, "learning_rate": 2.0871360985256204e-05, "loss": 9.3905, "step": 61280 }, { "epoch": 0.306075058053884, "grad_norm": 0.0904826819896698, "learning_rate": 2.086985907031465e-05, "loss": 9.3693, "step": 61290 }, { "epoch": 0.30612499687882344, "grad_norm": 0.09661656618118286, "learning_rate": 2.08683571553731e-05, "loss": 9.3803, "step": 61300 }, { "epoch": 0.30617493570376286, "grad_norm": 0.09500022977590561, "learning_rate": 2.086685524043155e-05, "loss": 9.369, "step": 61310 }, { "epoch": 0.30622487452870234, "grad_norm": 0.0918661504983902, "learning_rate": 2.086535332549e-05, "loss": 9.3814, "step": 61320 }, { "epoch": 0.30627481335364176, "grad_norm": 0.09368791431188583, "learning_rate": 2.086385141054845e-05, "loss": 9.3804, "step": 61330 }, { "epoch": 0.30632475217858124, "grad_norm": 0.09218349307775497, "learning_rate": 2.0862349495606898e-05, "loss": 9.3819, "step": 61340 }, { "epoch": 0.30637469100352066, "grad_norm": 0.10055679827928543, "learning_rate": 2.0860847580665348e-05, "loss": 9.3934, "step": 61350 }, { "epoch": 0.30642462982846014, "grad_norm": 0.09349419176578522, "learning_rate": 2.08593456657238e-05, "loss": 9.3864, "step": 61360 }, { "epoch": 0.30647456865339956, "grad_norm": 0.09518557041883469, "learning_rate": 2.085784375078225e-05, "loss": 9.3871, "step": 61370 }, { "epoch": 0.30652450747833904, "grad_norm": 0.09079370647668839, "learning_rate": 2.08563418358407e-05, "loss": 9.3785, "step": 61380 }, { "epoch": 0.30657444630327846, "grad_norm": 0.0915151834487915, "learning_rate": 2.0854839920899146e-05, "loss": 9.3852, "step": 61390 }, { "epoch": 0.30662438512821794, "grad_norm": 0.09642606973648071, "learning_rate": 2.0853338005957596e-05, "loss": 9.3797, "step": 61400 }, { "epoch": 0.30667432395315736, "grad_norm": 0.08891963213682175, "learning_rate": 2.0851836091016046e-05, "loss": 9.382, "step": 61410 }, { "epoch": 0.30672426277809683, "grad_norm": 0.0949094146490097, "learning_rate": 2.0850334176074496e-05, "loss": 9.3788, "step": 61420 }, { "epoch": 0.30677420160303626, "grad_norm": 0.0930948406457901, "learning_rate": 2.0848832261132946e-05, "loss": 9.3803, "step": 61430 }, { "epoch": 0.30682414042797573, "grad_norm": 0.09792865067720413, "learning_rate": 2.0847330346191393e-05, "loss": 9.3795, "step": 61440 }, { "epoch": 0.30687407925291516, "grad_norm": 0.09721236675977707, "learning_rate": 2.0845828431249843e-05, "loss": 9.3869, "step": 61450 }, { "epoch": 0.30692401807785463, "grad_norm": 0.095427006483078, "learning_rate": 2.0844326516308293e-05, "loss": 9.3857, "step": 61460 }, { "epoch": 0.30697395690279405, "grad_norm": 0.093628890812397, "learning_rate": 2.0842824601366744e-05, "loss": 9.3756, "step": 61470 }, { "epoch": 0.30702389572773353, "grad_norm": 0.09637012332677841, "learning_rate": 2.0841322686425194e-05, "loss": 9.3804, "step": 61480 }, { "epoch": 0.30707383455267295, "grad_norm": 0.09300060570240021, "learning_rate": 2.083982077148364e-05, "loss": 9.3687, "step": 61490 }, { "epoch": 0.30712377337761243, "grad_norm": 0.08992978185415268, "learning_rate": 2.083831885654209e-05, "loss": 9.382, "step": 61500 }, { "epoch": 0.30717371220255185, "grad_norm": 0.09597385674715042, "learning_rate": 2.083681694160054e-05, "loss": 9.379, "step": 61510 }, { "epoch": 0.30722365102749133, "grad_norm": 0.09319521486759186, "learning_rate": 2.083531502665899e-05, "loss": 9.3809, "step": 61520 }, { "epoch": 0.30727358985243075, "grad_norm": 0.09977719187736511, "learning_rate": 2.083381311171744e-05, "loss": 9.3695, "step": 61530 }, { "epoch": 0.30732352867737023, "grad_norm": 0.09011849015951157, "learning_rate": 2.0832311196775888e-05, "loss": 9.3751, "step": 61540 }, { "epoch": 0.30737346750230965, "grad_norm": 0.09430202841758728, "learning_rate": 2.083080928183434e-05, "loss": 9.3763, "step": 61550 }, { "epoch": 0.3074234063272491, "grad_norm": 0.09825265407562256, "learning_rate": 2.082930736689279e-05, "loss": 9.3643, "step": 61560 }, { "epoch": 0.30747334515218855, "grad_norm": 0.09236638993024826, "learning_rate": 2.082780545195124e-05, "loss": 9.3792, "step": 61570 }, { "epoch": 0.307523283977128, "grad_norm": 0.09510213881731033, "learning_rate": 2.082630353700969e-05, "loss": 9.3854, "step": 61580 }, { "epoch": 0.30757322280206745, "grad_norm": 0.09195613861083984, "learning_rate": 2.0824801622068136e-05, "loss": 9.3655, "step": 61590 }, { "epoch": 0.3076231616270069, "grad_norm": 0.09247229248285294, "learning_rate": 2.082329970712659e-05, "loss": 9.3756, "step": 61600 }, { "epoch": 0.30767310045194635, "grad_norm": 0.08966951817274094, "learning_rate": 2.0821797792185036e-05, "loss": 9.379, "step": 61610 }, { "epoch": 0.3077230392768858, "grad_norm": 0.10270446538925171, "learning_rate": 2.0820295877243486e-05, "loss": 9.3774, "step": 61620 }, { "epoch": 0.30777297810182525, "grad_norm": 0.09327306598424911, "learning_rate": 2.0818793962301936e-05, "loss": 9.3764, "step": 61630 }, { "epoch": 0.3078229169267647, "grad_norm": 0.09807731211185455, "learning_rate": 2.0817292047360383e-05, "loss": 9.3884, "step": 61640 }, { "epoch": 0.30787285575170414, "grad_norm": 0.09317595511674881, "learning_rate": 2.0815790132418837e-05, "loss": 9.3817, "step": 61650 }, { "epoch": 0.3079227945766436, "grad_norm": 0.09404472261667252, "learning_rate": 2.0814288217477283e-05, "loss": 9.3812, "step": 61660 }, { "epoch": 0.30797273340158304, "grad_norm": 0.0946296826004982, "learning_rate": 2.0812786302535734e-05, "loss": 9.3784, "step": 61670 }, { "epoch": 0.3080226722265225, "grad_norm": 0.09800449013710022, "learning_rate": 2.0811284387594184e-05, "loss": 9.3691, "step": 61680 }, { "epoch": 0.30807261105146194, "grad_norm": 0.09789472073316574, "learning_rate": 2.080978247265263e-05, "loss": 9.3718, "step": 61690 }, { "epoch": 0.3081225498764014, "grad_norm": 0.0944046601653099, "learning_rate": 2.0808280557711084e-05, "loss": 9.3751, "step": 61700 }, { "epoch": 0.30817248870134084, "grad_norm": 0.09153825044631958, "learning_rate": 2.080677864276953e-05, "loss": 9.3673, "step": 61710 }, { "epoch": 0.3082224275262803, "grad_norm": 0.09950811415910721, "learning_rate": 2.080527672782798e-05, "loss": 9.3759, "step": 61720 }, { "epoch": 0.30827236635121974, "grad_norm": 0.09401055425405502, "learning_rate": 2.080377481288643e-05, "loss": 9.3752, "step": 61730 }, { "epoch": 0.3083223051761592, "grad_norm": 0.09673509001731873, "learning_rate": 2.0802272897944878e-05, "loss": 9.3674, "step": 61740 }, { "epoch": 0.30837224400109864, "grad_norm": 0.09277281165122986, "learning_rate": 2.080077098300333e-05, "loss": 9.3649, "step": 61750 }, { "epoch": 0.3084221828260381, "grad_norm": 0.09555141627788544, "learning_rate": 2.079926906806178e-05, "loss": 9.3666, "step": 61760 }, { "epoch": 0.30847212165097754, "grad_norm": 0.09273938834667206, "learning_rate": 2.079776715312023e-05, "loss": 9.3702, "step": 61770 }, { "epoch": 0.308522060475917, "grad_norm": 0.09557077288627625, "learning_rate": 2.079626523817868e-05, "loss": 9.3766, "step": 61780 }, { "epoch": 0.30857199930085644, "grad_norm": 0.09353851526975632, "learning_rate": 2.0794763323237126e-05, "loss": 9.3753, "step": 61790 }, { "epoch": 0.3086219381257959, "grad_norm": 0.09252606332302094, "learning_rate": 2.079326140829558e-05, "loss": 9.3661, "step": 61800 }, { "epoch": 0.30867187695073534, "grad_norm": 0.09201821684837341, "learning_rate": 2.0791759493354026e-05, "loss": 9.3726, "step": 61810 }, { "epoch": 0.3087218157756748, "grad_norm": 0.09545756131410599, "learning_rate": 2.0790257578412476e-05, "loss": 9.3795, "step": 61820 }, { "epoch": 0.30877175460061423, "grad_norm": 0.0946696475148201, "learning_rate": 2.0788755663470926e-05, "loss": 9.3637, "step": 61830 }, { "epoch": 0.3088216934255537, "grad_norm": 0.09465292096138, "learning_rate": 2.0787253748529373e-05, "loss": 9.3803, "step": 61840 }, { "epoch": 0.30887163225049313, "grad_norm": 0.09846868366003036, "learning_rate": 2.0785751833587827e-05, "loss": 9.3652, "step": 61850 }, { "epoch": 0.3089215710754326, "grad_norm": 0.09574449062347412, "learning_rate": 2.0784249918646273e-05, "loss": 9.3696, "step": 61860 }, { "epoch": 0.30897150990037203, "grad_norm": 0.09601958841085434, "learning_rate": 2.0782748003704724e-05, "loss": 9.3682, "step": 61870 }, { "epoch": 0.3090214487253115, "grad_norm": 0.0923629179596901, "learning_rate": 2.0781246088763174e-05, "loss": 9.3626, "step": 61880 }, { "epoch": 0.30907138755025093, "grad_norm": 0.09300635010004044, "learning_rate": 2.077974417382162e-05, "loss": 9.3663, "step": 61890 }, { "epoch": 0.3091213263751904, "grad_norm": 0.09495260566473007, "learning_rate": 2.0778242258880074e-05, "loss": 9.3814, "step": 61900 }, { "epoch": 0.30917126520012983, "grad_norm": 0.0935489758849144, "learning_rate": 2.077674034393852e-05, "loss": 9.3656, "step": 61910 }, { "epoch": 0.3092212040250693, "grad_norm": 0.09819766879081726, "learning_rate": 2.0775238428996975e-05, "loss": 9.3711, "step": 61920 }, { "epoch": 0.30927114285000873, "grad_norm": 0.09066694229841232, "learning_rate": 2.077373651405542e-05, "loss": 9.3818, "step": 61930 }, { "epoch": 0.3093210816749482, "grad_norm": 0.09707902371883392, "learning_rate": 2.0772234599113868e-05, "loss": 9.3693, "step": 61940 }, { "epoch": 0.30937102049988763, "grad_norm": 0.09382162988185883, "learning_rate": 2.077073268417232e-05, "loss": 9.3681, "step": 61950 }, { "epoch": 0.3094209593248271, "grad_norm": 0.09599854052066803, "learning_rate": 2.076923076923077e-05, "loss": 9.3744, "step": 61960 }, { "epoch": 0.3094708981497665, "grad_norm": 0.09321361035108566, "learning_rate": 2.0767728854289222e-05, "loss": 9.3718, "step": 61970 }, { "epoch": 0.309520836974706, "grad_norm": 0.09474252909421921, "learning_rate": 2.076622693934767e-05, "loss": 9.3747, "step": 61980 }, { "epoch": 0.3095707757996454, "grad_norm": 0.09249020367860794, "learning_rate": 2.0764725024406116e-05, "loss": 9.3677, "step": 61990 }, { "epoch": 0.3096207146245849, "grad_norm": 0.09276629239320755, "learning_rate": 2.076322310946457e-05, "loss": 9.375, "step": 62000 }, { "epoch": 0.3096706534495243, "grad_norm": 0.09531158208847046, "learning_rate": 2.0761721194523016e-05, "loss": 9.3636, "step": 62010 }, { "epoch": 0.3097205922744638, "grad_norm": 0.09786634147167206, "learning_rate": 2.076021927958147e-05, "loss": 9.3572, "step": 62020 }, { "epoch": 0.3097705310994032, "grad_norm": 0.09066082537174225, "learning_rate": 2.0758717364639916e-05, "loss": 9.3621, "step": 62030 }, { "epoch": 0.3098204699243427, "grad_norm": 0.09768658876419067, "learning_rate": 2.0757215449698363e-05, "loss": 9.3643, "step": 62040 }, { "epoch": 0.3098704087492821, "grad_norm": 0.09411395341157913, "learning_rate": 2.0755713534756817e-05, "loss": 9.3757, "step": 62050 }, { "epoch": 0.3099203475742216, "grad_norm": 0.09201698750257492, "learning_rate": 2.0754211619815263e-05, "loss": 9.3652, "step": 62060 }, { "epoch": 0.309970286399161, "grad_norm": 0.09616990387439728, "learning_rate": 2.0752709704873717e-05, "loss": 9.3675, "step": 62070 }, { "epoch": 0.3100202252241005, "grad_norm": 0.09069666266441345, "learning_rate": 2.0751207789932164e-05, "loss": 9.3703, "step": 62080 }, { "epoch": 0.3100701640490399, "grad_norm": 0.09700020402669907, "learning_rate": 2.074970587499061e-05, "loss": 9.3644, "step": 62090 }, { "epoch": 0.3101201028739794, "grad_norm": 0.10156318545341492, "learning_rate": 2.0748203960049064e-05, "loss": 9.3651, "step": 62100 }, { "epoch": 0.3101700416989188, "grad_norm": 0.09886864572763443, "learning_rate": 2.074670204510751e-05, "loss": 9.3718, "step": 62110 }, { "epoch": 0.3102199805238583, "grad_norm": 0.09852635115385056, "learning_rate": 2.0745200130165965e-05, "loss": 9.3617, "step": 62120 }, { "epoch": 0.3102699193487977, "grad_norm": 0.09089913219213486, "learning_rate": 2.074369821522441e-05, "loss": 9.3574, "step": 62130 }, { "epoch": 0.3103198581737372, "grad_norm": 0.08944336324930191, "learning_rate": 2.0742196300282858e-05, "loss": 9.3706, "step": 62140 }, { "epoch": 0.3103697969986766, "grad_norm": 0.09155122190713882, "learning_rate": 2.074069438534131e-05, "loss": 9.3682, "step": 62150 }, { "epoch": 0.3104197358236161, "grad_norm": 0.09597411006689072, "learning_rate": 2.073919247039976e-05, "loss": 9.3717, "step": 62160 }, { "epoch": 0.3104696746485555, "grad_norm": 0.09866214543581009, "learning_rate": 2.0737690555458212e-05, "loss": 9.3543, "step": 62170 }, { "epoch": 0.310519613473495, "grad_norm": 0.09262767434120178, "learning_rate": 2.073618864051666e-05, "loss": 9.3623, "step": 62180 }, { "epoch": 0.3105695522984344, "grad_norm": 0.08718739449977875, "learning_rate": 2.0734686725575106e-05, "loss": 9.3704, "step": 62190 }, { "epoch": 0.3106194911233739, "grad_norm": 0.08802207559347153, "learning_rate": 2.073318481063356e-05, "loss": 9.3702, "step": 62200 }, { "epoch": 0.3106694299483133, "grad_norm": 0.09451784193515778, "learning_rate": 2.0731682895692006e-05, "loss": 9.3645, "step": 62210 }, { "epoch": 0.3107193687732528, "grad_norm": 0.09479044377803802, "learning_rate": 2.073018098075046e-05, "loss": 9.3502, "step": 62220 }, { "epoch": 0.3107693075981922, "grad_norm": 0.09419619292020798, "learning_rate": 2.0728679065808906e-05, "loss": 9.3656, "step": 62230 }, { "epoch": 0.3108192464231317, "grad_norm": 0.09506448358297348, "learning_rate": 2.0727177150867357e-05, "loss": 9.3581, "step": 62240 }, { "epoch": 0.3108691852480711, "grad_norm": 0.09851628541946411, "learning_rate": 2.0725675235925807e-05, "loss": 9.362, "step": 62250 }, { "epoch": 0.3109191240730106, "grad_norm": 0.0938975065946579, "learning_rate": 2.0724173320984253e-05, "loss": 9.3594, "step": 62260 }, { "epoch": 0.31096906289795, "grad_norm": 0.0952107235789299, "learning_rate": 2.0722671406042707e-05, "loss": 9.3598, "step": 62270 }, { "epoch": 0.3110190017228895, "grad_norm": 0.09428625553846359, "learning_rate": 2.0721169491101154e-05, "loss": 9.3688, "step": 62280 }, { "epoch": 0.3110689405478289, "grad_norm": 0.09929768741130829, "learning_rate": 2.0719667576159604e-05, "loss": 9.3563, "step": 62290 }, { "epoch": 0.31111887937276833, "grad_norm": 0.09252603352069855, "learning_rate": 2.0718165661218054e-05, "loss": 9.3555, "step": 62300 }, { "epoch": 0.3111688181977078, "grad_norm": 0.0918884426355362, "learning_rate": 2.07166637462765e-05, "loss": 9.3604, "step": 62310 }, { "epoch": 0.31121875702264723, "grad_norm": 0.09412012249231339, "learning_rate": 2.0715161831334955e-05, "loss": 9.36, "step": 62320 }, { "epoch": 0.3112686958475867, "grad_norm": 0.08864130824804306, "learning_rate": 2.07136599163934e-05, "loss": 9.3741, "step": 62330 }, { "epoch": 0.31131863467252613, "grad_norm": 0.09902546554803848, "learning_rate": 2.071215800145185e-05, "loss": 9.3707, "step": 62340 }, { "epoch": 0.3113685734974656, "grad_norm": 0.0945492535829544, "learning_rate": 2.07106560865103e-05, "loss": 9.362, "step": 62350 }, { "epoch": 0.31141851232240503, "grad_norm": 0.0934128388762474, "learning_rate": 2.070915417156875e-05, "loss": 9.3579, "step": 62360 }, { "epoch": 0.3114684511473445, "grad_norm": 0.10103520005941391, "learning_rate": 2.0707652256627202e-05, "loss": 9.3705, "step": 62370 }, { "epoch": 0.3115183899722839, "grad_norm": 0.09728195518255234, "learning_rate": 2.070615034168565e-05, "loss": 9.3674, "step": 62380 }, { "epoch": 0.3115683287972234, "grad_norm": 0.0965341180562973, "learning_rate": 2.07046484267441e-05, "loss": 9.368, "step": 62390 }, { "epoch": 0.3116182676221628, "grad_norm": 0.09560953825712204, "learning_rate": 2.070314651180255e-05, "loss": 9.3595, "step": 62400 }, { "epoch": 0.3116682064471023, "grad_norm": 0.09266313165426254, "learning_rate": 2.0701644596860996e-05, "loss": 9.3655, "step": 62410 }, { "epoch": 0.3117181452720417, "grad_norm": 0.09788119792938232, "learning_rate": 2.070014268191945e-05, "loss": 9.3602, "step": 62420 }, { "epoch": 0.3117680840969812, "grad_norm": 0.09870301932096481, "learning_rate": 2.0698640766977896e-05, "loss": 9.3552, "step": 62430 }, { "epoch": 0.3118180229219206, "grad_norm": 0.09183664619922638, "learning_rate": 2.0697138852036347e-05, "loss": 9.3642, "step": 62440 }, { "epoch": 0.3118679617468601, "grad_norm": 0.09062953293323517, "learning_rate": 2.0695636937094797e-05, "loss": 9.3599, "step": 62450 }, { "epoch": 0.3119179005717995, "grad_norm": 0.08905572444200516, "learning_rate": 2.0694135022153243e-05, "loss": 9.3629, "step": 62460 }, { "epoch": 0.311967839396739, "grad_norm": 0.08906285464763641, "learning_rate": 2.0692633107211697e-05, "loss": 9.3675, "step": 62470 }, { "epoch": 0.3120177782216784, "grad_norm": 0.09582462906837463, "learning_rate": 2.0691131192270144e-05, "loss": 9.3733, "step": 62480 }, { "epoch": 0.3120677170466179, "grad_norm": 0.09110390394926071, "learning_rate": 2.0689629277328594e-05, "loss": 9.3662, "step": 62490 }, { "epoch": 0.3121176558715573, "grad_norm": 0.09354531019926071, "learning_rate": 2.0688127362387044e-05, "loss": 9.3623, "step": 62500 }, { "epoch": 0.3121675946964968, "grad_norm": 0.0941261276602745, "learning_rate": 2.068662544744549e-05, "loss": 9.3616, "step": 62510 }, { "epoch": 0.3122175335214362, "grad_norm": 0.09080076217651367, "learning_rate": 2.0685123532503945e-05, "loss": 9.3641, "step": 62520 }, { "epoch": 0.3122674723463757, "grad_norm": 0.09199047833681107, "learning_rate": 2.068362161756239e-05, "loss": 9.3541, "step": 62530 }, { "epoch": 0.3123174111713151, "grad_norm": 0.09623630344867706, "learning_rate": 2.068211970262084e-05, "loss": 9.3581, "step": 62540 }, { "epoch": 0.3123673499962546, "grad_norm": 0.09023137390613556, "learning_rate": 2.0680617787679292e-05, "loss": 9.3669, "step": 62550 }, { "epoch": 0.312417288821194, "grad_norm": 0.09632597118616104, "learning_rate": 2.0679115872737742e-05, "loss": 9.3582, "step": 62560 }, { "epoch": 0.3124672276461335, "grad_norm": 0.09074317663908005, "learning_rate": 2.0677613957796192e-05, "loss": 9.3596, "step": 62570 }, { "epoch": 0.3125171664710729, "grad_norm": 0.09316986799240112, "learning_rate": 2.067611204285464e-05, "loss": 9.3562, "step": 62580 }, { "epoch": 0.3125671052960124, "grad_norm": 0.09124487638473511, "learning_rate": 2.067461012791309e-05, "loss": 9.3615, "step": 62590 }, { "epoch": 0.3126170441209518, "grad_norm": 0.09576891362667084, "learning_rate": 2.067310821297154e-05, "loss": 9.3529, "step": 62600 }, { "epoch": 0.3126669829458913, "grad_norm": 0.09395425021648407, "learning_rate": 2.067160629802999e-05, "loss": 9.3519, "step": 62610 }, { "epoch": 0.3127169217708307, "grad_norm": 0.09081248939037323, "learning_rate": 2.067010438308844e-05, "loss": 9.3598, "step": 62620 }, { "epoch": 0.3127668605957702, "grad_norm": 0.09516331553459167, "learning_rate": 2.0668602468146886e-05, "loss": 9.3653, "step": 62630 }, { "epoch": 0.3128167994207096, "grad_norm": 0.09487758576869965, "learning_rate": 2.0667100553205337e-05, "loss": 9.3517, "step": 62640 }, { "epoch": 0.3128667382456491, "grad_norm": 0.0971754714846611, "learning_rate": 2.0665598638263787e-05, "loss": 9.3476, "step": 62650 }, { "epoch": 0.3129166770705885, "grad_norm": 0.09306435286998749, "learning_rate": 2.0664096723322237e-05, "loss": 9.3561, "step": 62660 }, { "epoch": 0.312966615895528, "grad_norm": 0.0970197319984436, "learning_rate": 2.0662594808380687e-05, "loss": 9.3684, "step": 62670 }, { "epoch": 0.3130165547204674, "grad_norm": 0.09373274445533752, "learning_rate": 2.0661092893439134e-05, "loss": 9.3663, "step": 62680 }, { "epoch": 0.3130664935454069, "grad_norm": 0.09603800624608994, "learning_rate": 2.0659590978497584e-05, "loss": 9.3468, "step": 62690 }, { "epoch": 0.3131164323703463, "grad_norm": 0.09086347371339798, "learning_rate": 2.0658089063556034e-05, "loss": 9.3515, "step": 62700 }, { "epoch": 0.3131663711952858, "grad_norm": 0.09210800379514694, "learning_rate": 2.0656587148614484e-05, "loss": 9.3627, "step": 62710 }, { "epoch": 0.3132163100202252, "grad_norm": 0.09706009924411774, "learning_rate": 2.0655085233672935e-05, "loss": 9.3576, "step": 62720 }, { "epoch": 0.3132662488451647, "grad_norm": 0.09735792875289917, "learning_rate": 2.065358331873138e-05, "loss": 9.3584, "step": 62730 }, { "epoch": 0.3133161876701041, "grad_norm": 0.08968986570835114, "learning_rate": 2.065208140378983e-05, "loss": 9.3586, "step": 62740 }, { "epoch": 0.3133661264950436, "grad_norm": 0.09612874686717987, "learning_rate": 2.0650579488848282e-05, "loss": 9.3559, "step": 62750 }, { "epoch": 0.313416065319983, "grad_norm": 0.10443656146526337, "learning_rate": 2.0649077573906732e-05, "loss": 9.3511, "step": 62760 }, { "epoch": 0.3134660041449225, "grad_norm": 0.09903521835803986, "learning_rate": 2.0647575658965182e-05, "loss": 9.3614, "step": 62770 }, { "epoch": 0.3135159429698619, "grad_norm": 0.09267803281545639, "learning_rate": 2.064607374402363e-05, "loss": 9.3444, "step": 62780 }, { "epoch": 0.3135658817948014, "grad_norm": 0.09199144691228867, "learning_rate": 2.064457182908208e-05, "loss": 9.3585, "step": 62790 }, { "epoch": 0.3136158206197408, "grad_norm": 0.09724773466587067, "learning_rate": 2.064306991414053e-05, "loss": 9.352, "step": 62800 }, { "epoch": 0.3136657594446803, "grad_norm": 0.10160032659769058, "learning_rate": 2.064156799919898e-05, "loss": 9.3515, "step": 62810 }, { "epoch": 0.3137156982696197, "grad_norm": 0.09469520300626755, "learning_rate": 2.064006608425743e-05, "loss": 9.3425, "step": 62820 }, { "epoch": 0.3137656370945592, "grad_norm": 0.09688717126846313, "learning_rate": 2.0638564169315876e-05, "loss": 9.3486, "step": 62830 }, { "epoch": 0.3138155759194986, "grad_norm": 0.09648570418357849, "learning_rate": 2.0637062254374327e-05, "loss": 9.365, "step": 62840 }, { "epoch": 0.3138655147444381, "grad_norm": 0.09986833482980728, "learning_rate": 2.0635560339432777e-05, "loss": 9.3472, "step": 62850 }, { "epoch": 0.3139154535693775, "grad_norm": 0.09859945625066757, "learning_rate": 2.0634058424491227e-05, "loss": 9.3553, "step": 62860 }, { "epoch": 0.313965392394317, "grad_norm": 0.09469161182641983, "learning_rate": 2.0632556509549677e-05, "loss": 9.351, "step": 62870 }, { "epoch": 0.3140153312192564, "grad_norm": 0.10031906515359879, "learning_rate": 2.0631054594608127e-05, "loss": 9.3604, "step": 62880 }, { "epoch": 0.3140652700441959, "grad_norm": 0.09583910554647446, "learning_rate": 2.0629552679666574e-05, "loss": 9.3615, "step": 62890 }, { "epoch": 0.3141152088691353, "grad_norm": 0.09656809270381927, "learning_rate": 2.0628050764725024e-05, "loss": 9.3522, "step": 62900 }, { "epoch": 0.3141651476940748, "grad_norm": 0.08953146636486053, "learning_rate": 2.0626548849783474e-05, "loss": 9.3564, "step": 62910 }, { "epoch": 0.3142150865190142, "grad_norm": 0.09325104206800461, "learning_rate": 2.0625046934841925e-05, "loss": 9.3426, "step": 62920 }, { "epoch": 0.3142650253439537, "grad_norm": 0.09127029776573181, "learning_rate": 2.0623545019900375e-05, "loss": 9.3543, "step": 62930 }, { "epoch": 0.3143149641688931, "grad_norm": 0.09044724702835083, "learning_rate": 2.062204310495882e-05, "loss": 9.3675, "step": 62940 }, { "epoch": 0.3143649029938326, "grad_norm": 0.0922994613647461, "learning_rate": 2.0620541190017272e-05, "loss": 9.3608, "step": 62950 }, { "epoch": 0.314414841818772, "grad_norm": 0.09617674350738525, "learning_rate": 2.0619039275075722e-05, "loss": 9.353, "step": 62960 }, { "epoch": 0.3144647806437115, "grad_norm": 0.09723023325204849, "learning_rate": 2.0617537360134172e-05, "loss": 9.3553, "step": 62970 }, { "epoch": 0.3145147194686509, "grad_norm": 0.09201812744140625, "learning_rate": 2.0616035445192622e-05, "loss": 9.3563, "step": 62980 }, { "epoch": 0.31456465829359037, "grad_norm": 0.09177816659212112, "learning_rate": 2.061453353025107e-05, "loss": 9.3471, "step": 62990 }, { "epoch": 0.3146145971185298, "grad_norm": 0.09503309428691864, "learning_rate": 2.061303161530952e-05, "loss": 9.3563, "step": 63000 }, { "epoch": 0.31466453594346927, "grad_norm": 0.0928831547498703, "learning_rate": 2.061152970036797e-05, "loss": 9.3518, "step": 63010 }, { "epoch": 0.3147144747684087, "grad_norm": 0.09431839734315872, "learning_rate": 2.061002778542642e-05, "loss": 9.3627, "step": 63020 }, { "epoch": 0.31476441359334817, "grad_norm": 0.08945141732692719, "learning_rate": 2.060852587048487e-05, "loss": 9.3505, "step": 63030 }, { "epoch": 0.3148143524182876, "grad_norm": 0.09035779535770416, "learning_rate": 2.0607023955543317e-05, "loss": 9.3552, "step": 63040 }, { "epoch": 0.31486429124322707, "grad_norm": 0.09948616474866867, "learning_rate": 2.0605522040601767e-05, "loss": 9.3376, "step": 63050 }, { "epoch": 0.3149142300681665, "grad_norm": 0.09741248935461044, "learning_rate": 2.0604020125660217e-05, "loss": 9.343, "step": 63060 }, { "epoch": 0.31496416889310597, "grad_norm": 0.0959203913807869, "learning_rate": 2.0602518210718667e-05, "loss": 9.3452, "step": 63070 }, { "epoch": 0.3150141077180454, "grad_norm": 0.09166992455720901, "learning_rate": 2.0601016295777117e-05, "loss": 9.3536, "step": 63080 }, { "epoch": 0.31506404654298487, "grad_norm": 0.09764059633016586, "learning_rate": 2.0599514380835564e-05, "loss": 9.3498, "step": 63090 }, { "epoch": 0.3151139853679243, "grad_norm": 0.09447203576564789, "learning_rate": 2.0598012465894014e-05, "loss": 9.3514, "step": 63100 }, { "epoch": 0.31516392419286376, "grad_norm": 0.09235192835330963, "learning_rate": 2.0596510550952464e-05, "loss": 9.3443, "step": 63110 }, { "epoch": 0.3152138630178032, "grad_norm": 0.09150667488574982, "learning_rate": 2.0595008636010915e-05, "loss": 9.3494, "step": 63120 }, { "epoch": 0.31526380184274266, "grad_norm": 0.09310675412416458, "learning_rate": 2.0593506721069365e-05, "loss": 9.3463, "step": 63130 }, { "epoch": 0.3153137406676821, "grad_norm": 0.09127092361450195, "learning_rate": 2.059200480612781e-05, "loss": 9.3564, "step": 63140 }, { "epoch": 0.31536367949262156, "grad_norm": 0.09105300903320312, "learning_rate": 2.0590502891186262e-05, "loss": 9.3598, "step": 63150 }, { "epoch": 0.315413618317561, "grad_norm": 0.09039835631847382, "learning_rate": 2.0589000976244712e-05, "loss": 9.3697, "step": 63160 }, { "epoch": 0.31546355714250046, "grad_norm": 0.09602472931146622, "learning_rate": 2.0587499061303162e-05, "loss": 9.3542, "step": 63170 }, { "epoch": 0.3155134959674399, "grad_norm": 0.09388317167758942, "learning_rate": 2.0585997146361612e-05, "loss": 9.3465, "step": 63180 }, { "epoch": 0.31556343479237936, "grad_norm": 0.092608243227005, "learning_rate": 2.058449523142006e-05, "loss": 9.3552, "step": 63190 }, { "epoch": 0.3156133736173188, "grad_norm": 0.09442637860774994, "learning_rate": 2.0582993316478513e-05, "loss": 9.3467, "step": 63200 }, { "epoch": 0.31566331244225826, "grad_norm": 0.09088288247585297, "learning_rate": 2.058149140153696e-05, "loss": 9.3525, "step": 63210 }, { "epoch": 0.3157132512671977, "grad_norm": 0.09530657529830933, "learning_rate": 2.057998948659541e-05, "loss": 9.3462, "step": 63220 }, { "epoch": 0.31576319009213716, "grad_norm": 0.09368572384119034, "learning_rate": 2.057848757165386e-05, "loss": 9.3582, "step": 63230 }, { "epoch": 0.3158131289170766, "grad_norm": 0.09014841169118881, "learning_rate": 2.0576985656712307e-05, "loss": 9.3449, "step": 63240 }, { "epoch": 0.31586306774201606, "grad_norm": 0.09297845512628555, "learning_rate": 2.057548374177076e-05, "loss": 9.3541, "step": 63250 }, { "epoch": 0.3159130065669555, "grad_norm": 0.09388581663370132, "learning_rate": 2.0573981826829207e-05, "loss": 9.3532, "step": 63260 }, { "epoch": 0.31596294539189496, "grad_norm": 0.09536658972501755, "learning_rate": 2.0572479911887657e-05, "loss": 9.3481, "step": 63270 }, { "epoch": 0.3160128842168344, "grad_norm": 0.09043373167514801, "learning_rate": 2.0570977996946107e-05, "loss": 9.3532, "step": 63280 }, { "epoch": 0.3160628230417738, "grad_norm": 0.09627131372690201, "learning_rate": 2.0569476082004554e-05, "loss": 9.3527, "step": 63290 }, { "epoch": 0.3161127618667133, "grad_norm": 0.09719482809305191, "learning_rate": 2.0567974167063008e-05, "loss": 9.348, "step": 63300 }, { "epoch": 0.3161627006916527, "grad_norm": 0.09590967744588852, "learning_rate": 2.0566472252121454e-05, "loss": 9.3409, "step": 63310 }, { "epoch": 0.3162126395165922, "grad_norm": 0.09232126921415329, "learning_rate": 2.0564970337179905e-05, "loss": 9.3467, "step": 63320 }, { "epoch": 0.3162625783415316, "grad_norm": 0.09349498897790909, "learning_rate": 2.0563468422238355e-05, "loss": 9.3443, "step": 63330 }, { "epoch": 0.3163125171664711, "grad_norm": 0.09130989015102386, "learning_rate": 2.05619665072968e-05, "loss": 9.3482, "step": 63340 }, { "epoch": 0.3163624559914105, "grad_norm": 0.09377502650022507, "learning_rate": 2.0560464592355255e-05, "loss": 9.3469, "step": 63350 }, { "epoch": 0.31641239481635, "grad_norm": 0.08837824314832687, "learning_rate": 2.0558962677413702e-05, "loss": 9.3549, "step": 63360 }, { "epoch": 0.3164623336412894, "grad_norm": 0.08911017328500748, "learning_rate": 2.0557460762472152e-05, "loss": 9.3441, "step": 63370 }, { "epoch": 0.31651227246622887, "grad_norm": 0.09551316499710083, "learning_rate": 2.0555958847530602e-05, "loss": 9.3406, "step": 63380 }, { "epoch": 0.3165622112911683, "grad_norm": 0.098643459379673, "learning_rate": 2.0554456932589052e-05, "loss": 9.3425, "step": 63390 }, { "epoch": 0.31661215011610777, "grad_norm": 0.09688541293144226, "learning_rate": 2.0552955017647503e-05, "loss": 9.3504, "step": 63400 }, { "epoch": 0.3166620889410472, "grad_norm": 0.0948943942785263, "learning_rate": 2.055145310270595e-05, "loss": 9.3492, "step": 63410 }, { "epoch": 0.31671202776598667, "grad_norm": 0.09211491793394089, "learning_rate": 2.05499511877644e-05, "loss": 9.3537, "step": 63420 }, { "epoch": 0.3167619665909261, "grad_norm": 0.0964820459485054, "learning_rate": 2.054844927282285e-05, "loss": 9.339, "step": 63430 }, { "epoch": 0.31681190541586557, "grad_norm": 0.09166949987411499, "learning_rate": 2.05469473578813e-05, "loss": 9.3545, "step": 63440 }, { "epoch": 0.316861844240805, "grad_norm": 0.10011352598667145, "learning_rate": 2.054544544293975e-05, "loss": 9.3521, "step": 63450 }, { "epoch": 0.31691178306574447, "grad_norm": 0.0967571884393692, "learning_rate": 2.0543943527998197e-05, "loss": 9.3413, "step": 63460 }, { "epoch": 0.3169617218906839, "grad_norm": 0.09536176174879074, "learning_rate": 2.0542441613056647e-05, "loss": 9.3398, "step": 63470 }, { "epoch": 0.31701166071562337, "grad_norm": 0.09382636100053787, "learning_rate": 2.0540939698115097e-05, "loss": 9.3471, "step": 63480 }, { "epoch": 0.3170615995405628, "grad_norm": 0.09492380917072296, "learning_rate": 2.0539437783173547e-05, "loss": 9.3293, "step": 63490 }, { "epoch": 0.31711153836550227, "grad_norm": 0.09516463428735733, "learning_rate": 2.0537935868231998e-05, "loss": 9.3534, "step": 63500 }, { "epoch": 0.3171614771904417, "grad_norm": 0.09900394827127457, "learning_rate": 2.0536433953290444e-05, "loss": 9.3535, "step": 63510 }, { "epoch": 0.31721141601538116, "grad_norm": 0.09245557337999344, "learning_rate": 2.0534932038348898e-05, "loss": 9.3483, "step": 63520 }, { "epoch": 0.3172613548403206, "grad_norm": 0.09061167389154434, "learning_rate": 2.0533430123407345e-05, "loss": 9.348, "step": 63530 }, { "epoch": 0.31731129366526006, "grad_norm": 0.09653984010219574, "learning_rate": 2.0531928208465795e-05, "loss": 9.344, "step": 63540 }, { "epoch": 0.3173612324901995, "grad_norm": 0.09344928711652756, "learning_rate": 2.0530426293524245e-05, "loss": 9.355, "step": 63550 }, { "epoch": 0.31741117131513896, "grad_norm": 0.09327616542577744, "learning_rate": 2.0528924378582692e-05, "loss": 9.3527, "step": 63560 }, { "epoch": 0.3174611101400784, "grad_norm": 0.088762067258358, "learning_rate": 2.0527422463641146e-05, "loss": 9.3329, "step": 63570 }, { "epoch": 0.31751104896501786, "grad_norm": 0.09158555418252945, "learning_rate": 2.0525920548699592e-05, "loss": 9.3442, "step": 63580 }, { "epoch": 0.3175609877899573, "grad_norm": 0.0912177637219429, "learning_rate": 2.0524418633758042e-05, "loss": 9.3425, "step": 63590 }, { "epoch": 0.31761092661489676, "grad_norm": 0.08927998691797256, "learning_rate": 2.0522916718816493e-05, "loss": 9.3505, "step": 63600 }, { "epoch": 0.3176608654398362, "grad_norm": 0.09646981954574585, "learning_rate": 2.052141480387494e-05, "loss": 9.338, "step": 63610 }, { "epoch": 0.31771080426477566, "grad_norm": 0.10072862356901169, "learning_rate": 2.0519912888933393e-05, "loss": 9.3302, "step": 63620 }, { "epoch": 0.3177607430897151, "grad_norm": 0.09051260352134705, "learning_rate": 2.051841097399184e-05, "loss": 9.342, "step": 63630 }, { "epoch": 0.31781068191465456, "grad_norm": 0.09302885830402374, "learning_rate": 2.051690905905029e-05, "loss": 9.338, "step": 63640 }, { "epoch": 0.317860620739594, "grad_norm": 0.0928134098649025, "learning_rate": 2.051540714410874e-05, "loss": 9.3399, "step": 63650 }, { "epoch": 0.31791055956453346, "grad_norm": 0.09679362177848816, "learning_rate": 2.0513905229167187e-05, "loss": 9.3486, "step": 63660 }, { "epoch": 0.3179604983894729, "grad_norm": 0.09691763669252396, "learning_rate": 2.051240331422564e-05, "loss": 9.3392, "step": 63670 }, { "epoch": 0.31801043721441236, "grad_norm": 0.09337911009788513, "learning_rate": 2.0510901399284087e-05, "loss": 9.3413, "step": 63680 }, { "epoch": 0.3180603760393518, "grad_norm": 0.09015453606843948, "learning_rate": 2.0509399484342538e-05, "loss": 9.3384, "step": 63690 }, { "epoch": 0.31811031486429125, "grad_norm": 0.09561469405889511, "learning_rate": 2.0507897569400988e-05, "loss": 9.3374, "step": 63700 }, { "epoch": 0.3181602536892307, "grad_norm": 0.09089536219835281, "learning_rate": 2.0506395654459434e-05, "loss": 9.3469, "step": 63710 }, { "epoch": 0.31821019251417015, "grad_norm": 0.09029261767864227, "learning_rate": 2.0504893739517888e-05, "loss": 9.3534, "step": 63720 }, { "epoch": 0.3182601313391096, "grad_norm": 0.092425636947155, "learning_rate": 2.0503391824576335e-05, "loss": 9.3394, "step": 63730 }, { "epoch": 0.31831007016404905, "grad_norm": 0.09339354932308197, "learning_rate": 2.0501889909634785e-05, "loss": 9.3421, "step": 63740 }, { "epoch": 0.3183600089889885, "grad_norm": 0.09930525720119476, "learning_rate": 2.0500387994693235e-05, "loss": 9.3387, "step": 63750 }, { "epoch": 0.31840994781392795, "grad_norm": 0.09377606213092804, "learning_rate": 2.0498886079751682e-05, "loss": 9.3461, "step": 63760 }, { "epoch": 0.3184598866388674, "grad_norm": 0.09875767678022385, "learning_rate": 2.0497384164810136e-05, "loss": 9.3302, "step": 63770 }, { "epoch": 0.31850982546380685, "grad_norm": 0.09149225801229477, "learning_rate": 2.0495882249868582e-05, "loss": 9.342, "step": 63780 }, { "epoch": 0.31855976428874627, "grad_norm": 0.0938776284456253, "learning_rate": 2.0494380334927033e-05, "loss": 9.3441, "step": 63790 }, { "epoch": 0.31860970311368575, "grad_norm": 0.0972360149025917, "learning_rate": 2.0492878419985483e-05, "loss": 9.3438, "step": 63800 }, { "epoch": 0.31865964193862517, "grad_norm": 0.09202911704778671, "learning_rate": 2.049137650504393e-05, "loss": 9.3384, "step": 63810 }, { "epoch": 0.31870958076356465, "grad_norm": 0.09489790350198746, "learning_rate": 2.0489874590102383e-05, "loss": 9.3465, "step": 63820 }, { "epoch": 0.31875951958850407, "grad_norm": 0.09030698239803314, "learning_rate": 2.048837267516083e-05, "loss": 9.3487, "step": 63830 }, { "epoch": 0.31880945841344355, "grad_norm": 0.09348948299884796, "learning_rate": 2.0486870760219283e-05, "loss": 9.3306, "step": 63840 }, { "epoch": 0.31885939723838297, "grad_norm": 0.09794354438781738, "learning_rate": 2.048536884527773e-05, "loss": 9.3349, "step": 63850 }, { "epoch": 0.31890933606332245, "grad_norm": 0.09906934201717377, "learning_rate": 2.0483866930336177e-05, "loss": 9.3347, "step": 63860 }, { "epoch": 0.31895927488826187, "grad_norm": 0.09116369485855103, "learning_rate": 2.048236501539463e-05, "loss": 9.3346, "step": 63870 }, { "epoch": 0.31900921371320135, "grad_norm": 0.0981554165482521, "learning_rate": 2.0480863100453077e-05, "loss": 9.3393, "step": 63880 }, { "epoch": 0.31905915253814077, "grad_norm": 0.09284525364637375, "learning_rate": 2.047936118551153e-05, "loss": 9.3378, "step": 63890 }, { "epoch": 0.31910909136308024, "grad_norm": 0.08612337708473206, "learning_rate": 2.0477859270569978e-05, "loss": 9.3374, "step": 63900 }, { "epoch": 0.31915903018801967, "grad_norm": 0.0932999700307846, "learning_rate": 2.0476357355628424e-05, "loss": 9.3239, "step": 63910 }, { "epoch": 0.31920896901295914, "grad_norm": 0.09308277070522308, "learning_rate": 2.0474855440686878e-05, "loss": 9.3398, "step": 63920 }, { "epoch": 0.31925890783789856, "grad_norm": 0.09518188238143921, "learning_rate": 2.0473353525745325e-05, "loss": 9.343, "step": 63930 }, { "epoch": 0.31930884666283804, "grad_norm": 0.09198715537786484, "learning_rate": 2.047185161080378e-05, "loss": 9.333, "step": 63940 }, { "epoch": 0.31935878548777746, "grad_norm": 0.09532855451107025, "learning_rate": 2.0470349695862225e-05, "loss": 9.3383, "step": 63950 }, { "epoch": 0.31940872431271694, "grad_norm": 0.09205342829227448, "learning_rate": 2.0468847780920672e-05, "loss": 9.3471, "step": 63960 }, { "epoch": 0.31945866313765636, "grad_norm": 0.09050062298774719, "learning_rate": 2.0467345865979126e-05, "loss": 9.3426, "step": 63970 }, { "epoch": 0.31950860196259584, "grad_norm": 0.0922810360789299, "learning_rate": 2.0465843951037572e-05, "loss": 9.3293, "step": 63980 }, { "epoch": 0.31955854078753526, "grad_norm": 0.09575898200273514, "learning_rate": 2.0464342036096026e-05, "loss": 9.3398, "step": 63990 }, { "epoch": 0.31960847961247474, "grad_norm": 0.09558529406785965, "learning_rate": 2.0462840121154473e-05, "loss": 9.3395, "step": 64000 }, { "epoch": 0.31965841843741416, "grad_norm": 0.09229427576065063, "learning_rate": 2.046133820621292e-05, "loss": 9.3204, "step": 64010 }, { "epoch": 0.31970835726235364, "grad_norm": 0.10348726063966751, "learning_rate": 2.0459836291271373e-05, "loss": 9.3323, "step": 64020 }, { "epoch": 0.31975829608729306, "grad_norm": 0.09877219051122665, "learning_rate": 2.045833437632982e-05, "loss": 9.3436, "step": 64030 }, { "epoch": 0.31980823491223254, "grad_norm": 0.09454214572906494, "learning_rate": 2.0456832461388273e-05, "loss": 9.3461, "step": 64040 }, { "epoch": 0.31985817373717196, "grad_norm": 0.09850972890853882, "learning_rate": 2.045533054644672e-05, "loss": 9.3352, "step": 64050 }, { "epoch": 0.31990811256211144, "grad_norm": 0.0938955768942833, "learning_rate": 2.0453828631505167e-05, "loss": 9.3256, "step": 64060 }, { "epoch": 0.31995805138705086, "grad_norm": 0.08874368667602539, "learning_rate": 2.045232671656362e-05, "loss": 9.3407, "step": 64070 }, { "epoch": 0.32000799021199033, "grad_norm": 0.09031280875205994, "learning_rate": 2.0450824801622067e-05, "loss": 9.3334, "step": 64080 }, { "epoch": 0.32005792903692976, "grad_norm": 0.09989767521619797, "learning_rate": 2.044932288668052e-05, "loss": 9.3453, "step": 64090 }, { "epoch": 0.32010786786186923, "grad_norm": 0.09756146371364594, "learning_rate": 2.0447820971738968e-05, "loss": 9.335, "step": 64100 }, { "epoch": 0.32015780668680865, "grad_norm": 0.09458436816930771, "learning_rate": 2.0446319056797414e-05, "loss": 9.3411, "step": 64110 }, { "epoch": 0.32020774551174813, "grad_norm": 0.0925472229719162, "learning_rate": 2.0444817141855868e-05, "loss": 9.3411, "step": 64120 }, { "epoch": 0.32025768433668755, "grad_norm": 0.09772704541683197, "learning_rate": 2.0443315226914315e-05, "loss": 9.3346, "step": 64130 }, { "epoch": 0.32030762316162703, "grad_norm": 0.09279755502939224, "learning_rate": 2.044181331197277e-05, "loss": 9.3347, "step": 64140 }, { "epoch": 0.32035756198656645, "grad_norm": 0.09363307803869247, "learning_rate": 2.0440311397031215e-05, "loss": 9.3403, "step": 64150 }, { "epoch": 0.32040750081150593, "grad_norm": 0.09853540360927582, "learning_rate": 2.0438809482089665e-05, "loss": 9.3262, "step": 64160 }, { "epoch": 0.32045743963644535, "grad_norm": 0.09178748726844788, "learning_rate": 2.0437307567148116e-05, "loss": 9.3342, "step": 64170 }, { "epoch": 0.32050737846138483, "grad_norm": 0.09160963445901871, "learning_rate": 2.0435805652206562e-05, "loss": 9.3396, "step": 64180 }, { "epoch": 0.32055731728632425, "grad_norm": 0.09168528020381927, "learning_rate": 2.0434303737265016e-05, "loss": 9.3464, "step": 64190 }, { "epoch": 0.3206072561112637, "grad_norm": 0.09851713478565216, "learning_rate": 2.0432801822323463e-05, "loss": 9.3393, "step": 64200 }, { "epoch": 0.32065719493620315, "grad_norm": 0.09476348012685776, "learning_rate": 2.0431299907381913e-05, "loss": 9.3281, "step": 64210 }, { "epoch": 0.3207071337611426, "grad_norm": 0.08930637687444687, "learning_rate": 2.0429797992440363e-05, "loss": 9.3371, "step": 64220 }, { "epoch": 0.32075707258608205, "grad_norm": 0.09610996395349503, "learning_rate": 2.042829607749881e-05, "loss": 9.3344, "step": 64230 }, { "epoch": 0.3208070114110215, "grad_norm": 0.09887643158435822, "learning_rate": 2.0426794162557263e-05, "loss": 9.3348, "step": 64240 }, { "epoch": 0.32085695023596095, "grad_norm": 0.09950941801071167, "learning_rate": 2.042529224761571e-05, "loss": 9.3368, "step": 64250 }, { "epoch": 0.3209068890609004, "grad_norm": 0.09124087542295456, "learning_rate": 2.042379033267416e-05, "loss": 9.3366, "step": 64260 }, { "epoch": 0.32095682788583985, "grad_norm": 0.08983422070741653, "learning_rate": 2.042228841773261e-05, "loss": 9.3281, "step": 64270 }, { "epoch": 0.32100676671077927, "grad_norm": 0.09503147006034851, "learning_rate": 2.0420786502791057e-05, "loss": 9.336, "step": 64280 }, { "epoch": 0.32105670553571874, "grad_norm": 0.09027642011642456, "learning_rate": 2.041928458784951e-05, "loss": 9.3261, "step": 64290 }, { "epoch": 0.32110664436065817, "grad_norm": 0.09734740853309631, "learning_rate": 2.0417782672907958e-05, "loss": 9.3386, "step": 64300 }, { "epoch": 0.32115658318559764, "grad_norm": 0.09774748980998993, "learning_rate": 2.0416280757966408e-05, "loss": 9.335, "step": 64310 }, { "epoch": 0.32120652201053707, "grad_norm": 0.09811116009950638, "learning_rate": 2.0414778843024858e-05, "loss": 9.3404, "step": 64320 }, { "epoch": 0.32125646083547654, "grad_norm": 0.09968292713165283, "learning_rate": 2.0413276928083305e-05, "loss": 9.3334, "step": 64330 }, { "epoch": 0.32130639966041596, "grad_norm": 0.09214014559984207, "learning_rate": 2.041177501314176e-05, "loss": 9.3353, "step": 64340 }, { "epoch": 0.32135633848535544, "grad_norm": 0.09302917867898941, "learning_rate": 2.0410273098200205e-05, "loss": 9.3377, "step": 64350 }, { "epoch": 0.32140627731029486, "grad_norm": 0.09487059712409973, "learning_rate": 2.0408771183258655e-05, "loss": 9.3373, "step": 64360 }, { "epoch": 0.32145621613523434, "grad_norm": 0.09410389512777328, "learning_rate": 2.0407269268317106e-05, "loss": 9.3376, "step": 64370 }, { "epoch": 0.32150615496017376, "grad_norm": 0.09394783526659012, "learning_rate": 2.0405767353375552e-05, "loss": 9.3304, "step": 64380 }, { "epoch": 0.32155609378511324, "grad_norm": 0.10009703040122986, "learning_rate": 2.0404265438434006e-05, "loss": 9.3396, "step": 64390 }, { "epoch": 0.32160603261005266, "grad_norm": 0.088577039539814, "learning_rate": 2.0402763523492453e-05, "loss": 9.3351, "step": 64400 }, { "epoch": 0.32165597143499214, "grad_norm": 0.09477560222148895, "learning_rate": 2.0401261608550903e-05, "loss": 9.3235, "step": 64410 }, { "epoch": 0.32170591025993156, "grad_norm": 0.09325342625379562, "learning_rate": 2.0399759693609353e-05, "loss": 9.344, "step": 64420 }, { "epoch": 0.32175584908487104, "grad_norm": 0.09685123711824417, "learning_rate": 2.03982577786678e-05, "loss": 9.3409, "step": 64430 }, { "epoch": 0.32180578790981046, "grad_norm": 0.09679123014211655, "learning_rate": 2.0396755863726253e-05, "loss": 9.3162, "step": 64440 }, { "epoch": 0.32185572673474994, "grad_norm": 0.09780653566122055, "learning_rate": 2.03952539487847e-05, "loss": 9.3269, "step": 64450 }, { "epoch": 0.32190566555968936, "grad_norm": 0.09367544203996658, "learning_rate": 2.039375203384315e-05, "loss": 9.3264, "step": 64460 }, { "epoch": 0.32195560438462884, "grad_norm": 0.0960097685456276, "learning_rate": 2.03922501189016e-05, "loss": 9.334, "step": 64470 }, { "epoch": 0.32200554320956826, "grad_norm": 0.09054725617170334, "learning_rate": 2.039074820396005e-05, "loss": 9.3354, "step": 64480 }, { "epoch": 0.32205548203450773, "grad_norm": 0.09367144107818604, "learning_rate": 2.03892462890185e-05, "loss": 9.3318, "step": 64490 }, { "epoch": 0.32210542085944716, "grad_norm": 0.09095479547977448, "learning_rate": 2.0387744374076948e-05, "loss": 9.3297, "step": 64500 }, { "epoch": 0.32215535968438663, "grad_norm": 0.09100858122110367, "learning_rate": 2.0386242459135398e-05, "loss": 9.3333, "step": 64510 }, { "epoch": 0.32220529850932605, "grad_norm": 0.09426578879356384, "learning_rate": 2.0384740544193848e-05, "loss": 9.3289, "step": 64520 }, { "epoch": 0.32225523733426553, "grad_norm": 0.08965863287448883, "learning_rate": 2.0383238629252298e-05, "loss": 9.3264, "step": 64530 }, { "epoch": 0.32230517615920495, "grad_norm": 0.092529296875, "learning_rate": 2.038173671431075e-05, "loss": 9.337, "step": 64540 }, { "epoch": 0.32235511498414443, "grad_norm": 0.09353473037481308, "learning_rate": 2.0380234799369195e-05, "loss": 9.3338, "step": 64550 }, { "epoch": 0.32240505380908385, "grad_norm": 0.09257248789072037, "learning_rate": 2.0378732884427645e-05, "loss": 9.3232, "step": 64560 }, { "epoch": 0.32245499263402333, "grad_norm": 0.09056426584720612, "learning_rate": 2.0377230969486096e-05, "loss": 9.3363, "step": 64570 }, { "epoch": 0.32250493145896275, "grad_norm": 0.09313210844993591, "learning_rate": 2.0375729054544546e-05, "loss": 9.3281, "step": 64580 }, { "epoch": 0.32255487028390223, "grad_norm": 0.09300418943166733, "learning_rate": 2.0374227139602996e-05, "loss": 9.3343, "step": 64590 }, { "epoch": 0.32260480910884165, "grad_norm": 0.09289450198411942, "learning_rate": 2.0372725224661443e-05, "loss": 9.3244, "step": 64600 }, { "epoch": 0.3226547479337811, "grad_norm": 0.09735582768917084, "learning_rate": 2.0371223309719893e-05, "loss": 9.3233, "step": 64610 }, { "epoch": 0.32270468675872055, "grad_norm": 0.09457716345787048, "learning_rate": 2.0369721394778343e-05, "loss": 9.3306, "step": 64620 }, { "epoch": 0.32275462558366, "grad_norm": 0.10149849951267242, "learning_rate": 2.0368219479836793e-05, "loss": 9.339, "step": 64630 }, { "epoch": 0.32280456440859945, "grad_norm": 0.094261035323143, "learning_rate": 2.0366717564895243e-05, "loss": 9.3271, "step": 64640 }, { "epoch": 0.3228545032335389, "grad_norm": 0.09714456647634506, "learning_rate": 2.036521564995369e-05, "loss": 9.3295, "step": 64650 }, { "epoch": 0.32290444205847835, "grad_norm": 0.09245141595602036, "learning_rate": 2.036371373501214e-05, "loss": 9.3433, "step": 64660 }, { "epoch": 0.3229543808834178, "grad_norm": 0.09247272461652756, "learning_rate": 2.036221182007059e-05, "loss": 9.3184, "step": 64670 }, { "epoch": 0.32300431970835725, "grad_norm": 0.08987551927566528, "learning_rate": 2.036070990512904e-05, "loss": 9.3249, "step": 64680 }, { "epoch": 0.3230542585332967, "grad_norm": 0.09094039350748062, "learning_rate": 2.035920799018749e-05, "loss": 9.341, "step": 64690 }, { "epoch": 0.32310419735823614, "grad_norm": 0.09806418418884277, "learning_rate": 2.0357706075245938e-05, "loss": 9.3295, "step": 64700 }, { "epoch": 0.3231541361831756, "grad_norm": 0.09336648136377335, "learning_rate": 2.0356204160304388e-05, "loss": 9.3318, "step": 64710 }, { "epoch": 0.32320407500811504, "grad_norm": 0.09484011679887772, "learning_rate": 2.0354702245362838e-05, "loss": 9.3308, "step": 64720 }, { "epoch": 0.3232540138330545, "grad_norm": 0.09836547821760178, "learning_rate": 2.0353200330421288e-05, "loss": 9.3284, "step": 64730 }, { "epoch": 0.32330395265799394, "grad_norm": 0.09364361315965652, "learning_rate": 2.035169841547974e-05, "loss": 9.322, "step": 64740 }, { "epoch": 0.3233538914829334, "grad_norm": 0.09323278069496155, "learning_rate": 2.0350196500538185e-05, "loss": 9.3337, "step": 64750 }, { "epoch": 0.32340383030787284, "grad_norm": 0.09357263892889023, "learning_rate": 2.0348694585596635e-05, "loss": 9.3295, "step": 64760 }, { "epoch": 0.3234537691328123, "grad_norm": 0.09743960201740265, "learning_rate": 2.0347192670655086e-05, "loss": 9.3393, "step": 64770 }, { "epoch": 0.32350370795775174, "grad_norm": 0.09175863862037659, "learning_rate": 2.0345690755713536e-05, "loss": 9.3253, "step": 64780 }, { "epoch": 0.3235536467826912, "grad_norm": 0.08689554035663605, "learning_rate": 2.0344188840771986e-05, "loss": 9.3292, "step": 64790 }, { "epoch": 0.32360358560763064, "grad_norm": 0.09737929701805115, "learning_rate": 2.0342686925830433e-05, "loss": 9.3275, "step": 64800 }, { "epoch": 0.3236535244325701, "grad_norm": 0.09190335869789124, "learning_rate": 2.0341185010888883e-05, "loss": 9.3309, "step": 64810 }, { "epoch": 0.32370346325750954, "grad_norm": 0.09484836459159851, "learning_rate": 2.0339683095947333e-05, "loss": 9.3211, "step": 64820 }, { "epoch": 0.323753402082449, "grad_norm": 0.09082531183958054, "learning_rate": 2.0338181181005783e-05, "loss": 9.324, "step": 64830 }, { "epoch": 0.32380334090738844, "grad_norm": 0.09505081921815872, "learning_rate": 2.0336679266064233e-05, "loss": 9.3322, "step": 64840 }, { "epoch": 0.3238532797323279, "grad_norm": 0.09214816242456436, "learning_rate": 2.0335177351122684e-05, "loss": 9.3252, "step": 64850 }, { "epoch": 0.32390321855726734, "grad_norm": 0.09512375295162201, "learning_rate": 2.033367543618113e-05, "loss": 9.3216, "step": 64860 }, { "epoch": 0.3239531573822068, "grad_norm": 0.09365373104810715, "learning_rate": 2.033217352123958e-05, "loss": 9.3385, "step": 64870 }, { "epoch": 0.32400309620714624, "grad_norm": 0.09065309166908264, "learning_rate": 2.033067160629803e-05, "loss": 9.3191, "step": 64880 }, { "epoch": 0.3240530350320857, "grad_norm": 0.09178464859724045, "learning_rate": 2.032916969135648e-05, "loss": 9.3268, "step": 64890 }, { "epoch": 0.32410297385702513, "grad_norm": 0.09575120359659195, "learning_rate": 2.032766777641493e-05, "loss": 9.3209, "step": 64900 }, { "epoch": 0.3241529126819646, "grad_norm": 0.09635672718286514, "learning_rate": 2.0326165861473378e-05, "loss": 9.3288, "step": 64910 }, { "epoch": 0.32420285150690403, "grad_norm": 0.09563815593719482, "learning_rate": 2.0324663946531828e-05, "loss": 9.3158, "step": 64920 }, { "epoch": 0.3242527903318435, "grad_norm": 0.09533455222845078, "learning_rate": 2.0323162031590278e-05, "loss": 9.325, "step": 64930 }, { "epoch": 0.32430272915678293, "grad_norm": 0.09778301417827606, "learning_rate": 2.032166011664873e-05, "loss": 9.3241, "step": 64940 }, { "epoch": 0.3243526679817224, "grad_norm": 0.09181251376867294, "learning_rate": 2.032015820170718e-05, "loss": 9.3237, "step": 64950 }, { "epoch": 0.32440260680666183, "grad_norm": 0.09311333298683167, "learning_rate": 2.0318656286765625e-05, "loss": 9.3285, "step": 64960 }, { "epoch": 0.3244525456316013, "grad_norm": 0.0935024842619896, "learning_rate": 2.0317154371824076e-05, "loss": 9.324, "step": 64970 }, { "epoch": 0.32450248445654073, "grad_norm": 0.10296680778265, "learning_rate": 2.0315652456882526e-05, "loss": 9.3188, "step": 64980 }, { "epoch": 0.3245524232814802, "grad_norm": 0.0903451070189476, "learning_rate": 2.0314150541940976e-05, "loss": 9.3293, "step": 64990 }, { "epoch": 0.32460236210641963, "grad_norm": 0.09111391007900238, "learning_rate": 2.0312648626999426e-05, "loss": 9.3187, "step": 65000 }, { "epoch": 0.3246523009313591, "grad_norm": 0.09491260349750519, "learning_rate": 2.0311146712057873e-05, "loss": 9.3262, "step": 65010 }, { "epoch": 0.3247022397562985, "grad_norm": 0.09257805347442627, "learning_rate": 2.0309644797116323e-05, "loss": 9.3281, "step": 65020 }, { "epoch": 0.324752178581238, "grad_norm": 0.09723227471113205, "learning_rate": 2.0308142882174773e-05, "loss": 9.3198, "step": 65030 }, { "epoch": 0.3248021174061774, "grad_norm": 0.09565103054046631, "learning_rate": 2.0306640967233223e-05, "loss": 9.3241, "step": 65040 }, { "epoch": 0.3248520562311169, "grad_norm": 0.09280645102262497, "learning_rate": 2.0305139052291674e-05, "loss": 9.3315, "step": 65050 }, { "epoch": 0.3249019950560563, "grad_norm": 0.09414808452129364, "learning_rate": 2.030363713735012e-05, "loss": 9.318, "step": 65060 }, { "epoch": 0.3249519338809958, "grad_norm": 0.09369461238384247, "learning_rate": 2.030213522240857e-05, "loss": 9.3343, "step": 65070 }, { "epoch": 0.3250018727059352, "grad_norm": 0.09321442246437073, "learning_rate": 2.030063330746702e-05, "loss": 9.3193, "step": 65080 }, { "epoch": 0.3250518115308747, "grad_norm": 0.0960422232747078, "learning_rate": 2.029913139252547e-05, "loss": 9.3226, "step": 65090 }, { "epoch": 0.3251017503558141, "grad_norm": 0.0959312692284584, "learning_rate": 2.029762947758392e-05, "loss": 9.3237, "step": 65100 }, { "epoch": 0.3251516891807536, "grad_norm": 0.09452484548091888, "learning_rate": 2.0296127562642368e-05, "loss": 9.3171, "step": 65110 }, { "epoch": 0.325201628005693, "grad_norm": 0.09644035995006561, "learning_rate": 2.0294625647700818e-05, "loss": 9.3132, "step": 65120 }, { "epoch": 0.3252515668306325, "grad_norm": 0.09128149598836899, "learning_rate": 2.029312373275927e-05, "loss": 9.3258, "step": 65130 }, { "epoch": 0.3253015056555719, "grad_norm": 0.08971234411001205, "learning_rate": 2.029162181781772e-05, "loss": 9.3171, "step": 65140 }, { "epoch": 0.3253514444805114, "grad_norm": 0.09548742324113846, "learning_rate": 2.029011990287617e-05, "loss": 9.3202, "step": 65150 }, { "epoch": 0.3254013833054508, "grad_norm": 0.0900539830327034, "learning_rate": 2.0288617987934615e-05, "loss": 9.3234, "step": 65160 }, { "epoch": 0.3254513221303903, "grad_norm": 0.08711142092943192, "learning_rate": 2.028711607299307e-05, "loss": 9.3191, "step": 65170 }, { "epoch": 0.3255012609553297, "grad_norm": 0.09119576215744019, "learning_rate": 2.0285614158051516e-05, "loss": 9.3174, "step": 65180 }, { "epoch": 0.3255511997802692, "grad_norm": 0.09764738380908966, "learning_rate": 2.0284112243109966e-05, "loss": 9.3225, "step": 65190 }, { "epoch": 0.3256011386052086, "grad_norm": 0.09541153162717819, "learning_rate": 2.0282610328168416e-05, "loss": 9.3139, "step": 65200 }, { "epoch": 0.3256510774301481, "grad_norm": 0.09369537979364395, "learning_rate": 2.0281108413226863e-05, "loss": 9.325, "step": 65210 }, { "epoch": 0.3257010162550875, "grad_norm": 0.09462661296129227, "learning_rate": 2.0279606498285317e-05, "loss": 9.3226, "step": 65220 }, { "epoch": 0.325750955080027, "grad_norm": 0.09155022352933884, "learning_rate": 2.0278104583343763e-05, "loss": 9.3225, "step": 65230 }, { "epoch": 0.3258008939049664, "grad_norm": 0.09792565554380417, "learning_rate": 2.0276602668402214e-05, "loss": 9.3181, "step": 65240 }, { "epoch": 0.3258508327299059, "grad_norm": 0.09263359010219574, "learning_rate": 2.0275100753460664e-05, "loss": 9.3194, "step": 65250 }, { "epoch": 0.3259007715548453, "grad_norm": 0.09698522835969925, "learning_rate": 2.027359883851911e-05, "loss": 9.3212, "step": 65260 }, { "epoch": 0.32595071037978474, "grad_norm": 0.09113046526908875, "learning_rate": 2.0272096923577564e-05, "loss": 9.3182, "step": 65270 }, { "epoch": 0.3260006492047242, "grad_norm": 0.10038909316062927, "learning_rate": 2.027059500863601e-05, "loss": 9.3206, "step": 65280 }, { "epoch": 0.32605058802966363, "grad_norm": 0.09228735417127609, "learning_rate": 2.026909309369446e-05, "loss": 9.3112, "step": 65290 }, { "epoch": 0.3261005268546031, "grad_norm": 0.09285099059343338, "learning_rate": 2.026759117875291e-05, "loss": 9.319, "step": 65300 }, { "epoch": 0.32615046567954253, "grad_norm": 0.09246840327978134, "learning_rate": 2.0266089263811358e-05, "loss": 9.3158, "step": 65310 }, { "epoch": 0.326200404504482, "grad_norm": 0.09260924160480499, "learning_rate": 2.026458734886981e-05, "loss": 9.3126, "step": 65320 }, { "epoch": 0.32625034332942143, "grad_norm": 0.09276574850082397, "learning_rate": 2.026308543392826e-05, "loss": 9.3142, "step": 65330 }, { "epoch": 0.3263002821543609, "grad_norm": 0.09407460689544678, "learning_rate": 2.026158351898671e-05, "loss": 9.3089, "step": 65340 }, { "epoch": 0.32635022097930033, "grad_norm": 0.09039918333292007, "learning_rate": 2.026008160404516e-05, "loss": 9.3121, "step": 65350 }, { "epoch": 0.3264001598042398, "grad_norm": 0.09433960914611816, "learning_rate": 2.0258579689103605e-05, "loss": 9.3153, "step": 65360 }, { "epoch": 0.32645009862917923, "grad_norm": 0.0907716453075409, "learning_rate": 2.025707777416206e-05, "loss": 9.313, "step": 65370 }, { "epoch": 0.3265000374541187, "grad_norm": 0.097490094602108, "learning_rate": 2.0255575859220506e-05, "loss": 9.3096, "step": 65380 }, { "epoch": 0.32654997627905813, "grad_norm": 0.0970582440495491, "learning_rate": 2.0254073944278956e-05, "loss": 9.3117, "step": 65390 }, { "epoch": 0.3265999151039976, "grad_norm": 0.09796217083930969, "learning_rate": 2.0252572029337406e-05, "loss": 9.3095, "step": 65400 }, { "epoch": 0.32664985392893703, "grad_norm": 0.09917185455560684, "learning_rate": 2.0251070114395853e-05, "loss": 9.315, "step": 65410 }, { "epoch": 0.3266997927538765, "grad_norm": 0.0976761057972908, "learning_rate": 2.0249568199454307e-05, "loss": 9.3177, "step": 65420 }, { "epoch": 0.3267497315788159, "grad_norm": 0.09032870829105377, "learning_rate": 2.0248066284512753e-05, "loss": 9.3253, "step": 65430 }, { "epoch": 0.3267996704037554, "grad_norm": 0.0941029042005539, "learning_rate": 2.0246564369571204e-05, "loss": 9.3135, "step": 65440 }, { "epoch": 0.3268496092286948, "grad_norm": 0.09661585092544556, "learning_rate": 2.0245062454629654e-05, "loss": 9.3097, "step": 65450 }, { "epoch": 0.3268995480536343, "grad_norm": 0.09369435906410217, "learning_rate": 2.02435605396881e-05, "loss": 9.3211, "step": 65460 }, { "epoch": 0.3269494868785737, "grad_norm": 0.0993766039609909, "learning_rate": 2.0242058624746554e-05, "loss": 9.3172, "step": 65470 }, { "epoch": 0.3269994257035132, "grad_norm": 0.0999893844127655, "learning_rate": 2.0240556709805e-05, "loss": 9.3048, "step": 65480 }, { "epoch": 0.3270493645284526, "grad_norm": 0.09266514331102371, "learning_rate": 2.0239054794863454e-05, "loss": 9.3177, "step": 65490 }, { "epoch": 0.3270993033533921, "grad_norm": 0.09590865671634674, "learning_rate": 2.02375528799219e-05, "loss": 9.3148, "step": 65500 }, { "epoch": 0.3271492421783315, "grad_norm": 0.10051383823156357, "learning_rate": 2.0236050964980348e-05, "loss": 9.3207, "step": 65510 }, { "epoch": 0.327199181003271, "grad_norm": 0.09786202013492584, "learning_rate": 2.02345490500388e-05, "loss": 9.3105, "step": 65520 }, { "epoch": 0.3272491198282104, "grad_norm": 0.09516331553459167, "learning_rate": 2.023304713509725e-05, "loss": 9.3199, "step": 65530 }, { "epoch": 0.3272990586531499, "grad_norm": 0.09292834252119064, "learning_rate": 2.0231545220155702e-05, "loss": 9.3057, "step": 65540 }, { "epoch": 0.3273489974780893, "grad_norm": 0.0924135223031044, "learning_rate": 2.023004330521415e-05, "loss": 9.3168, "step": 65550 }, { "epoch": 0.3273989363030288, "grad_norm": 0.08832070231437683, "learning_rate": 2.0228541390272595e-05, "loss": 9.3189, "step": 65560 }, { "epoch": 0.3274488751279682, "grad_norm": 0.09260696917772293, "learning_rate": 2.022703947533105e-05, "loss": 9.3099, "step": 65570 }, { "epoch": 0.3274988139529077, "grad_norm": 0.09234783053398132, "learning_rate": 2.0225537560389496e-05, "loss": 9.3106, "step": 65580 }, { "epoch": 0.3275487527778471, "grad_norm": 0.09439610689878464, "learning_rate": 2.022403564544795e-05, "loss": 9.309, "step": 65590 }, { "epoch": 0.3275986916027866, "grad_norm": 0.09166769683361053, "learning_rate": 2.0222533730506396e-05, "loss": 9.3154, "step": 65600 }, { "epoch": 0.327648630427726, "grad_norm": 0.09410682320594788, "learning_rate": 2.0221031815564843e-05, "loss": 9.3146, "step": 65610 }, { "epoch": 0.3276985692526655, "grad_norm": 0.09807700663805008, "learning_rate": 2.0219529900623297e-05, "loss": 9.3173, "step": 65620 }, { "epoch": 0.3277485080776049, "grad_norm": 0.10019030421972275, "learning_rate": 2.0218027985681743e-05, "loss": 9.3117, "step": 65630 }, { "epoch": 0.3277984469025444, "grad_norm": 0.09734559059143066, "learning_rate": 2.0216526070740197e-05, "loss": 9.3175, "step": 65640 }, { "epoch": 0.3278483857274838, "grad_norm": 0.09119731932878494, "learning_rate": 2.0215024155798644e-05, "loss": 9.3123, "step": 65650 }, { "epoch": 0.3278983245524233, "grad_norm": 0.09404603391885757, "learning_rate": 2.021352224085709e-05, "loss": 9.3134, "step": 65660 }, { "epoch": 0.3279482633773627, "grad_norm": 0.09394799917936325, "learning_rate": 2.0212020325915544e-05, "loss": 9.3197, "step": 65670 }, { "epoch": 0.3279982022023022, "grad_norm": 0.0975813940167427, "learning_rate": 2.021051841097399e-05, "loss": 9.3185, "step": 65680 }, { "epoch": 0.3280481410272416, "grad_norm": 0.09289105236530304, "learning_rate": 2.0209016496032444e-05, "loss": 9.317, "step": 65690 }, { "epoch": 0.3280980798521811, "grad_norm": 0.08842571079730988, "learning_rate": 2.020751458109089e-05, "loss": 9.3099, "step": 65700 }, { "epoch": 0.3281480186771205, "grad_norm": 0.09984628111124039, "learning_rate": 2.0206012666149338e-05, "loss": 9.3071, "step": 65710 }, { "epoch": 0.32819795750206, "grad_norm": 0.09205308556556702, "learning_rate": 2.020451075120779e-05, "loss": 9.3137, "step": 65720 }, { "epoch": 0.3282478963269994, "grad_norm": 0.09461898356676102, "learning_rate": 2.020300883626624e-05, "loss": 9.3055, "step": 65730 }, { "epoch": 0.3282978351519389, "grad_norm": 0.09863221645355225, "learning_rate": 2.0201506921324692e-05, "loss": 9.3214, "step": 65740 }, { "epoch": 0.3283477739768783, "grad_norm": 0.09489452093839645, "learning_rate": 2.020000500638314e-05, "loss": 9.3222, "step": 65750 }, { "epoch": 0.3283977128018178, "grad_norm": 0.09285447001457214, "learning_rate": 2.0198503091441586e-05, "loss": 9.3073, "step": 65760 }, { "epoch": 0.3284476516267572, "grad_norm": 0.09722167253494263, "learning_rate": 2.019700117650004e-05, "loss": 9.3122, "step": 65770 }, { "epoch": 0.3284975904516967, "grad_norm": 0.09583593159914017, "learning_rate": 2.0195499261558486e-05, "loss": 9.3235, "step": 65780 }, { "epoch": 0.3285475292766361, "grad_norm": 0.09201577305793762, "learning_rate": 2.019399734661694e-05, "loss": 9.3062, "step": 65790 }, { "epoch": 0.3285974681015756, "grad_norm": 0.09347929805517197, "learning_rate": 2.0192495431675386e-05, "loss": 9.3122, "step": 65800 }, { "epoch": 0.328647406926515, "grad_norm": 0.0961994007229805, "learning_rate": 2.0190993516733836e-05, "loss": 9.3093, "step": 65810 }, { "epoch": 0.3286973457514545, "grad_norm": 0.0859488919377327, "learning_rate": 2.0189491601792287e-05, "loss": 9.3174, "step": 65820 }, { "epoch": 0.3287472845763939, "grad_norm": 0.09572838991880417, "learning_rate": 2.0187989686850733e-05, "loss": 9.3193, "step": 65830 }, { "epoch": 0.3287972234013334, "grad_norm": 0.09346995502710342, "learning_rate": 2.0186487771909187e-05, "loss": 9.3144, "step": 65840 }, { "epoch": 0.3288471622262728, "grad_norm": 0.10546184331178665, "learning_rate": 2.0184985856967634e-05, "loss": 9.3125, "step": 65850 }, { "epoch": 0.3288971010512123, "grad_norm": 0.08991089463233948, "learning_rate": 2.0183483942026084e-05, "loss": 9.3123, "step": 65860 }, { "epoch": 0.3289470398761517, "grad_norm": 0.09135644137859344, "learning_rate": 2.0181982027084534e-05, "loss": 9.3112, "step": 65870 }, { "epoch": 0.3289969787010912, "grad_norm": 0.0967700332403183, "learning_rate": 2.018048011214298e-05, "loss": 9.3096, "step": 65880 }, { "epoch": 0.3290469175260306, "grad_norm": 0.09798181802034378, "learning_rate": 2.0178978197201434e-05, "loss": 9.3139, "step": 65890 }, { "epoch": 0.3290968563509701, "grad_norm": 0.09936074167490005, "learning_rate": 2.017747628225988e-05, "loss": 9.3105, "step": 65900 }, { "epoch": 0.3291467951759095, "grad_norm": 0.094666488468647, "learning_rate": 2.017597436731833e-05, "loss": 9.3142, "step": 65910 }, { "epoch": 0.329196734000849, "grad_norm": 0.09142427891492844, "learning_rate": 2.017447245237678e-05, "loss": 9.3042, "step": 65920 }, { "epoch": 0.3292466728257884, "grad_norm": 0.09408102929592133, "learning_rate": 2.017297053743523e-05, "loss": 9.3053, "step": 65930 }, { "epoch": 0.3292966116507279, "grad_norm": 0.09405004233121872, "learning_rate": 2.0171468622493682e-05, "loss": 9.3083, "step": 65940 }, { "epoch": 0.3293465504756673, "grad_norm": 0.09288471937179565, "learning_rate": 2.016996670755213e-05, "loss": 9.3006, "step": 65950 }, { "epoch": 0.3293964893006068, "grad_norm": 0.09375298768281937, "learning_rate": 2.016846479261058e-05, "loss": 9.3127, "step": 65960 }, { "epoch": 0.3294464281255462, "grad_norm": 0.0929841622710228, "learning_rate": 2.016696287766903e-05, "loss": 9.3043, "step": 65970 }, { "epoch": 0.3294963669504857, "grad_norm": 0.09631754457950592, "learning_rate": 2.0165460962727476e-05, "loss": 9.3143, "step": 65980 }, { "epoch": 0.3295463057754251, "grad_norm": 0.09023533016443253, "learning_rate": 2.016395904778593e-05, "loss": 9.3028, "step": 65990 }, { "epoch": 0.3295962446003646, "grad_norm": 0.09742230176925659, "learning_rate": 2.0162457132844376e-05, "loss": 9.308, "step": 66000 }, { "epoch": 0.329646183425304, "grad_norm": 0.09984151273965836, "learning_rate": 2.0160955217902826e-05, "loss": 9.309, "step": 66010 }, { "epoch": 0.3296961222502435, "grad_norm": 0.09294534474611282, "learning_rate": 2.0159453302961277e-05, "loss": 9.3217, "step": 66020 }, { "epoch": 0.3297460610751829, "grad_norm": 0.09389375895261765, "learning_rate": 2.0157951388019723e-05, "loss": 9.3111, "step": 66030 }, { "epoch": 0.32979599990012237, "grad_norm": 0.09610763192176819, "learning_rate": 2.0156449473078177e-05, "loss": 9.2965, "step": 66040 }, { "epoch": 0.3298459387250618, "grad_norm": 0.1011497899889946, "learning_rate": 2.0154947558136624e-05, "loss": 9.3115, "step": 66050 }, { "epoch": 0.32989587755000127, "grad_norm": 0.09551998227834702, "learning_rate": 2.0153445643195074e-05, "loss": 9.312, "step": 66060 }, { "epoch": 0.3299458163749407, "grad_norm": 0.09386122971773148, "learning_rate": 2.0151943728253524e-05, "loss": 9.3002, "step": 66070 }, { "epoch": 0.32999575519988017, "grad_norm": 0.08605486899614334, "learning_rate": 2.015044181331197e-05, "loss": 9.3053, "step": 66080 }, { "epoch": 0.3300456940248196, "grad_norm": 0.09588326513767242, "learning_rate": 2.0148939898370424e-05, "loss": 9.3046, "step": 66090 }, { "epoch": 0.33009563284975907, "grad_norm": 0.09192916750907898, "learning_rate": 2.014743798342887e-05, "loss": 9.3079, "step": 66100 }, { "epoch": 0.3301455716746985, "grad_norm": 0.0940568745136261, "learning_rate": 2.014593606848732e-05, "loss": 9.2998, "step": 66110 }, { "epoch": 0.33019551049963797, "grad_norm": 0.09915883094072342, "learning_rate": 2.014443415354577e-05, "loss": 9.2972, "step": 66120 }, { "epoch": 0.3302454493245774, "grad_norm": 0.09629667550325394, "learning_rate": 2.0142932238604222e-05, "loss": 9.3091, "step": 66130 }, { "epoch": 0.33029538814951687, "grad_norm": 0.08921325206756592, "learning_rate": 2.0141430323662672e-05, "loss": 9.3056, "step": 66140 }, { "epoch": 0.3303453269744563, "grad_norm": 0.09300815314054489, "learning_rate": 2.013992840872112e-05, "loss": 9.298, "step": 66150 }, { "epoch": 0.33039526579939577, "grad_norm": 0.09541340917348862, "learning_rate": 2.013842649377957e-05, "loss": 9.323, "step": 66160 }, { "epoch": 0.3304452046243352, "grad_norm": 0.09386754035949707, "learning_rate": 2.013692457883802e-05, "loss": 9.3023, "step": 66170 }, { "epoch": 0.33049514344927466, "grad_norm": 0.09818427264690399, "learning_rate": 2.013542266389647e-05, "loss": 9.2981, "step": 66180 }, { "epoch": 0.3305450822742141, "grad_norm": 0.09862353652715683, "learning_rate": 2.013392074895492e-05, "loss": 9.3156, "step": 66190 }, { "epoch": 0.33059502109915356, "grad_norm": 0.10540249943733215, "learning_rate": 2.0132418834013366e-05, "loss": 9.3077, "step": 66200 }, { "epoch": 0.330644959924093, "grad_norm": 0.09350631386041641, "learning_rate": 2.0130916919071816e-05, "loss": 9.309, "step": 66210 }, { "epoch": 0.33069489874903246, "grad_norm": 0.09390132129192352, "learning_rate": 2.0129415004130267e-05, "loss": 9.2976, "step": 66220 }, { "epoch": 0.3307448375739719, "grad_norm": 0.09093783795833588, "learning_rate": 2.0127913089188717e-05, "loss": 9.3009, "step": 66230 }, { "epoch": 0.33079477639891136, "grad_norm": 0.09060053527355194, "learning_rate": 2.0126411174247167e-05, "loss": 9.2951, "step": 66240 }, { "epoch": 0.3308447152238508, "grad_norm": 0.09574903547763824, "learning_rate": 2.0124909259305614e-05, "loss": 9.3095, "step": 66250 }, { "epoch": 0.3308946540487902, "grad_norm": 0.09056057035923004, "learning_rate": 2.0123407344364064e-05, "loss": 9.3067, "step": 66260 }, { "epoch": 0.3309445928737297, "grad_norm": 0.09261029213666916, "learning_rate": 2.0121905429422514e-05, "loss": 9.3051, "step": 66270 }, { "epoch": 0.3309945316986691, "grad_norm": 0.0922454297542572, "learning_rate": 2.0120403514480964e-05, "loss": 9.3012, "step": 66280 }, { "epoch": 0.3310444705236086, "grad_norm": 0.0930676981806755, "learning_rate": 2.0118901599539414e-05, "loss": 9.2958, "step": 66290 }, { "epoch": 0.331094409348548, "grad_norm": 0.0963793620467186, "learning_rate": 2.011739968459786e-05, "loss": 9.2974, "step": 66300 }, { "epoch": 0.3311443481734875, "grad_norm": 0.09505847096443176, "learning_rate": 2.011589776965631e-05, "loss": 9.3042, "step": 66310 }, { "epoch": 0.3311942869984269, "grad_norm": 0.09292503446340561, "learning_rate": 2.011439585471476e-05, "loss": 9.3068, "step": 66320 }, { "epoch": 0.3312442258233664, "grad_norm": 0.09125857800245285, "learning_rate": 2.0112893939773212e-05, "loss": 9.3085, "step": 66330 }, { "epoch": 0.3312941646483058, "grad_norm": 0.09520285576581955, "learning_rate": 2.0111392024831662e-05, "loss": 9.3046, "step": 66340 }, { "epoch": 0.3313441034732453, "grad_norm": 0.0909276232123375, "learning_rate": 2.010989010989011e-05, "loss": 9.3044, "step": 66350 }, { "epoch": 0.3313940422981847, "grad_norm": 0.0963204950094223, "learning_rate": 2.010838819494856e-05, "loss": 9.3014, "step": 66360 }, { "epoch": 0.3314439811231242, "grad_norm": 0.09370825439691544, "learning_rate": 2.010688628000701e-05, "loss": 9.3011, "step": 66370 }, { "epoch": 0.3314939199480636, "grad_norm": 0.09406444430351257, "learning_rate": 2.010538436506546e-05, "loss": 9.3059, "step": 66380 }, { "epoch": 0.3315438587730031, "grad_norm": 0.0930875912308693, "learning_rate": 2.010388245012391e-05, "loss": 9.2974, "step": 66390 }, { "epoch": 0.3315937975979425, "grad_norm": 0.09899143129587173, "learning_rate": 2.0102380535182356e-05, "loss": 9.2958, "step": 66400 }, { "epoch": 0.331643736422882, "grad_norm": 0.09795042127370834, "learning_rate": 2.0100878620240806e-05, "loss": 9.2952, "step": 66410 }, { "epoch": 0.3316936752478214, "grad_norm": 0.09661424905061722, "learning_rate": 2.0099376705299257e-05, "loss": 9.3051, "step": 66420 }, { "epoch": 0.3317436140727609, "grad_norm": 0.09605047106742859, "learning_rate": 2.0097874790357707e-05, "loss": 9.2961, "step": 66430 }, { "epoch": 0.3317935528977003, "grad_norm": 0.09522906690835953, "learning_rate": 2.0096372875416157e-05, "loss": 9.3023, "step": 66440 }, { "epoch": 0.33184349172263977, "grad_norm": 0.09131506085395813, "learning_rate": 2.0094870960474607e-05, "loss": 9.2996, "step": 66450 }, { "epoch": 0.3318934305475792, "grad_norm": 0.09322021901607513, "learning_rate": 2.0093369045533054e-05, "loss": 9.3029, "step": 66460 }, { "epoch": 0.33194336937251867, "grad_norm": 0.09292212128639221, "learning_rate": 2.0091867130591504e-05, "loss": 9.3136, "step": 66470 }, { "epoch": 0.3319933081974581, "grad_norm": 0.09753609448671341, "learning_rate": 2.0090365215649954e-05, "loss": 9.2932, "step": 66480 }, { "epoch": 0.33204324702239757, "grad_norm": 0.09411279857158661, "learning_rate": 2.0088863300708404e-05, "loss": 9.3017, "step": 66490 }, { "epoch": 0.332093185847337, "grad_norm": 0.09396139532327652, "learning_rate": 2.0087361385766855e-05, "loss": 9.296, "step": 66500 }, { "epoch": 0.33214312467227647, "grad_norm": 0.089934341609478, "learning_rate": 2.00858594708253e-05, "loss": 9.2877, "step": 66510 }, { "epoch": 0.3321930634972159, "grad_norm": 0.09298644214868546, "learning_rate": 2.008435755588375e-05, "loss": 9.2991, "step": 66520 }, { "epoch": 0.33224300232215537, "grad_norm": 0.09051031619310379, "learning_rate": 2.0082855640942202e-05, "loss": 9.2918, "step": 66530 }, { "epoch": 0.3322929411470948, "grad_norm": 0.0935496836900711, "learning_rate": 2.0081353726000652e-05, "loss": 9.3055, "step": 66540 }, { "epoch": 0.33234287997203427, "grad_norm": 0.0968925952911377, "learning_rate": 2.0079851811059102e-05, "loss": 9.2846, "step": 66550 }, { "epoch": 0.3323928187969737, "grad_norm": 0.09733214229345322, "learning_rate": 2.007834989611755e-05, "loss": 9.2999, "step": 66560 }, { "epoch": 0.33244275762191317, "grad_norm": 0.08981287479400635, "learning_rate": 2.0076847981176e-05, "loss": 9.2988, "step": 66570 }, { "epoch": 0.3324926964468526, "grad_norm": 0.09502604603767395, "learning_rate": 2.007534606623445e-05, "loss": 9.3041, "step": 66580 }, { "epoch": 0.33254263527179206, "grad_norm": 0.0965120941400528, "learning_rate": 2.00738441512929e-05, "loss": 9.3026, "step": 66590 }, { "epoch": 0.3325925740967315, "grad_norm": 0.09275338053703308, "learning_rate": 2.007234223635135e-05, "loss": 9.3102, "step": 66600 }, { "epoch": 0.33264251292167096, "grad_norm": 0.09039387851953506, "learning_rate": 2.0070840321409796e-05, "loss": 9.3041, "step": 66610 }, { "epoch": 0.3326924517466104, "grad_norm": 0.09301149100065231, "learning_rate": 2.0069338406468247e-05, "loss": 9.295, "step": 66620 }, { "epoch": 0.33274239057154986, "grad_norm": 0.09034721553325653, "learning_rate": 2.0067836491526697e-05, "loss": 9.3058, "step": 66630 }, { "epoch": 0.3327923293964893, "grad_norm": 0.09412179887294769, "learning_rate": 2.0066334576585147e-05, "loss": 9.2925, "step": 66640 }, { "epoch": 0.33284226822142876, "grad_norm": 0.09281489998102188, "learning_rate": 2.0064832661643597e-05, "loss": 9.303, "step": 66650 }, { "epoch": 0.3328922070463682, "grad_norm": 0.09423484653234482, "learning_rate": 2.0063330746702044e-05, "loss": 9.3017, "step": 66660 }, { "epoch": 0.33294214587130766, "grad_norm": 0.0937783271074295, "learning_rate": 2.0061828831760494e-05, "loss": 9.2948, "step": 66670 }, { "epoch": 0.3329920846962471, "grad_norm": 0.09942499548196793, "learning_rate": 2.0060326916818944e-05, "loss": 9.3057, "step": 66680 }, { "epoch": 0.33304202352118656, "grad_norm": 0.09352242201566696, "learning_rate": 2.0058825001877394e-05, "loss": 9.3025, "step": 66690 }, { "epoch": 0.333091962346126, "grad_norm": 0.09306678175926208, "learning_rate": 2.0057323086935845e-05, "loss": 9.2967, "step": 66700 }, { "epoch": 0.33314190117106546, "grad_norm": 0.09103065729141235, "learning_rate": 2.005582117199429e-05, "loss": 9.2991, "step": 66710 }, { "epoch": 0.3331918399960049, "grad_norm": 0.0972660630941391, "learning_rate": 2.005431925705274e-05, "loss": 9.304, "step": 66720 }, { "epoch": 0.33324177882094436, "grad_norm": 0.09878892451524734, "learning_rate": 2.0052817342111192e-05, "loss": 9.2933, "step": 66730 }, { "epoch": 0.3332917176458838, "grad_norm": 0.08945970237255096, "learning_rate": 2.0051315427169642e-05, "loss": 9.296, "step": 66740 }, { "epoch": 0.33334165647082326, "grad_norm": 0.0968952402472496, "learning_rate": 2.0049813512228092e-05, "loss": 9.304, "step": 66750 }, { "epoch": 0.3333915952957627, "grad_norm": 0.09566505998373032, "learning_rate": 2.004831159728654e-05, "loss": 9.2936, "step": 66760 }, { "epoch": 0.33344153412070215, "grad_norm": 0.09034927189350128, "learning_rate": 2.0046809682344993e-05, "loss": 9.2968, "step": 66770 }, { "epoch": 0.3334914729456416, "grad_norm": 0.09364010393619537, "learning_rate": 2.004530776740344e-05, "loss": 9.3066, "step": 66780 }, { "epoch": 0.33354141177058105, "grad_norm": 0.09636852890253067, "learning_rate": 2.004380585246189e-05, "loss": 9.3017, "step": 66790 }, { "epoch": 0.3335913505955205, "grad_norm": 0.09206737577915192, "learning_rate": 2.004230393752034e-05, "loss": 9.3021, "step": 66800 }, { "epoch": 0.33364128942045995, "grad_norm": 0.09280619025230408, "learning_rate": 2.0040802022578786e-05, "loss": 9.2885, "step": 66810 }, { "epoch": 0.3336912282453994, "grad_norm": 0.09028727561235428, "learning_rate": 2.003930010763724e-05, "loss": 9.2951, "step": 66820 }, { "epoch": 0.33374116707033885, "grad_norm": 0.0951092317700386, "learning_rate": 2.0037798192695687e-05, "loss": 9.305, "step": 66830 }, { "epoch": 0.3337911058952783, "grad_norm": 0.09845414012670517, "learning_rate": 2.0036296277754137e-05, "loss": 9.2961, "step": 66840 }, { "epoch": 0.33384104472021775, "grad_norm": 0.09636221081018448, "learning_rate": 2.0034794362812587e-05, "loss": 9.2949, "step": 66850 }, { "epoch": 0.33389098354515717, "grad_norm": 0.09213186800479889, "learning_rate": 2.0033292447871034e-05, "loss": 9.2933, "step": 66860 }, { "epoch": 0.33394092237009665, "grad_norm": 0.09357234835624695, "learning_rate": 2.0031790532929488e-05, "loss": 9.3005, "step": 66870 }, { "epoch": 0.33399086119503607, "grad_norm": 0.0960192084312439, "learning_rate": 2.0030288617987934e-05, "loss": 9.2875, "step": 66880 }, { "epoch": 0.33404080001997555, "grad_norm": 0.09275079518556595, "learning_rate": 2.0028786703046385e-05, "loss": 9.2984, "step": 66890 }, { "epoch": 0.33409073884491497, "grad_norm": 0.08964371681213379, "learning_rate": 2.0027284788104835e-05, "loss": 9.301, "step": 66900 }, { "epoch": 0.33414067766985445, "grad_norm": 0.096300408244133, "learning_rate": 2.002578287316328e-05, "loss": 9.3032, "step": 66910 }, { "epoch": 0.33419061649479387, "grad_norm": 0.09406279772520065, "learning_rate": 2.0024280958221735e-05, "loss": 9.29, "step": 66920 }, { "epoch": 0.33424055531973335, "grad_norm": 0.09408601373434067, "learning_rate": 2.0022779043280182e-05, "loss": 9.2842, "step": 66930 }, { "epoch": 0.33429049414467277, "grad_norm": 0.10076167434453964, "learning_rate": 2.0021277128338632e-05, "loss": 9.2893, "step": 66940 }, { "epoch": 0.33434043296961224, "grad_norm": 0.09452462196350098, "learning_rate": 2.0019775213397082e-05, "loss": 9.3011, "step": 66950 }, { "epoch": 0.33439037179455167, "grad_norm": 0.1001209020614624, "learning_rate": 2.001827329845553e-05, "loss": 9.2893, "step": 66960 }, { "epoch": 0.33444031061949114, "grad_norm": 0.09850946813821793, "learning_rate": 2.0016771383513983e-05, "loss": 9.2799, "step": 66970 }, { "epoch": 0.33449024944443057, "grad_norm": 0.09696486592292786, "learning_rate": 2.001526946857243e-05, "loss": 9.2929, "step": 66980 }, { "epoch": 0.33454018826937004, "grad_norm": 0.09150009602308273, "learning_rate": 2.001376755363088e-05, "loss": 9.2956, "step": 66990 }, { "epoch": 0.33459012709430946, "grad_norm": 0.09880898147821426, "learning_rate": 2.001226563868933e-05, "loss": 9.2894, "step": 67000 }, { "epoch": 0.33464006591924894, "grad_norm": 0.1000562235713005, "learning_rate": 2.0010763723747776e-05, "loss": 9.2878, "step": 67010 }, { "epoch": 0.33469000474418836, "grad_norm": 0.09261028468608856, "learning_rate": 2.000926180880623e-05, "loss": 9.3001, "step": 67020 }, { "epoch": 0.33473994356912784, "grad_norm": 0.09857013821601868, "learning_rate": 2.0007759893864677e-05, "loss": 9.2877, "step": 67030 }, { "epoch": 0.33478988239406726, "grad_norm": 0.09461692720651627, "learning_rate": 2.0006257978923127e-05, "loss": 9.2916, "step": 67040 }, { "epoch": 0.33483982121900674, "grad_norm": 0.09334218502044678, "learning_rate": 2.0004756063981577e-05, "loss": 9.2941, "step": 67050 }, { "epoch": 0.33488976004394616, "grad_norm": 0.09218782186508179, "learning_rate": 2.0003254149040024e-05, "loss": 9.2912, "step": 67060 }, { "epoch": 0.33493969886888564, "grad_norm": 0.09374010562896729, "learning_rate": 2.0001752234098478e-05, "loss": 9.3061, "step": 67070 }, { "epoch": 0.33498963769382506, "grad_norm": 0.09382879734039307, "learning_rate": 2.0000250319156924e-05, "loss": 9.2875, "step": 67080 }, { "epoch": 0.33503957651876454, "grad_norm": 0.0962020605802536, "learning_rate": 1.9998748404215378e-05, "loss": 9.2897, "step": 67090 }, { "epoch": 0.33508951534370396, "grad_norm": 0.08903557062149048, "learning_rate": 1.9997246489273825e-05, "loss": 9.2978, "step": 67100 }, { "epoch": 0.33513945416864344, "grad_norm": 0.09756463766098022, "learning_rate": 1.999574457433227e-05, "loss": 9.2837, "step": 67110 }, { "epoch": 0.33518939299358286, "grad_norm": 0.09464458376169205, "learning_rate": 1.9994242659390725e-05, "loss": 9.2818, "step": 67120 }, { "epoch": 0.33523933181852233, "grad_norm": 0.09560305625200272, "learning_rate": 1.9992740744449172e-05, "loss": 9.299, "step": 67130 }, { "epoch": 0.33528927064346176, "grad_norm": 0.09456129372119904, "learning_rate": 1.9991238829507625e-05, "loss": 9.2885, "step": 67140 }, { "epoch": 0.33533920946840123, "grad_norm": 0.09367375075817108, "learning_rate": 1.9989736914566072e-05, "loss": 9.2979, "step": 67150 }, { "epoch": 0.33538914829334066, "grad_norm": 0.09220660477876663, "learning_rate": 1.998823499962452e-05, "loss": 9.2871, "step": 67160 }, { "epoch": 0.33543908711828013, "grad_norm": 0.09274411201477051, "learning_rate": 1.9986733084682973e-05, "loss": 9.2825, "step": 67170 }, { "epoch": 0.33548902594321955, "grad_norm": 0.09277375042438507, "learning_rate": 1.998523116974142e-05, "loss": 9.2908, "step": 67180 }, { "epoch": 0.33553896476815903, "grad_norm": 0.09955313801765442, "learning_rate": 1.9983729254799873e-05, "loss": 9.2862, "step": 67190 }, { "epoch": 0.33558890359309845, "grad_norm": 0.09518168866634369, "learning_rate": 1.998222733985832e-05, "loss": 9.2919, "step": 67200 }, { "epoch": 0.33563884241803793, "grad_norm": 0.09314264357089996, "learning_rate": 1.9980725424916767e-05, "loss": 9.2893, "step": 67210 }, { "epoch": 0.33568878124297735, "grad_norm": 0.09429054707288742, "learning_rate": 1.997922350997522e-05, "loss": 9.3004, "step": 67220 }, { "epoch": 0.33573872006791683, "grad_norm": 0.09523293375968933, "learning_rate": 1.9977721595033667e-05, "loss": 9.3031, "step": 67230 }, { "epoch": 0.33578865889285625, "grad_norm": 0.09158944338560104, "learning_rate": 1.997621968009212e-05, "loss": 9.2918, "step": 67240 }, { "epoch": 0.3358385977177957, "grad_norm": 0.09265114367008209, "learning_rate": 1.9974717765150567e-05, "loss": 9.2749, "step": 67250 }, { "epoch": 0.33588853654273515, "grad_norm": 0.09522463381290436, "learning_rate": 1.9973215850209014e-05, "loss": 9.288, "step": 67260 }, { "epoch": 0.33593847536767457, "grad_norm": 0.08956436067819595, "learning_rate": 1.9971713935267468e-05, "loss": 9.2902, "step": 67270 }, { "epoch": 0.33598841419261405, "grad_norm": 0.09145728498697281, "learning_rate": 1.9970212020325914e-05, "loss": 9.2872, "step": 67280 }, { "epoch": 0.33603835301755347, "grad_norm": 0.093343086540699, "learning_rate": 1.9968710105384368e-05, "loss": 9.2793, "step": 67290 }, { "epoch": 0.33608829184249295, "grad_norm": 0.08741866797208786, "learning_rate": 1.9967208190442815e-05, "loss": 9.2963, "step": 67300 }, { "epoch": 0.33613823066743237, "grad_norm": 0.09440305083990097, "learning_rate": 1.996570627550126e-05, "loss": 9.2873, "step": 67310 }, { "epoch": 0.33618816949237185, "grad_norm": 0.09325043857097626, "learning_rate": 1.9964204360559715e-05, "loss": 9.2893, "step": 67320 }, { "epoch": 0.33623810831731127, "grad_norm": 0.09446980804204941, "learning_rate": 1.9962702445618162e-05, "loss": 9.2772, "step": 67330 }, { "epoch": 0.33628804714225075, "grad_norm": 0.09616648405790329, "learning_rate": 1.9961200530676615e-05, "loss": 9.2826, "step": 67340 }, { "epoch": 0.33633798596719017, "grad_norm": 0.09551723301410675, "learning_rate": 1.9959698615735062e-05, "loss": 9.279, "step": 67350 }, { "epoch": 0.33638792479212964, "grad_norm": 0.09174014627933502, "learning_rate": 1.995819670079351e-05, "loss": 9.2902, "step": 67360 }, { "epoch": 0.33643786361706907, "grad_norm": 0.09369547665119171, "learning_rate": 1.9956694785851963e-05, "loss": 9.297, "step": 67370 }, { "epoch": 0.33648780244200854, "grad_norm": 0.09878925234079361, "learning_rate": 1.995519287091041e-05, "loss": 9.283, "step": 67380 }, { "epoch": 0.33653774126694797, "grad_norm": 0.09697102010250092, "learning_rate": 1.9953690955968863e-05, "loss": 9.2967, "step": 67390 }, { "epoch": 0.33658768009188744, "grad_norm": 0.10312024503946304, "learning_rate": 1.995218904102731e-05, "loss": 9.2857, "step": 67400 }, { "epoch": 0.33663761891682686, "grad_norm": 0.10092073678970337, "learning_rate": 1.995068712608576e-05, "loss": 9.2912, "step": 67410 }, { "epoch": 0.33668755774176634, "grad_norm": 0.08944955468177795, "learning_rate": 1.994918521114421e-05, "loss": 9.2866, "step": 67420 }, { "epoch": 0.33673749656670576, "grad_norm": 0.09892292320728302, "learning_rate": 1.9947683296202657e-05, "loss": 9.2795, "step": 67430 }, { "epoch": 0.33678743539164524, "grad_norm": 0.09553686529397964, "learning_rate": 1.994618138126111e-05, "loss": 9.2821, "step": 67440 }, { "epoch": 0.33683737421658466, "grad_norm": 0.09259875863790512, "learning_rate": 1.9944679466319557e-05, "loss": 9.2918, "step": 67450 }, { "epoch": 0.33688731304152414, "grad_norm": 0.09672240912914276, "learning_rate": 1.9943177551378007e-05, "loss": 9.2893, "step": 67460 }, { "epoch": 0.33693725186646356, "grad_norm": 0.09132887423038483, "learning_rate": 1.9941675636436458e-05, "loss": 9.2791, "step": 67470 }, { "epoch": 0.33698719069140304, "grad_norm": 0.09487363696098328, "learning_rate": 1.9940173721494904e-05, "loss": 9.2924, "step": 67480 }, { "epoch": 0.33703712951634246, "grad_norm": 0.09358032792806625, "learning_rate": 1.9938671806553358e-05, "loss": 9.2942, "step": 67490 }, { "epoch": 0.33708706834128194, "grad_norm": 0.09767814725637436, "learning_rate": 1.9937169891611805e-05, "loss": 9.2936, "step": 67500 }, { "epoch": 0.33713700716622136, "grad_norm": 0.0938803032040596, "learning_rate": 1.9935667976670255e-05, "loss": 9.2903, "step": 67510 }, { "epoch": 0.33718694599116084, "grad_norm": 0.09350651502609253, "learning_rate": 1.9934166061728705e-05, "loss": 9.2828, "step": 67520 }, { "epoch": 0.33723688481610026, "grad_norm": 0.09044313430786133, "learning_rate": 1.9932664146787152e-05, "loss": 9.2797, "step": 67530 }, { "epoch": 0.33728682364103973, "grad_norm": 0.09183132648468018, "learning_rate": 1.9931162231845605e-05, "loss": 9.2823, "step": 67540 }, { "epoch": 0.33733676246597916, "grad_norm": 0.09145781397819519, "learning_rate": 1.9929660316904052e-05, "loss": 9.2747, "step": 67550 }, { "epoch": 0.33738670129091863, "grad_norm": 0.09186349809169769, "learning_rate": 1.9928158401962502e-05, "loss": 9.283, "step": 67560 }, { "epoch": 0.33743664011585806, "grad_norm": 0.09399545937776566, "learning_rate": 1.9926656487020953e-05, "loss": 9.2948, "step": 67570 }, { "epoch": 0.33748657894079753, "grad_norm": 0.09273233264684677, "learning_rate": 1.99251545720794e-05, "loss": 9.2942, "step": 67580 }, { "epoch": 0.33753651776573695, "grad_norm": 0.09469340741634369, "learning_rate": 1.9923652657137853e-05, "loss": 9.2731, "step": 67590 }, { "epoch": 0.33758645659067643, "grad_norm": 0.0966138169169426, "learning_rate": 1.99221507421963e-05, "loss": 9.2785, "step": 67600 }, { "epoch": 0.33763639541561585, "grad_norm": 0.08690140396356583, "learning_rate": 1.992064882725475e-05, "loss": 9.2847, "step": 67610 }, { "epoch": 0.33768633424055533, "grad_norm": 0.09711094200611115, "learning_rate": 1.99191469123132e-05, "loss": 9.2989, "step": 67620 }, { "epoch": 0.33773627306549475, "grad_norm": 0.09582893550395966, "learning_rate": 1.9917644997371647e-05, "loss": 9.2828, "step": 67630 }, { "epoch": 0.33778621189043423, "grad_norm": 0.09695571660995483, "learning_rate": 1.99161430824301e-05, "loss": 9.2946, "step": 67640 }, { "epoch": 0.33783615071537365, "grad_norm": 0.09266763180494308, "learning_rate": 1.9914641167488547e-05, "loss": 9.2909, "step": 67650 }, { "epoch": 0.33788608954031313, "grad_norm": 0.09419077634811401, "learning_rate": 1.9913139252546997e-05, "loss": 9.2872, "step": 67660 }, { "epoch": 0.33793602836525255, "grad_norm": 0.09436140954494476, "learning_rate": 1.9911637337605448e-05, "loss": 9.2898, "step": 67670 }, { "epoch": 0.337985967190192, "grad_norm": 0.09518197923898697, "learning_rate": 1.9910135422663894e-05, "loss": 9.2801, "step": 67680 }, { "epoch": 0.33803590601513145, "grad_norm": 0.09597449004650116, "learning_rate": 1.9908633507722348e-05, "loss": 9.2772, "step": 67690 }, { "epoch": 0.3380858448400709, "grad_norm": 0.09348492324352264, "learning_rate": 1.9907131592780795e-05, "loss": 9.2983, "step": 67700 }, { "epoch": 0.33813578366501035, "grad_norm": 0.09242329746484756, "learning_rate": 1.9905629677839245e-05, "loss": 9.2903, "step": 67710 }, { "epoch": 0.3381857224899498, "grad_norm": 0.09632088989019394, "learning_rate": 1.9904127762897695e-05, "loss": 9.2947, "step": 67720 }, { "epoch": 0.33823566131488925, "grad_norm": 0.09371468424797058, "learning_rate": 1.9902625847956142e-05, "loss": 9.2904, "step": 67730 }, { "epoch": 0.3382856001398287, "grad_norm": 0.09897583723068237, "learning_rate": 1.9901123933014595e-05, "loss": 9.2772, "step": 67740 }, { "epoch": 0.33833553896476815, "grad_norm": 0.0937107503414154, "learning_rate": 1.9899622018073042e-05, "loss": 9.2801, "step": 67750 }, { "epoch": 0.3383854777897076, "grad_norm": 0.09183280169963837, "learning_rate": 1.9898120103131492e-05, "loss": 9.2962, "step": 67760 }, { "epoch": 0.33843541661464704, "grad_norm": 0.09664995968341827, "learning_rate": 1.9896618188189943e-05, "loss": 9.2849, "step": 67770 }, { "epoch": 0.3384853554395865, "grad_norm": 0.09200311452150345, "learning_rate": 1.9895116273248393e-05, "loss": 9.2824, "step": 67780 }, { "epoch": 0.33853529426452594, "grad_norm": 0.09540259838104248, "learning_rate": 1.9893614358306843e-05, "loss": 9.2852, "step": 67790 }, { "epoch": 0.3385852330894654, "grad_norm": 0.09478534013032913, "learning_rate": 1.989211244336529e-05, "loss": 9.2823, "step": 67800 }, { "epoch": 0.33863517191440484, "grad_norm": 0.09047430753707886, "learning_rate": 1.989061052842374e-05, "loss": 9.2842, "step": 67810 }, { "epoch": 0.3386851107393443, "grad_norm": 0.09374905377626419, "learning_rate": 1.988910861348219e-05, "loss": 9.2843, "step": 67820 }, { "epoch": 0.33873504956428374, "grad_norm": 0.08902385830879211, "learning_rate": 1.988760669854064e-05, "loss": 9.2835, "step": 67830 }, { "epoch": 0.3387849883892232, "grad_norm": 0.09766287356615067, "learning_rate": 1.988610478359909e-05, "loss": 9.2869, "step": 67840 }, { "epoch": 0.33883492721416264, "grad_norm": 0.0938125029206276, "learning_rate": 1.9884602868657537e-05, "loss": 9.2816, "step": 67850 }, { "epoch": 0.3388848660391021, "grad_norm": 0.09323128312826157, "learning_rate": 1.9883100953715987e-05, "loss": 9.268, "step": 67860 }, { "epoch": 0.33893480486404154, "grad_norm": 0.092827707529068, "learning_rate": 1.9881599038774438e-05, "loss": 9.2823, "step": 67870 }, { "epoch": 0.338984743688981, "grad_norm": 0.09353949874639511, "learning_rate": 1.9880097123832888e-05, "loss": 9.2872, "step": 67880 }, { "epoch": 0.33903468251392044, "grad_norm": 0.09071552753448486, "learning_rate": 1.9878595208891338e-05, "loss": 9.2822, "step": 67890 }, { "epoch": 0.3390846213388599, "grad_norm": 0.09243173152208328, "learning_rate": 1.9877093293949785e-05, "loss": 9.2805, "step": 67900 }, { "epoch": 0.33913456016379934, "grad_norm": 0.09218911081552505, "learning_rate": 1.987559137900824e-05, "loss": 9.2797, "step": 67910 }, { "epoch": 0.3391844989887388, "grad_norm": 0.09476479142904282, "learning_rate": 1.9874089464066685e-05, "loss": 9.2845, "step": 67920 }, { "epoch": 0.33923443781367824, "grad_norm": 0.10015514492988586, "learning_rate": 1.9872587549125135e-05, "loss": 9.2723, "step": 67930 }, { "epoch": 0.3392843766386177, "grad_norm": 0.09330648928880692, "learning_rate": 1.9871085634183585e-05, "loss": 9.2797, "step": 67940 }, { "epoch": 0.33933431546355713, "grad_norm": 0.0925719290971756, "learning_rate": 1.9869583719242032e-05, "loss": 9.2761, "step": 67950 }, { "epoch": 0.3393842542884966, "grad_norm": 0.0938037782907486, "learning_rate": 1.9868081804300486e-05, "loss": 9.2795, "step": 67960 }, { "epoch": 0.33943419311343603, "grad_norm": 0.09363068640232086, "learning_rate": 1.9866579889358933e-05, "loss": 9.2788, "step": 67970 }, { "epoch": 0.3394841319383755, "grad_norm": 0.09175083041191101, "learning_rate": 1.9865077974417383e-05, "loss": 9.2829, "step": 67980 }, { "epoch": 0.33953407076331493, "grad_norm": 0.09043523669242859, "learning_rate": 1.9863576059475833e-05, "loss": 9.2802, "step": 67990 }, { "epoch": 0.3395840095882544, "grad_norm": 0.09502432495355606, "learning_rate": 1.986207414453428e-05, "loss": 9.2757, "step": 68000 }, { "epoch": 0.33963394841319383, "grad_norm": 0.09515243023633957, "learning_rate": 1.9860572229592733e-05, "loss": 9.2656, "step": 68010 }, { "epoch": 0.3396838872381333, "grad_norm": 0.0931011438369751, "learning_rate": 1.985907031465118e-05, "loss": 9.271, "step": 68020 }, { "epoch": 0.33973382606307273, "grad_norm": 0.09343037009239197, "learning_rate": 1.985756839970963e-05, "loss": 9.2821, "step": 68030 }, { "epoch": 0.3397837648880122, "grad_norm": 0.08982733637094498, "learning_rate": 1.985606648476808e-05, "loss": 9.2779, "step": 68040 }, { "epoch": 0.33983370371295163, "grad_norm": 0.09892088919878006, "learning_rate": 1.9854564569826527e-05, "loss": 9.2804, "step": 68050 }, { "epoch": 0.3398836425378911, "grad_norm": 0.09419706463813782, "learning_rate": 1.985306265488498e-05, "loss": 9.2811, "step": 68060 }, { "epoch": 0.33993358136283053, "grad_norm": 0.09958070516586304, "learning_rate": 1.9851560739943428e-05, "loss": 9.2808, "step": 68070 }, { "epoch": 0.33998352018777, "grad_norm": 0.09093392640352249, "learning_rate": 1.9850058825001878e-05, "loss": 9.2843, "step": 68080 }, { "epoch": 0.3400334590127094, "grad_norm": 0.0960288792848587, "learning_rate": 1.9848556910060328e-05, "loss": 9.2756, "step": 68090 }, { "epoch": 0.3400833978376489, "grad_norm": 0.0980386883020401, "learning_rate": 1.9847054995118778e-05, "loss": 9.2687, "step": 68100 }, { "epoch": 0.3401333366625883, "grad_norm": 0.09633202850818634, "learning_rate": 1.984555308017723e-05, "loss": 9.2726, "step": 68110 }, { "epoch": 0.3401832754875278, "grad_norm": 0.09432492405176163, "learning_rate": 1.9844051165235675e-05, "loss": 9.2645, "step": 68120 }, { "epoch": 0.3402332143124672, "grad_norm": 0.09382835030555725, "learning_rate": 1.9842549250294125e-05, "loss": 9.2777, "step": 68130 }, { "epoch": 0.3402831531374067, "grad_norm": 0.09988310188055038, "learning_rate": 1.9841047335352575e-05, "loss": 9.2738, "step": 68140 }, { "epoch": 0.3403330919623461, "grad_norm": 0.09288916736841202, "learning_rate": 1.9839545420411026e-05, "loss": 9.2727, "step": 68150 }, { "epoch": 0.3403830307872856, "grad_norm": 0.08969771862030029, "learning_rate": 1.9838043505469476e-05, "loss": 9.2757, "step": 68160 }, { "epoch": 0.340432969612225, "grad_norm": 0.09481577575206757, "learning_rate": 1.9836541590527923e-05, "loss": 9.2677, "step": 68170 }, { "epoch": 0.3404829084371645, "grad_norm": 0.09572725743055344, "learning_rate": 1.9835039675586373e-05, "loss": 9.2771, "step": 68180 }, { "epoch": 0.3405328472621039, "grad_norm": 0.09492908418178558, "learning_rate": 1.9833537760644823e-05, "loss": 9.2752, "step": 68190 }, { "epoch": 0.3405827860870434, "grad_norm": 0.09422598034143448, "learning_rate": 1.9832035845703273e-05, "loss": 9.2728, "step": 68200 }, { "epoch": 0.3406327249119828, "grad_norm": 0.08828997611999512, "learning_rate": 1.9830533930761723e-05, "loss": 9.2755, "step": 68210 }, { "epoch": 0.3406826637369223, "grad_norm": 0.09550122916698456, "learning_rate": 1.982903201582017e-05, "loss": 9.2761, "step": 68220 }, { "epoch": 0.3407326025618617, "grad_norm": 0.09217823296785355, "learning_rate": 1.982753010087862e-05, "loss": 9.28, "step": 68230 }, { "epoch": 0.34078254138680114, "grad_norm": 0.09574399888515472, "learning_rate": 1.982602818593707e-05, "loss": 9.2811, "step": 68240 }, { "epoch": 0.3408324802117406, "grad_norm": 0.09653940051794052, "learning_rate": 1.982452627099552e-05, "loss": 9.2771, "step": 68250 }, { "epoch": 0.34088241903668004, "grad_norm": 0.09815537184476852, "learning_rate": 1.982302435605397e-05, "loss": 9.2731, "step": 68260 }, { "epoch": 0.3409323578616195, "grad_norm": 0.09170655906200409, "learning_rate": 1.9821522441112418e-05, "loss": 9.2808, "step": 68270 }, { "epoch": 0.34098229668655894, "grad_norm": 0.09200761467218399, "learning_rate": 1.9820020526170868e-05, "loss": 9.2747, "step": 68280 }, { "epoch": 0.3410322355114984, "grad_norm": 0.08978271484375, "learning_rate": 1.9818518611229318e-05, "loss": 9.2733, "step": 68290 }, { "epoch": 0.34108217433643784, "grad_norm": 0.09526646882295609, "learning_rate": 1.9817016696287768e-05, "loss": 9.2776, "step": 68300 }, { "epoch": 0.3411321131613773, "grad_norm": 0.0968102440237999, "learning_rate": 1.981551478134622e-05, "loss": 9.267, "step": 68310 }, { "epoch": 0.34118205198631674, "grad_norm": 0.09626856446266174, "learning_rate": 1.9814012866404665e-05, "loss": 9.2769, "step": 68320 }, { "epoch": 0.3412319908112562, "grad_norm": 0.09758766740560532, "learning_rate": 1.9812510951463115e-05, "loss": 9.2728, "step": 68330 }, { "epoch": 0.34128192963619564, "grad_norm": 0.09370464831590652, "learning_rate": 1.9811009036521566e-05, "loss": 9.2702, "step": 68340 }, { "epoch": 0.3413318684611351, "grad_norm": 0.0944569855928421, "learning_rate": 1.9809507121580016e-05, "loss": 9.282, "step": 68350 }, { "epoch": 0.34138180728607453, "grad_norm": 0.09909389168024063, "learning_rate": 1.9808005206638466e-05, "loss": 9.2794, "step": 68360 }, { "epoch": 0.341431746111014, "grad_norm": 0.097555011510849, "learning_rate": 1.9806503291696913e-05, "loss": 9.2815, "step": 68370 }, { "epoch": 0.34148168493595343, "grad_norm": 0.0935775637626648, "learning_rate": 1.9805001376755363e-05, "loss": 9.2732, "step": 68380 }, { "epoch": 0.3415316237608929, "grad_norm": 0.09548195451498032, "learning_rate": 1.9803499461813813e-05, "loss": 9.2617, "step": 68390 }, { "epoch": 0.34158156258583233, "grad_norm": 0.09691401571035385, "learning_rate": 1.9801997546872263e-05, "loss": 9.2762, "step": 68400 }, { "epoch": 0.3416315014107718, "grad_norm": 0.09655098617076874, "learning_rate": 1.9800495631930713e-05, "loss": 9.2838, "step": 68410 }, { "epoch": 0.34168144023571123, "grad_norm": 0.09758975356817245, "learning_rate": 1.9798993716989164e-05, "loss": 9.2779, "step": 68420 }, { "epoch": 0.3417313790606507, "grad_norm": 0.0990617647767067, "learning_rate": 1.979749180204761e-05, "loss": 9.2683, "step": 68430 }, { "epoch": 0.34178131788559013, "grad_norm": 0.09439375251531601, "learning_rate": 1.979598988710606e-05, "loss": 9.2771, "step": 68440 }, { "epoch": 0.3418312567105296, "grad_norm": 0.09546418488025665, "learning_rate": 1.979448797216451e-05, "loss": 9.2703, "step": 68450 }, { "epoch": 0.34188119553546903, "grad_norm": 0.09475396573543549, "learning_rate": 1.979298605722296e-05, "loss": 9.2702, "step": 68460 }, { "epoch": 0.3419311343604085, "grad_norm": 0.08913654834032059, "learning_rate": 1.979148414228141e-05, "loss": 9.2771, "step": 68470 }, { "epoch": 0.34198107318534793, "grad_norm": 0.09099783003330231, "learning_rate": 1.9789982227339858e-05, "loss": 9.2692, "step": 68480 }, { "epoch": 0.3420310120102874, "grad_norm": 0.09147406369447708, "learning_rate": 1.9788480312398308e-05, "loss": 9.2736, "step": 68490 }, { "epoch": 0.3420809508352268, "grad_norm": 0.09418332576751709, "learning_rate": 1.9786978397456758e-05, "loss": 9.2729, "step": 68500 }, { "epoch": 0.3421308896601663, "grad_norm": 0.09262189269065857, "learning_rate": 1.978547648251521e-05, "loss": 9.2679, "step": 68510 }, { "epoch": 0.3421808284851057, "grad_norm": 0.10010916739702225, "learning_rate": 1.978397456757366e-05, "loss": 9.2779, "step": 68520 }, { "epoch": 0.3422307673100452, "grad_norm": 0.09533961862325668, "learning_rate": 1.9782472652632105e-05, "loss": 9.2709, "step": 68530 }, { "epoch": 0.3422807061349846, "grad_norm": 0.09138842672109604, "learning_rate": 1.9780970737690556e-05, "loss": 9.2689, "step": 68540 }, { "epoch": 0.3423306449599241, "grad_norm": 0.09413190186023712, "learning_rate": 1.9779468822749006e-05, "loss": 9.2706, "step": 68550 }, { "epoch": 0.3423805837848635, "grad_norm": 0.09632963687181473, "learning_rate": 1.9777966907807456e-05, "loss": 9.2716, "step": 68560 }, { "epoch": 0.342430522609803, "grad_norm": 0.09684199839830399, "learning_rate": 1.9776464992865906e-05, "loss": 9.2731, "step": 68570 }, { "epoch": 0.3424804614347424, "grad_norm": 0.09297710657119751, "learning_rate": 1.9774963077924353e-05, "loss": 9.2758, "step": 68580 }, { "epoch": 0.3425304002596819, "grad_norm": 0.09406734257936478, "learning_rate": 1.9773461162982803e-05, "loss": 9.2581, "step": 68590 }, { "epoch": 0.3425803390846213, "grad_norm": 0.09379177540540695, "learning_rate": 1.9771959248041253e-05, "loss": 9.2652, "step": 68600 }, { "epoch": 0.3426302779095608, "grad_norm": 0.0957665890455246, "learning_rate": 1.9770457333099703e-05, "loss": 9.2713, "step": 68610 }, { "epoch": 0.3426802167345002, "grad_norm": 0.0952882319688797, "learning_rate": 1.9768955418158154e-05, "loss": 9.2763, "step": 68620 }, { "epoch": 0.3427301555594397, "grad_norm": 0.09209991991519928, "learning_rate": 1.97674535032166e-05, "loss": 9.2651, "step": 68630 }, { "epoch": 0.3427800943843791, "grad_norm": 0.09648279845714569, "learning_rate": 1.976595158827505e-05, "loss": 9.2669, "step": 68640 }, { "epoch": 0.3428300332093186, "grad_norm": 0.09398400038480759, "learning_rate": 1.97644496733335e-05, "loss": 9.2713, "step": 68650 }, { "epoch": 0.342879972034258, "grad_norm": 0.0925755649805069, "learning_rate": 1.976294775839195e-05, "loss": 9.2797, "step": 68660 }, { "epoch": 0.3429299108591975, "grad_norm": 0.0941460132598877, "learning_rate": 1.97614458434504e-05, "loss": 9.2715, "step": 68670 }, { "epoch": 0.3429798496841369, "grad_norm": 0.0912114679813385, "learning_rate": 1.9759943928508848e-05, "loss": 9.2772, "step": 68680 }, { "epoch": 0.3430297885090764, "grad_norm": 0.09493990242481232, "learning_rate": 1.9758442013567298e-05, "loss": 9.26, "step": 68690 }, { "epoch": 0.3430797273340158, "grad_norm": 0.09542454034090042, "learning_rate": 1.9756940098625748e-05, "loss": 9.2713, "step": 68700 }, { "epoch": 0.3431296661589553, "grad_norm": 0.09154530614614487, "learning_rate": 1.97554381836842e-05, "loss": 9.2737, "step": 68710 }, { "epoch": 0.3431796049838947, "grad_norm": 0.09703396260738373, "learning_rate": 1.975393626874265e-05, "loss": 9.2639, "step": 68720 }, { "epoch": 0.3432295438088342, "grad_norm": 0.09249304234981537, "learning_rate": 1.9752434353801095e-05, "loss": 9.265, "step": 68730 }, { "epoch": 0.3432794826337736, "grad_norm": 0.10178221017122269, "learning_rate": 1.975093243885955e-05, "loss": 9.2766, "step": 68740 }, { "epoch": 0.3433294214587131, "grad_norm": 0.09313027560710907, "learning_rate": 1.9749430523917996e-05, "loss": 9.2733, "step": 68750 }, { "epoch": 0.3433793602836525, "grad_norm": 0.09608368575572968, "learning_rate": 1.9747928608976446e-05, "loss": 9.2663, "step": 68760 }, { "epoch": 0.343429299108592, "grad_norm": 0.09877718985080719, "learning_rate": 1.9746426694034896e-05, "loss": 9.2556, "step": 68770 }, { "epoch": 0.3434792379335314, "grad_norm": 0.09784068167209625, "learning_rate": 1.9744924779093343e-05, "loss": 9.2615, "step": 68780 }, { "epoch": 0.3435291767584709, "grad_norm": 0.09086261689662933, "learning_rate": 1.9743422864151796e-05, "loss": 9.2652, "step": 68790 }, { "epoch": 0.3435791155834103, "grad_norm": 0.09742986410856247, "learning_rate": 1.9741920949210243e-05, "loss": 9.2638, "step": 68800 }, { "epoch": 0.3436290544083498, "grad_norm": 0.09637963771820068, "learning_rate": 1.9740419034268693e-05, "loss": 9.2678, "step": 68810 }, { "epoch": 0.3436789932332892, "grad_norm": 0.09257320314645767, "learning_rate": 1.9738917119327144e-05, "loss": 9.2625, "step": 68820 }, { "epoch": 0.3437289320582287, "grad_norm": 0.09262960404157639, "learning_rate": 1.973741520438559e-05, "loss": 9.2747, "step": 68830 }, { "epoch": 0.3437788708831681, "grad_norm": 0.09434715658426285, "learning_rate": 1.9735913289444044e-05, "loss": 9.2595, "step": 68840 }, { "epoch": 0.3438288097081076, "grad_norm": 0.09280122816562653, "learning_rate": 1.973441137450249e-05, "loss": 9.2605, "step": 68850 }, { "epoch": 0.343878748533047, "grad_norm": 0.09355524182319641, "learning_rate": 1.973290945956094e-05, "loss": 9.2708, "step": 68860 }, { "epoch": 0.3439286873579865, "grad_norm": 0.09285325556993484, "learning_rate": 1.973140754461939e-05, "loss": 9.277, "step": 68870 }, { "epoch": 0.3439786261829259, "grad_norm": 0.09656701236963272, "learning_rate": 1.9729905629677838e-05, "loss": 9.2666, "step": 68880 }, { "epoch": 0.3440285650078654, "grad_norm": 0.10322730988264084, "learning_rate": 1.972840371473629e-05, "loss": 9.2636, "step": 68890 }, { "epoch": 0.3440785038328048, "grad_norm": 0.09647629410028458, "learning_rate": 1.9726901799794738e-05, "loss": 9.276, "step": 68900 }, { "epoch": 0.3441284426577443, "grad_norm": 0.09318669140338898, "learning_rate": 1.972539988485319e-05, "loss": 9.2602, "step": 68910 }, { "epoch": 0.3441783814826837, "grad_norm": 0.09435082972049713, "learning_rate": 1.972389796991164e-05, "loss": 9.2701, "step": 68920 }, { "epoch": 0.3442283203076232, "grad_norm": 0.09164106100797653, "learning_rate": 1.9722396054970085e-05, "loss": 9.2785, "step": 68930 }, { "epoch": 0.3442782591325626, "grad_norm": 0.08812560886144638, "learning_rate": 1.972089414002854e-05, "loss": 9.2758, "step": 68940 }, { "epoch": 0.3443281979575021, "grad_norm": 0.0958331823348999, "learning_rate": 1.9719392225086986e-05, "loss": 9.2655, "step": 68950 }, { "epoch": 0.3443781367824415, "grad_norm": 0.0955391451716423, "learning_rate": 1.9717890310145436e-05, "loss": 9.2702, "step": 68960 }, { "epoch": 0.344428075607381, "grad_norm": 0.09317716956138611, "learning_rate": 1.9716388395203886e-05, "loss": 9.2806, "step": 68970 }, { "epoch": 0.3444780144323204, "grad_norm": 0.09601125121116638, "learning_rate": 1.9714886480262333e-05, "loss": 9.2552, "step": 68980 }, { "epoch": 0.3445279532572599, "grad_norm": 0.08980926871299744, "learning_rate": 1.9713384565320786e-05, "loss": 9.2754, "step": 68990 }, { "epoch": 0.3445778920821993, "grad_norm": 0.09495431184768677, "learning_rate": 1.9711882650379233e-05, "loss": 9.2638, "step": 69000 }, { "epoch": 0.3446278309071388, "grad_norm": 0.09350882470607758, "learning_rate": 1.9710380735437683e-05, "loss": 9.2606, "step": 69010 }, { "epoch": 0.3446777697320782, "grad_norm": 0.08809029310941696, "learning_rate": 1.9708878820496134e-05, "loss": 9.262, "step": 69020 }, { "epoch": 0.3447277085570177, "grad_norm": 0.09742359071969986, "learning_rate": 1.970737690555458e-05, "loss": 9.2644, "step": 69030 }, { "epoch": 0.3447776473819571, "grad_norm": 0.09343401342630386, "learning_rate": 1.9705874990613034e-05, "loss": 9.2674, "step": 69040 }, { "epoch": 0.3448275862068966, "grad_norm": 0.09134664386510849, "learning_rate": 1.970437307567148e-05, "loss": 9.2566, "step": 69050 }, { "epoch": 0.344877525031836, "grad_norm": 0.09671717882156372, "learning_rate": 1.9702871160729934e-05, "loss": 9.2612, "step": 69060 }, { "epoch": 0.3449274638567755, "grad_norm": 0.09348636120557785, "learning_rate": 1.970136924578838e-05, "loss": 9.2665, "step": 69070 }, { "epoch": 0.3449774026817149, "grad_norm": 0.0987660363316536, "learning_rate": 1.9699867330846828e-05, "loss": 9.2633, "step": 69080 }, { "epoch": 0.3450273415066544, "grad_norm": 0.09916608035564423, "learning_rate": 1.969836541590528e-05, "loss": 9.2581, "step": 69090 }, { "epoch": 0.3450772803315938, "grad_norm": 0.0879778116941452, "learning_rate": 1.9696863500963728e-05, "loss": 9.258, "step": 69100 }, { "epoch": 0.34512721915653327, "grad_norm": 0.09248411655426025, "learning_rate": 1.9695361586022182e-05, "loss": 9.262, "step": 69110 }, { "epoch": 0.3451771579814727, "grad_norm": 0.09013424068689346, "learning_rate": 1.969385967108063e-05, "loss": 9.2611, "step": 69120 }, { "epoch": 0.34522709680641217, "grad_norm": 0.09179334342479706, "learning_rate": 1.9692357756139075e-05, "loss": 9.2494, "step": 69130 }, { "epoch": 0.3452770356313516, "grad_norm": 0.09357766062021255, "learning_rate": 1.969085584119753e-05, "loss": 9.2625, "step": 69140 }, { "epoch": 0.34532697445629107, "grad_norm": 0.09096767753362656, "learning_rate": 1.9689353926255976e-05, "loss": 9.2662, "step": 69150 }, { "epoch": 0.3453769132812305, "grad_norm": 0.0954696536064148, "learning_rate": 1.968785201131443e-05, "loss": 9.2555, "step": 69160 }, { "epoch": 0.34542685210616997, "grad_norm": 0.09414954483509064, "learning_rate": 1.9686350096372876e-05, "loss": 9.2556, "step": 69170 }, { "epoch": 0.3454767909311094, "grad_norm": 0.09444237500429153, "learning_rate": 1.9684848181431323e-05, "loss": 9.2622, "step": 69180 }, { "epoch": 0.34552672975604887, "grad_norm": 0.09886594861745834, "learning_rate": 1.9683346266489776e-05, "loss": 9.2541, "step": 69190 }, { "epoch": 0.3455766685809883, "grad_norm": 0.09522037953138351, "learning_rate": 1.9681844351548223e-05, "loss": 9.2655, "step": 69200 }, { "epoch": 0.34562660740592777, "grad_norm": 0.09733402729034424, "learning_rate": 1.9680342436606677e-05, "loss": 9.2584, "step": 69210 }, { "epoch": 0.3456765462308672, "grad_norm": 0.09290558099746704, "learning_rate": 1.9678840521665124e-05, "loss": 9.2615, "step": 69220 }, { "epoch": 0.3457264850558066, "grad_norm": 0.09087418764829636, "learning_rate": 1.967733860672357e-05, "loss": 9.2717, "step": 69230 }, { "epoch": 0.3457764238807461, "grad_norm": 0.09687705338001251, "learning_rate": 1.9675836691782024e-05, "loss": 9.2599, "step": 69240 }, { "epoch": 0.3458263627056855, "grad_norm": 0.09234501421451569, "learning_rate": 1.967433477684047e-05, "loss": 9.2586, "step": 69250 }, { "epoch": 0.345876301530625, "grad_norm": 0.09087526053190231, "learning_rate": 1.9672832861898924e-05, "loss": 9.2582, "step": 69260 }, { "epoch": 0.3459262403555644, "grad_norm": 0.0971473827958107, "learning_rate": 1.967133094695737e-05, "loss": 9.2627, "step": 69270 }, { "epoch": 0.3459761791805039, "grad_norm": 0.0920579731464386, "learning_rate": 1.9669829032015818e-05, "loss": 9.2558, "step": 69280 }, { "epoch": 0.3460261180054433, "grad_norm": 0.09076216071844101, "learning_rate": 1.966832711707427e-05, "loss": 9.2622, "step": 69290 }, { "epoch": 0.3460760568303828, "grad_norm": 0.09444985538721085, "learning_rate": 1.9666825202132718e-05, "loss": 9.2538, "step": 69300 }, { "epoch": 0.3461259956553222, "grad_norm": 0.08982237428426743, "learning_rate": 1.9665323287191172e-05, "loss": 9.257, "step": 69310 }, { "epoch": 0.3461759344802617, "grad_norm": 0.09677401930093765, "learning_rate": 1.966382137224962e-05, "loss": 9.2609, "step": 69320 }, { "epoch": 0.3462258733052011, "grad_norm": 0.09712191671133041, "learning_rate": 1.9662319457308065e-05, "loss": 9.2516, "step": 69330 }, { "epoch": 0.3462758121301406, "grad_norm": 0.09230844676494598, "learning_rate": 1.966081754236652e-05, "loss": 9.2593, "step": 69340 }, { "epoch": 0.34632575095508, "grad_norm": 0.09202716499567032, "learning_rate": 1.9659315627424966e-05, "loss": 9.2567, "step": 69350 }, { "epoch": 0.3463756897800195, "grad_norm": 0.09389092773199081, "learning_rate": 1.965781371248342e-05, "loss": 9.2622, "step": 69360 }, { "epoch": 0.3464256286049589, "grad_norm": 0.09576646238565445, "learning_rate": 1.9656311797541866e-05, "loss": 9.267, "step": 69370 }, { "epoch": 0.3464755674298984, "grad_norm": 0.09165678918361664, "learning_rate": 1.9654809882600316e-05, "loss": 9.2661, "step": 69380 }, { "epoch": 0.3465255062548378, "grad_norm": 0.09849521517753601, "learning_rate": 1.9653307967658766e-05, "loss": 9.2615, "step": 69390 }, { "epoch": 0.3465754450797773, "grad_norm": 0.09328168630599976, "learning_rate": 1.9651806052717213e-05, "loss": 9.2594, "step": 69400 }, { "epoch": 0.3466253839047167, "grad_norm": 0.09389086067676544, "learning_rate": 1.9650304137775667e-05, "loss": 9.2545, "step": 69410 }, { "epoch": 0.3466753227296562, "grad_norm": 0.09465324133634567, "learning_rate": 1.9648802222834114e-05, "loss": 9.2589, "step": 69420 }, { "epoch": 0.3467252615545956, "grad_norm": 0.09655381739139557, "learning_rate": 1.9647300307892564e-05, "loss": 9.2549, "step": 69430 }, { "epoch": 0.3467752003795351, "grad_norm": 0.09776412695646286, "learning_rate": 1.9645798392951014e-05, "loss": 9.2511, "step": 69440 }, { "epoch": 0.3468251392044745, "grad_norm": 0.09319782257080078, "learning_rate": 1.964429647800946e-05, "loss": 9.2605, "step": 69450 }, { "epoch": 0.346875078029414, "grad_norm": 0.096680648624897, "learning_rate": 1.9642794563067914e-05, "loss": 9.2572, "step": 69460 }, { "epoch": 0.3469250168543534, "grad_norm": 0.0916510671377182, "learning_rate": 1.964129264812636e-05, "loss": 9.2463, "step": 69470 }, { "epoch": 0.3469749556792929, "grad_norm": 0.09772134572267532, "learning_rate": 1.963979073318481e-05, "loss": 9.2529, "step": 69480 }, { "epoch": 0.3470248945042323, "grad_norm": 0.0930207371711731, "learning_rate": 1.963828881824326e-05, "loss": 9.2567, "step": 69490 }, { "epoch": 0.3470748333291718, "grad_norm": 0.09252675622701645, "learning_rate": 1.9636786903301708e-05, "loss": 9.2588, "step": 69500 }, { "epoch": 0.3471247721541112, "grad_norm": 0.09338805079460144, "learning_rate": 1.9635284988360162e-05, "loss": 9.2576, "step": 69510 }, { "epoch": 0.34717471097905067, "grad_norm": 0.09176600724458694, "learning_rate": 1.963378307341861e-05, "loss": 18.6508, "step": 69520 }, { "epoch": 0.3472246498039901, "grad_norm": 0.09212823212146759, "learning_rate": 1.963228115847706e-05, "loss": 15.6648, "step": 69530 }, { "epoch": 0.34727458862892957, "grad_norm": 0.092359758913517, "learning_rate": 1.963077924353551e-05, "loss": 9.2493, "step": 69540 }, { "epoch": 0.347324527453869, "grad_norm": 0.08757336437702179, "learning_rate": 1.9629277328593956e-05, "loss": 9.2656, "step": 69550 }, { "epoch": 0.34737446627880847, "grad_norm": 0.08831485360860825, "learning_rate": 1.962777541365241e-05, "loss": 9.2628, "step": 69560 }, { "epoch": 0.3474244051037479, "grad_norm": 0.09570340067148209, "learning_rate": 1.9626273498710856e-05, "loss": 9.2617, "step": 69570 }, { "epoch": 0.34747434392868737, "grad_norm": 0.09006953239440918, "learning_rate": 1.9624771583769306e-05, "loss": 9.2511, "step": 69580 }, { "epoch": 0.3475242827536268, "grad_norm": 0.09478635340929031, "learning_rate": 1.9623269668827756e-05, "loss": 9.268, "step": 69590 }, { "epoch": 0.34757422157856627, "grad_norm": 0.09337896853685379, "learning_rate": 1.9621767753886203e-05, "loss": 9.2629, "step": 69600 }, { "epoch": 0.3476241604035057, "grad_norm": 0.09267710894346237, "learning_rate": 1.9620265838944657e-05, "loss": 9.2478, "step": 69610 }, { "epoch": 0.34767409922844517, "grad_norm": 0.09669796377420425, "learning_rate": 1.9618763924003104e-05, "loss": 9.2465, "step": 69620 }, { "epoch": 0.3477240380533846, "grad_norm": 0.09430265426635742, "learning_rate": 1.9617262009061554e-05, "loss": 9.2617, "step": 69630 }, { "epoch": 0.34777397687832406, "grad_norm": 0.09212590754032135, "learning_rate": 1.9615760094120004e-05, "loss": 9.2578, "step": 69640 }, { "epoch": 0.3478239157032635, "grad_norm": 0.09378217160701752, "learning_rate": 1.961425817917845e-05, "loss": 9.2584, "step": 69650 }, { "epoch": 0.34787385452820296, "grad_norm": 0.09014896303415298, "learning_rate": 1.9612756264236904e-05, "loss": 9.2596, "step": 69660 }, { "epoch": 0.3479237933531424, "grad_norm": 0.0964147299528122, "learning_rate": 1.961125434929535e-05, "loss": 9.2503, "step": 69670 }, { "epoch": 0.34797373217808186, "grad_norm": 0.09427673369646072, "learning_rate": 1.96097524343538e-05, "loss": 9.2688, "step": 69680 }, { "epoch": 0.3480236710030213, "grad_norm": 0.09578701853752136, "learning_rate": 1.960825051941225e-05, "loss": 9.2582, "step": 69690 }, { "epoch": 0.34807360982796076, "grad_norm": 0.09862515330314636, "learning_rate": 1.96067486044707e-05, "loss": 9.2589, "step": 69700 }, { "epoch": 0.3481235486529002, "grad_norm": 0.09679950028657913, "learning_rate": 1.9605246689529152e-05, "loss": 9.2639, "step": 69710 }, { "epoch": 0.34817348747783966, "grad_norm": 0.09329384565353394, "learning_rate": 1.96037447745876e-05, "loss": 9.2534, "step": 69720 }, { "epoch": 0.3482234263027791, "grad_norm": 0.09050307422876358, "learning_rate": 1.960224285964605e-05, "loss": 9.258, "step": 69730 }, { "epoch": 0.34827336512771856, "grad_norm": 0.08931013941764832, "learning_rate": 1.96007409447045e-05, "loss": 9.2506, "step": 69740 }, { "epoch": 0.348323303952658, "grad_norm": 0.09590967744588852, "learning_rate": 1.959923902976295e-05, "loss": 9.2381, "step": 69750 }, { "epoch": 0.34837324277759746, "grad_norm": 0.09243065118789673, "learning_rate": 1.95977371148214e-05, "loss": 9.2571, "step": 69760 }, { "epoch": 0.3484231816025369, "grad_norm": 0.10139338672161102, "learning_rate": 1.9596235199879846e-05, "loss": 9.2542, "step": 69770 }, { "epoch": 0.34847312042747636, "grad_norm": 0.09506342560052872, "learning_rate": 1.9594733284938296e-05, "loss": 9.2574, "step": 69780 }, { "epoch": 0.3485230592524158, "grad_norm": 0.09532468020915985, "learning_rate": 1.9593231369996746e-05, "loss": 9.2527, "step": 69790 }, { "epoch": 0.34857299807735526, "grad_norm": 0.09643257409334183, "learning_rate": 1.9591729455055197e-05, "loss": 9.2545, "step": 69800 }, { "epoch": 0.3486229369022947, "grad_norm": 0.09859835356473923, "learning_rate": 1.9590227540113647e-05, "loss": 9.2565, "step": 69810 }, { "epoch": 0.34867287572723416, "grad_norm": 0.09410768747329712, "learning_rate": 1.9588725625172094e-05, "loss": 9.2615, "step": 69820 }, { "epoch": 0.3487228145521736, "grad_norm": 0.0925586149096489, "learning_rate": 1.9587223710230544e-05, "loss": 9.2492, "step": 69830 }, { "epoch": 0.34877275337711305, "grad_norm": 0.09222124516963959, "learning_rate": 1.9585721795288994e-05, "loss": 9.2527, "step": 69840 }, { "epoch": 0.3488226922020525, "grad_norm": 0.08799050003290176, "learning_rate": 1.9584219880347444e-05, "loss": 9.2572, "step": 69850 }, { "epoch": 0.34887263102699195, "grad_norm": 0.09063519537448883, "learning_rate": 1.9582717965405894e-05, "loss": 9.2635, "step": 69860 }, { "epoch": 0.3489225698519314, "grad_norm": 0.09138821065425873, "learning_rate": 1.958121605046434e-05, "loss": 9.2414, "step": 69870 }, { "epoch": 0.34897250867687085, "grad_norm": 0.0935179740190506, "learning_rate": 1.957971413552279e-05, "loss": 9.2509, "step": 69880 }, { "epoch": 0.3490224475018103, "grad_norm": 0.09351733326911926, "learning_rate": 1.957821222058124e-05, "loss": 9.2526, "step": 69890 }, { "epoch": 0.34907238632674975, "grad_norm": 0.09050756692886353, "learning_rate": 1.957671030563969e-05, "loss": 9.2616, "step": 69900 }, { "epoch": 0.3491223251516892, "grad_norm": 0.09149663895368576, "learning_rate": 1.9575208390698142e-05, "loss": 9.2517, "step": 69910 }, { "epoch": 0.34917226397662865, "grad_norm": 0.09362272918224335, "learning_rate": 1.957370647575659e-05, "loss": 9.2577, "step": 69920 }, { "epoch": 0.34922220280156807, "grad_norm": 0.09068772941827774, "learning_rate": 1.957220456081504e-05, "loss": 9.2577, "step": 69930 }, { "epoch": 0.34927214162650755, "grad_norm": 0.08827726542949677, "learning_rate": 1.957070264587349e-05, "loss": 9.2646, "step": 69940 }, { "epoch": 0.34932208045144697, "grad_norm": 0.09964391589164734, "learning_rate": 1.956920073093194e-05, "loss": 9.2494, "step": 69950 }, { "epoch": 0.34937201927638645, "grad_norm": 0.09400717914104462, "learning_rate": 1.956769881599039e-05, "loss": 9.2437, "step": 69960 }, { "epoch": 0.34942195810132587, "grad_norm": 0.0930166095495224, "learning_rate": 1.9566196901048836e-05, "loss": 9.2528, "step": 69970 }, { "epoch": 0.34947189692626535, "grad_norm": 0.09116925299167633, "learning_rate": 1.9564694986107286e-05, "loss": 9.2635, "step": 69980 }, { "epoch": 0.34952183575120477, "grad_norm": 0.09678564965724945, "learning_rate": 1.9563193071165737e-05, "loss": 9.2448, "step": 69990 }, { "epoch": 0.34957177457614425, "grad_norm": 0.09541235119104385, "learning_rate": 1.9561691156224187e-05, "loss": 9.2439, "step": 70000 }, { "epoch": 0.34962171340108367, "grad_norm": 0.09007764607667923, "learning_rate": 1.9560189241282637e-05, "loss": 9.2601, "step": 70010 }, { "epoch": 0.34967165222602314, "grad_norm": 0.09365357458591461, "learning_rate": 1.9558687326341087e-05, "loss": 9.2501, "step": 70020 }, { "epoch": 0.34972159105096257, "grad_norm": 0.09102270007133484, "learning_rate": 1.9557185411399534e-05, "loss": 9.2509, "step": 70030 }, { "epoch": 0.34977152987590204, "grad_norm": 0.09124330431222916, "learning_rate": 1.9555683496457984e-05, "loss": 9.2493, "step": 70040 }, { "epoch": 0.34982146870084146, "grad_norm": 0.09593762457370758, "learning_rate": 1.9554181581516434e-05, "loss": 9.2587, "step": 70050 }, { "epoch": 0.34987140752578094, "grad_norm": 0.09397412836551666, "learning_rate": 1.9552679666574884e-05, "loss": 9.2518, "step": 70060 }, { "epoch": 0.34992134635072036, "grad_norm": 0.09382015466690063, "learning_rate": 1.9551177751633335e-05, "loss": 9.2479, "step": 70070 }, { "epoch": 0.34997128517565984, "grad_norm": 0.09169856458902359, "learning_rate": 1.954967583669178e-05, "loss": 9.2492, "step": 70080 }, { "epoch": 0.35002122400059926, "grad_norm": 0.09524683654308319, "learning_rate": 1.954817392175023e-05, "loss": 9.2446, "step": 70090 }, { "epoch": 0.35007116282553874, "grad_norm": 0.09151513129472733, "learning_rate": 1.9546672006808682e-05, "loss": 9.26, "step": 70100 }, { "epoch": 0.35012110165047816, "grad_norm": 0.09831450879573822, "learning_rate": 1.9545170091867132e-05, "loss": 9.2467, "step": 70110 }, { "epoch": 0.35017104047541764, "grad_norm": 0.08933068811893463, "learning_rate": 1.9543668176925582e-05, "loss": 9.2457, "step": 70120 }, { "epoch": 0.35022097930035706, "grad_norm": 0.09220155328512192, "learning_rate": 1.954216626198403e-05, "loss": 9.2452, "step": 70130 }, { "epoch": 0.35027091812529654, "grad_norm": 0.09587565809488297, "learning_rate": 1.954066434704248e-05, "loss": 9.2367, "step": 70140 }, { "epoch": 0.35032085695023596, "grad_norm": 0.09450395405292511, "learning_rate": 1.953916243210093e-05, "loss": 9.2412, "step": 70150 }, { "epoch": 0.35037079577517544, "grad_norm": 0.0945536196231842, "learning_rate": 1.953766051715938e-05, "loss": 9.2447, "step": 70160 }, { "epoch": 0.35042073460011486, "grad_norm": 0.09448517858982086, "learning_rate": 1.953615860221783e-05, "loss": 9.2549, "step": 70170 }, { "epoch": 0.35047067342505434, "grad_norm": 0.09346494823694229, "learning_rate": 1.9534656687276276e-05, "loss": 9.2633, "step": 70180 }, { "epoch": 0.35052061224999376, "grad_norm": 0.0924750491976738, "learning_rate": 1.9533154772334727e-05, "loss": 9.243, "step": 70190 }, { "epoch": 0.35057055107493323, "grad_norm": 0.09105124324560165, "learning_rate": 1.9531652857393177e-05, "loss": 9.2572, "step": 70200 }, { "epoch": 0.35062048989987266, "grad_norm": 0.09556052833795547, "learning_rate": 1.9530150942451627e-05, "loss": 9.2382, "step": 70210 }, { "epoch": 0.3506704287248121, "grad_norm": 0.09211760014295578, "learning_rate": 1.9528649027510077e-05, "loss": 9.2363, "step": 70220 }, { "epoch": 0.35072036754975155, "grad_norm": 0.09628025442361832, "learning_rate": 1.9527147112568524e-05, "loss": 9.249, "step": 70230 }, { "epoch": 0.350770306374691, "grad_norm": 0.09602992981672287, "learning_rate": 1.9525645197626974e-05, "loss": 9.2451, "step": 70240 }, { "epoch": 0.35082024519963045, "grad_norm": 0.09380794316530228, "learning_rate": 1.9524143282685424e-05, "loss": 9.2573, "step": 70250 }, { "epoch": 0.3508701840245699, "grad_norm": 0.09438929706811905, "learning_rate": 1.9522641367743874e-05, "loss": 9.2478, "step": 70260 }, { "epoch": 0.35092012284950935, "grad_norm": 0.09579429030418396, "learning_rate": 1.9521139452802325e-05, "loss": 9.2498, "step": 70270 }, { "epoch": 0.3509700616744488, "grad_norm": 0.0962810218334198, "learning_rate": 1.951963753786077e-05, "loss": 9.2395, "step": 70280 }, { "epoch": 0.35102000049938825, "grad_norm": 0.0956900343298912, "learning_rate": 1.951813562291922e-05, "loss": 9.2403, "step": 70290 }, { "epoch": 0.3510699393243277, "grad_norm": 0.09246978163719177, "learning_rate": 1.9516633707977672e-05, "loss": 9.2375, "step": 70300 }, { "epoch": 0.35111987814926715, "grad_norm": 0.09685346484184265, "learning_rate": 1.9515131793036122e-05, "loss": 9.2514, "step": 70310 }, { "epoch": 0.3511698169742066, "grad_norm": 0.09638645499944687, "learning_rate": 1.9513629878094572e-05, "loss": 9.2439, "step": 70320 }, { "epoch": 0.35121975579914605, "grad_norm": 0.09618096798658371, "learning_rate": 1.951212796315302e-05, "loss": 9.2504, "step": 70330 }, { "epoch": 0.35126969462408547, "grad_norm": 0.09635983407497406, "learning_rate": 1.9510626048211472e-05, "loss": 9.2449, "step": 70340 }, { "epoch": 0.35131963344902495, "grad_norm": 0.09640626609325409, "learning_rate": 1.950912413326992e-05, "loss": 9.2455, "step": 70350 }, { "epoch": 0.35136957227396437, "grad_norm": 0.08988912403583527, "learning_rate": 1.950762221832837e-05, "loss": 9.2467, "step": 70360 }, { "epoch": 0.35141951109890385, "grad_norm": 0.09091424196958542, "learning_rate": 1.950612030338682e-05, "loss": 9.2542, "step": 70370 }, { "epoch": 0.35146944992384327, "grad_norm": 0.09256154298782349, "learning_rate": 1.9504618388445266e-05, "loss": 9.2376, "step": 70380 }, { "epoch": 0.35151938874878275, "grad_norm": 0.09144950658082962, "learning_rate": 1.950311647350372e-05, "loss": 9.2372, "step": 70390 }, { "epoch": 0.35156932757372217, "grad_norm": 0.09599640965461731, "learning_rate": 1.9501614558562167e-05, "loss": 9.2434, "step": 70400 }, { "epoch": 0.35161926639866165, "grad_norm": 0.0928352028131485, "learning_rate": 1.9500112643620617e-05, "loss": 9.253, "step": 70410 }, { "epoch": 0.35166920522360107, "grad_norm": 0.09690866619348526, "learning_rate": 1.9498610728679067e-05, "loss": 9.2466, "step": 70420 }, { "epoch": 0.35171914404854054, "grad_norm": 0.09045714139938354, "learning_rate": 1.9497108813737514e-05, "loss": 9.2495, "step": 70430 }, { "epoch": 0.35176908287347997, "grad_norm": 0.09313403815031052, "learning_rate": 1.9495606898795967e-05, "loss": 9.2456, "step": 70440 }, { "epoch": 0.35181902169841944, "grad_norm": 0.0966280922293663, "learning_rate": 1.9494104983854414e-05, "loss": 9.2418, "step": 70450 }, { "epoch": 0.35186896052335886, "grad_norm": 0.09932943433523178, "learning_rate": 1.9492603068912864e-05, "loss": 9.24, "step": 70460 }, { "epoch": 0.35191889934829834, "grad_norm": 0.10107915848493576, "learning_rate": 1.9491101153971315e-05, "loss": 9.2464, "step": 70470 }, { "epoch": 0.35196883817323776, "grad_norm": 0.09643235802650452, "learning_rate": 1.948959923902976e-05, "loss": 9.2393, "step": 70480 }, { "epoch": 0.35201877699817724, "grad_norm": 0.09176507592201233, "learning_rate": 1.9488097324088215e-05, "loss": 9.2509, "step": 70490 }, { "epoch": 0.35206871582311666, "grad_norm": 0.09726466983556747, "learning_rate": 1.9486595409146662e-05, "loss": 9.235, "step": 70500 }, { "epoch": 0.35211865464805614, "grad_norm": 0.09814643114805222, "learning_rate": 1.9485093494205112e-05, "loss": 9.2476, "step": 70510 }, { "epoch": 0.35216859347299556, "grad_norm": 0.09261635690927505, "learning_rate": 1.9483591579263562e-05, "loss": 9.2453, "step": 70520 }, { "epoch": 0.35221853229793504, "grad_norm": 0.09454991668462753, "learning_rate": 1.948208966432201e-05, "loss": 9.2552, "step": 70530 }, { "epoch": 0.35226847112287446, "grad_norm": 0.09696483612060547, "learning_rate": 1.9480587749380462e-05, "loss": 9.2484, "step": 70540 }, { "epoch": 0.35231840994781394, "grad_norm": 0.09152613580226898, "learning_rate": 1.947908583443891e-05, "loss": 9.249, "step": 70550 }, { "epoch": 0.35236834877275336, "grad_norm": 0.09792204946279526, "learning_rate": 1.947758391949736e-05, "loss": 9.2417, "step": 70560 }, { "epoch": 0.35241828759769284, "grad_norm": 0.09588468819856644, "learning_rate": 1.947608200455581e-05, "loss": 9.2381, "step": 70570 }, { "epoch": 0.35246822642263226, "grad_norm": 0.09020346403121948, "learning_rate": 1.9474580089614256e-05, "loss": 9.2366, "step": 70580 }, { "epoch": 0.35251816524757174, "grad_norm": 0.08940046280622482, "learning_rate": 1.947307817467271e-05, "loss": 9.2436, "step": 70590 }, { "epoch": 0.35256810407251116, "grad_norm": 0.09213332086801529, "learning_rate": 1.9471576259731157e-05, "loss": 9.2367, "step": 70600 }, { "epoch": 0.35261804289745063, "grad_norm": 0.09027668833732605, "learning_rate": 1.9470074344789607e-05, "loss": 9.242, "step": 70610 }, { "epoch": 0.35266798172239006, "grad_norm": 0.0947854295372963, "learning_rate": 1.9468572429848057e-05, "loss": 9.2426, "step": 70620 }, { "epoch": 0.35271792054732953, "grad_norm": 0.09516201168298721, "learning_rate": 1.9467070514906504e-05, "loss": 9.2549, "step": 70630 }, { "epoch": 0.35276785937226895, "grad_norm": 0.09099344164133072, "learning_rate": 1.9465568599964957e-05, "loss": 9.2451, "step": 70640 }, { "epoch": 0.35281779819720843, "grad_norm": 0.09635750204324722, "learning_rate": 1.9464066685023404e-05, "loss": 9.2471, "step": 70650 }, { "epoch": 0.35286773702214785, "grad_norm": 0.0907939150929451, "learning_rate": 1.9462564770081854e-05, "loss": 9.2428, "step": 70660 }, { "epoch": 0.35291767584708733, "grad_norm": 0.0959140956401825, "learning_rate": 1.9461062855140305e-05, "loss": 9.244, "step": 70670 }, { "epoch": 0.35296761467202675, "grad_norm": 0.09400851279497147, "learning_rate": 1.945956094019875e-05, "loss": 9.2519, "step": 70680 }, { "epoch": 0.35301755349696623, "grad_norm": 0.09195028990507126, "learning_rate": 1.9458059025257205e-05, "loss": 9.244, "step": 70690 }, { "epoch": 0.35306749232190565, "grad_norm": 0.09665300697088242, "learning_rate": 1.9456557110315652e-05, "loss": 9.2462, "step": 70700 }, { "epoch": 0.35311743114684513, "grad_norm": 0.09066880494356155, "learning_rate": 1.9455055195374105e-05, "loss": 9.2491, "step": 70710 }, { "epoch": 0.35316736997178455, "grad_norm": 0.0936349481344223, "learning_rate": 1.9453553280432552e-05, "loss": 9.2481, "step": 70720 }, { "epoch": 0.353217308796724, "grad_norm": 0.0960550457239151, "learning_rate": 1.9452051365491e-05, "loss": 9.2365, "step": 70730 }, { "epoch": 0.35326724762166345, "grad_norm": 0.09481688588857651, "learning_rate": 1.9450549450549452e-05, "loss": 9.2371, "step": 70740 }, { "epoch": 0.3533171864466029, "grad_norm": 0.099585622549057, "learning_rate": 1.94490475356079e-05, "loss": 9.241, "step": 70750 }, { "epoch": 0.35336712527154235, "grad_norm": 0.09653332084417343, "learning_rate": 1.9447545620666353e-05, "loss": 9.2421, "step": 70760 }, { "epoch": 0.3534170640964818, "grad_norm": 0.09810932725667953, "learning_rate": 1.94460437057248e-05, "loss": 9.2449, "step": 70770 }, { "epoch": 0.35346700292142125, "grad_norm": 0.09474793076515198, "learning_rate": 1.9444541790783246e-05, "loss": 9.2413, "step": 70780 }, { "epoch": 0.3535169417463607, "grad_norm": 0.09423346817493439, "learning_rate": 1.94430398758417e-05, "loss": 9.244, "step": 70790 }, { "epoch": 0.35356688057130015, "grad_norm": 0.09000688046216965, "learning_rate": 1.9441537960900147e-05, "loss": 9.2306, "step": 70800 }, { "epoch": 0.3536168193962396, "grad_norm": 0.09328895062208176, "learning_rate": 1.94400360459586e-05, "loss": 9.2454, "step": 70810 }, { "epoch": 0.35366675822117905, "grad_norm": 0.09733468294143677, "learning_rate": 1.9438534131017047e-05, "loss": 9.2466, "step": 70820 }, { "epoch": 0.3537166970461185, "grad_norm": 0.09681283682584763, "learning_rate": 1.9437032216075494e-05, "loss": 9.2315, "step": 70830 }, { "epoch": 0.35376663587105794, "grad_norm": 0.09605410695075989, "learning_rate": 1.9435530301133947e-05, "loss": 9.2368, "step": 70840 }, { "epoch": 0.3538165746959974, "grad_norm": 0.10018260776996613, "learning_rate": 1.9434028386192394e-05, "loss": 9.2381, "step": 70850 }, { "epoch": 0.35386651352093684, "grad_norm": 0.09301795065402985, "learning_rate": 1.9432526471250848e-05, "loss": 9.2454, "step": 70860 }, { "epoch": 0.3539164523458763, "grad_norm": 0.09586118161678314, "learning_rate": 1.9431024556309295e-05, "loss": 9.2435, "step": 70870 }, { "epoch": 0.35396639117081574, "grad_norm": 0.09101895242929459, "learning_rate": 1.942952264136774e-05, "loss": 9.2297, "step": 70880 }, { "epoch": 0.3540163299957552, "grad_norm": 0.08979874104261398, "learning_rate": 1.9428020726426195e-05, "loss": 9.2355, "step": 70890 }, { "epoch": 0.35406626882069464, "grad_norm": 0.09098219126462936, "learning_rate": 1.9426518811484642e-05, "loss": 9.2347, "step": 70900 }, { "epoch": 0.3541162076456341, "grad_norm": 0.09509828686714172, "learning_rate": 1.9425016896543095e-05, "loss": 9.2406, "step": 70910 }, { "epoch": 0.35416614647057354, "grad_norm": 0.09496752172708511, "learning_rate": 1.9423514981601542e-05, "loss": 9.246, "step": 70920 }, { "epoch": 0.354216085295513, "grad_norm": 0.09700390696525574, "learning_rate": 1.942201306665999e-05, "loss": 9.2376, "step": 70930 }, { "epoch": 0.35426602412045244, "grad_norm": 0.09865306317806244, "learning_rate": 1.9420511151718442e-05, "loss": 9.2343, "step": 70940 }, { "epoch": 0.3543159629453919, "grad_norm": 0.09420738369226456, "learning_rate": 1.941900923677689e-05, "loss": 9.2405, "step": 70950 }, { "epoch": 0.35436590177033134, "grad_norm": 0.09997241944074631, "learning_rate": 1.9417507321835343e-05, "loss": 9.2398, "step": 70960 }, { "epoch": 0.3544158405952708, "grad_norm": 0.09588761627674103, "learning_rate": 1.941600540689379e-05, "loss": 9.2359, "step": 70970 }, { "epoch": 0.35446577942021024, "grad_norm": 0.09791746735572815, "learning_rate": 1.9414503491952236e-05, "loss": 9.2349, "step": 70980 }, { "epoch": 0.3545157182451497, "grad_norm": 0.0913577750325203, "learning_rate": 1.941300157701069e-05, "loss": 9.238, "step": 70990 }, { "epoch": 0.35456565707008914, "grad_norm": 0.08983467519283295, "learning_rate": 1.9411499662069137e-05, "loss": 9.2347, "step": 71000 }, { "epoch": 0.3546155958950286, "grad_norm": 0.09971988946199417, "learning_rate": 1.940999774712759e-05, "loss": 9.2341, "step": 71010 }, { "epoch": 0.35466553471996803, "grad_norm": 0.0975378155708313, "learning_rate": 1.9408495832186037e-05, "loss": 9.2304, "step": 71020 }, { "epoch": 0.3547154735449075, "grad_norm": 0.09106051921844482, "learning_rate": 1.9406993917244487e-05, "loss": 9.2377, "step": 71030 }, { "epoch": 0.35476541236984693, "grad_norm": 0.09189257025718689, "learning_rate": 1.9405492002302937e-05, "loss": 9.2449, "step": 71040 }, { "epoch": 0.3548153511947864, "grad_norm": 0.09312552958726883, "learning_rate": 1.9403990087361384e-05, "loss": 9.2376, "step": 71050 }, { "epoch": 0.35486529001972583, "grad_norm": 0.09094572812318802, "learning_rate": 1.9402488172419838e-05, "loss": 9.2444, "step": 71060 }, { "epoch": 0.3549152288446653, "grad_norm": 0.09242424368858337, "learning_rate": 1.9400986257478285e-05, "loss": 9.2318, "step": 71070 }, { "epoch": 0.35496516766960473, "grad_norm": 0.09304199367761612, "learning_rate": 1.9399484342536735e-05, "loss": 9.25, "step": 71080 }, { "epoch": 0.3550151064945442, "grad_norm": 0.09726741909980774, "learning_rate": 1.9397982427595185e-05, "loss": 9.2376, "step": 71090 }, { "epoch": 0.35506504531948363, "grad_norm": 0.09090828150510788, "learning_rate": 1.9396480512653632e-05, "loss": 9.2332, "step": 71100 }, { "epoch": 0.3551149841444231, "grad_norm": 0.09156540036201477, "learning_rate": 1.9394978597712085e-05, "loss": 9.2448, "step": 71110 }, { "epoch": 0.35516492296936253, "grad_norm": 0.0888853594660759, "learning_rate": 1.9393476682770532e-05, "loss": 9.2353, "step": 71120 }, { "epoch": 0.355214861794302, "grad_norm": 0.09629367291927338, "learning_rate": 1.9391974767828982e-05, "loss": 9.2288, "step": 71130 }, { "epoch": 0.3552648006192414, "grad_norm": 0.09092582762241364, "learning_rate": 1.9390472852887432e-05, "loss": 9.2368, "step": 71140 }, { "epoch": 0.3553147394441809, "grad_norm": 0.09426487237215042, "learning_rate": 1.938897093794588e-05, "loss": 9.2381, "step": 71150 }, { "epoch": 0.3553646782691203, "grad_norm": 0.09681005775928497, "learning_rate": 1.9387469023004333e-05, "loss": 9.2227, "step": 71160 }, { "epoch": 0.3554146170940598, "grad_norm": 0.0895727351307869, "learning_rate": 1.938596710806278e-05, "loss": 9.2364, "step": 71170 }, { "epoch": 0.3554645559189992, "grad_norm": 0.09309788048267365, "learning_rate": 1.938446519312123e-05, "loss": 9.2421, "step": 71180 }, { "epoch": 0.3555144947439387, "grad_norm": 0.09737984091043472, "learning_rate": 1.938296327817968e-05, "loss": 9.236, "step": 71190 }, { "epoch": 0.3555644335688781, "grad_norm": 0.09271812438964844, "learning_rate": 1.9381461363238127e-05, "loss": 9.23, "step": 71200 }, { "epoch": 0.35561437239381755, "grad_norm": 0.09458890557289124, "learning_rate": 1.937995944829658e-05, "loss": 9.2267, "step": 71210 }, { "epoch": 0.355664311218757, "grad_norm": 0.09335251152515411, "learning_rate": 1.9378457533355027e-05, "loss": 9.2298, "step": 71220 }, { "epoch": 0.35571425004369644, "grad_norm": 0.09530318528413773, "learning_rate": 1.9376955618413477e-05, "loss": 9.2515, "step": 71230 }, { "epoch": 0.3557641888686359, "grad_norm": 0.0916392132639885, "learning_rate": 1.9375453703471927e-05, "loss": 9.2341, "step": 71240 }, { "epoch": 0.35581412769357534, "grad_norm": 0.09264536947011948, "learning_rate": 1.9373951788530374e-05, "loss": 9.2404, "step": 71250 }, { "epoch": 0.3558640665185148, "grad_norm": 0.09547609835863113, "learning_rate": 1.9372449873588828e-05, "loss": 9.2349, "step": 71260 }, { "epoch": 0.35591400534345424, "grad_norm": 0.099251389503479, "learning_rate": 1.9370947958647275e-05, "loss": 9.234, "step": 71270 }, { "epoch": 0.3559639441683937, "grad_norm": 0.08727670460939407, "learning_rate": 1.9369446043705725e-05, "loss": 9.2337, "step": 71280 }, { "epoch": 0.35601388299333314, "grad_norm": 0.09636933356523514, "learning_rate": 1.9367944128764175e-05, "loss": 9.2463, "step": 71290 }, { "epoch": 0.3560638218182726, "grad_norm": 0.09374624490737915, "learning_rate": 1.9366442213822622e-05, "loss": 9.2331, "step": 71300 }, { "epoch": 0.35611376064321204, "grad_norm": 0.08606693148612976, "learning_rate": 1.9364940298881075e-05, "loss": 9.22, "step": 71310 }, { "epoch": 0.3561636994681515, "grad_norm": 0.09905658662319183, "learning_rate": 1.9363438383939522e-05, "loss": 9.2281, "step": 71320 }, { "epoch": 0.35621363829309094, "grad_norm": 0.09433192014694214, "learning_rate": 1.9361936468997972e-05, "loss": 9.2424, "step": 71330 }, { "epoch": 0.3562635771180304, "grad_norm": 0.09173821657896042, "learning_rate": 1.9360434554056423e-05, "loss": 9.2255, "step": 71340 }, { "epoch": 0.35631351594296984, "grad_norm": 0.09594710916280746, "learning_rate": 1.9358932639114873e-05, "loss": 9.2267, "step": 71350 }, { "epoch": 0.3563634547679093, "grad_norm": 0.09441184997558594, "learning_rate": 1.9357430724173323e-05, "loss": 9.2384, "step": 71360 }, { "epoch": 0.35641339359284874, "grad_norm": 0.09391657263040543, "learning_rate": 1.935592880923177e-05, "loss": 9.2407, "step": 71370 }, { "epoch": 0.3564633324177882, "grad_norm": 0.09173735976219177, "learning_rate": 1.935442689429022e-05, "loss": 9.2305, "step": 71380 }, { "epoch": 0.35651327124272764, "grad_norm": 0.09266167879104614, "learning_rate": 1.935292497934867e-05, "loss": 9.2273, "step": 71390 }, { "epoch": 0.3565632100676671, "grad_norm": 0.09733938425779343, "learning_rate": 1.935142306440712e-05, "loss": 9.222, "step": 71400 }, { "epoch": 0.35661314889260654, "grad_norm": 0.09893626719713211, "learning_rate": 1.934992114946557e-05, "loss": 9.2267, "step": 71410 }, { "epoch": 0.356663087717546, "grad_norm": 0.08972984552383423, "learning_rate": 1.9348419234524017e-05, "loss": 9.2272, "step": 71420 }, { "epoch": 0.35671302654248543, "grad_norm": 0.09227081388235092, "learning_rate": 1.9346917319582467e-05, "loss": 9.2393, "step": 71430 }, { "epoch": 0.3567629653674249, "grad_norm": 0.09158577024936676, "learning_rate": 1.9345415404640918e-05, "loss": 9.2323, "step": 71440 }, { "epoch": 0.35681290419236433, "grad_norm": 0.09240981191396713, "learning_rate": 1.9343913489699368e-05, "loss": 9.2337, "step": 71450 }, { "epoch": 0.3568628430173038, "grad_norm": 0.09477624297142029, "learning_rate": 1.9342411574757818e-05, "loss": 9.2299, "step": 71460 }, { "epoch": 0.35691278184224323, "grad_norm": 0.0945139229297638, "learning_rate": 1.9340909659816265e-05, "loss": 9.2225, "step": 71470 }, { "epoch": 0.3569627206671827, "grad_norm": 0.09509062021970749, "learning_rate": 1.9339407744874715e-05, "loss": 9.2309, "step": 71480 }, { "epoch": 0.35701265949212213, "grad_norm": 0.08713449537754059, "learning_rate": 1.9337905829933165e-05, "loss": 9.2212, "step": 71490 }, { "epoch": 0.3570625983170616, "grad_norm": 0.10225412994623184, "learning_rate": 1.9336403914991615e-05, "loss": 9.2294, "step": 71500 }, { "epoch": 0.35711253714200103, "grad_norm": 0.09623237699270248, "learning_rate": 1.9334902000050065e-05, "loss": 9.2218, "step": 71510 }, { "epoch": 0.3571624759669405, "grad_norm": 0.09265895932912827, "learning_rate": 1.9333400085108512e-05, "loss": 9.2148, "step": 71520 }, { "epoch": 0.35721241479187993, "grad_norm": 0.09351124614477158, "learning_rate": 1.9331898170166962e-05, "loss": 9.2253, "step": 71530 }, { "epoch": 0.3572623536168194, "grad_norm": 0.09479804337024689, "learning_rate": 1.9330396255225413e-05, "loss": 9.2182, "step": 71540 }, { "epoch": 0.3573122924417588, "grad_norm": 0.09572158753871918, "learning_rate": 1.9328894340283863e-05, "loss": 9.2224, "step": 71550 }, { "epoch": 0.3573622312666983, "grad_norm": 0.09059182554483414, "learning_rate": 1.9327392425342313e-05, "loss": 9.2344, "step": 71560 }, { "epoch": 0.3574121700916377, "grad_norm": 0.09375757724046707, "learning_rate": 1.932589051040076e-05, "loss": 9.2307, "step": 71570 }, { "epoch": 0.3574621089165772, "grad_norm": 0.08641654253005981, "learning_rate": 1.932438859545921e-05, "loss": 9.2391, "step": 71580 }, { "epoch": 0.3575120477415166, "grad_norm": 0.09109997749328613, "learning_rate": 1.932288668051766e-05, "loss": 9.2417, "step": 71590 }, { "epoch": 0.3575619865664561, "grad_norm": 0.1009616032242775, "learning_rate": 1.932138476557611e-05, "loss": 9.2308, "step": 71600 }, { "epoch": 0.3576119253913955, "grad_norm": 0.09245188534259796, "learning_rate": 1.931988285063456e-05, "loss": 9.2251, "step": 71610 }, { "epoch": 0.357661864216335, "grad_norm": 0.09447357058525085, "learning_rate": 1.9318380935693007e-05, "loss": 9.2347, "step": 71620 }, { "epoch": 0.3577118030412744, "grad_norm": 0.0957716852426529, "learning_rate": 1.9316879020751457e-05, "loss": 9.2316, "step": 71630 }, { "epoch": 0.3577617418662139, "grad_norm": 0.09259764105081558, "learning_rate": 1.9315377105809908e-05, "loss": 9.2325, "step": 71640 }, { "epoch": 0.3578116806911533, "grad_norm": 0.09912045300006866, "learning_rate": 1.9313875190868358e-05, "loss": 9.2225, "step": 71650 }, { "epoch": 0.3578616195160928, "grad_norm": 0.09714524447917938, "learning_rate": 1.9312373275926808e-05, "loss": 9.2267, "step": 71660 }, { "epoch": 0.3579115583410322, "grad_norm": 0.08966837078332901, "learning_rate": 1.9310871360985258e-05, "loss": 9.2311, "step": 71670 }, { "epoch": 0.3579614971659717, "grad_norm": 0.09709369391202927, "learning_rate": 1.9309369446043705e-05, "loss": 9.2262, "step": 71680 }, { "epoch": 0.3580114359909111, "grad_norm": 0.0912432074546814, "learning_rate": 1.9307867531102155e-05, "loss": 9.2241, "step": 71690 }, { "epoch": 0.3580613748158506, "grad_norm": 0.09726667404174805, "learning_rate": 1.9306365616160605e-05, "loss": 9.2349, "step": 71700 }, { "epoch": 0.35811131364079, "grad_norm": 0.10004935413599014, "learning_rate": 1.9304863701219055e-05, "loss": 9.2206, "step": 71710 }, { "epoch": 0.3581612524657295, "grad_norm": 0.0955386683344841, "learning_rate": 1.9303361786277506e-05, "loss": 9.2124, "step": 71720 }, { "epoch": 0.3582111912906689, "grad_norm": 0.09714854508638382, "learning_rate": 1.9301859871335952e-05, "loss": 9.2274, "step": 71730 }, { "epoch": 0.3582611301156084, "grad_norm": 0.0956362634897232, "learning_rate": 1.9300357956394403e-05, "loss": 9.2244, "step": 71740 }, { "epoch": 0.3583110689405478, "grad_norm": 0.08816993236541748, "learning_rate": 1.9298856041452853e-05, "loss": 9.2184, "step": 71750 }, { "epoch": 0.3583610077654873, "grad_norm": 0.09849813580513, "learning_rate": 1.9297354126511303e-05, "loss": 9.2217, "step": 71760 }, { "epoch": 0.3584109465904267, "grad_norm": 0.0990118458867073, "learning_rate": 1.9295852211569753e-05, "loss": 9.2132, "step": 71770 }, { "epoch": 0.3584608854153662, "grad_norm": 0.0910298228263855, "learning_rate": 1.92943502966282e-05, "loss": 9.2336, "step": 71780 }, { "epoch": 0.3585108242403056, "grad_norm": 0.09591134637594223, "learning_rate": 1.929284838168665e-05, "loss": 9.2223, "step": 71790 }, { "epoch": 0.3585607630652451, "grad_norm": 0.0979471430182457, "learning_rate": 1.92913464667451e-05, "loss": 9.2379, "step": 71800 }, { "epoch": 0.3586107018901845, "grad_norm": 0.10352909564971924, "learning_rate": 1.928984455180355e-05, "loss": 9.2422, "step": 71810 }, { "epoch": 0.358660640715124, "grad_norm": 0.09178424626588821, "learning_rate": 1.9288342636862e-05, "loss": 9.2233, "step": 71820 }, { "epoch": 0.3587105795400634, "grad_norm": 0.09857258945703506, "learning_rate": 1.9286840721920447e-05, "loss": 9.2315, "step": 71830 }, { "epoch": 0.3587605183650029, "grad_norm": 0.09110614657402039, "learning_rate": 1.9285338806978898e-05, "loss": 9.221, "step": 71840 }, { "epoch": 0.3588104571899423, "grad_norm": 0.09467658400535583, "learning_rate": 1.9283836892037348e-05, "loss": 9.2234, "step": 71850 }, { "epoch": 0.3588603960148818, "grad_norm": 0.10426273941993713, "learning_rate": 1.9282334977095798e-05, "loss": 9.2211, "step": 71860 }, { "epoch": 0.3589103348398212, "grad_norm": 0.09133409708738327, "learning_rate": 1.9280833062154248e-05, "loss": 9.2181, "step": 71870 }, { "epoch": 0.3589602736647607, "grad_norm": 0.09624101221561432, "learning_rate": 1.9279331147212695e-05, "loss": 9.2161, "step": 71880 }, { "epoch": 0.3590102124897001, "grad_norm": 0.08982755988836288, "learning_rate": 1.9277829232271145e-05, "loss": 9.2249, "step": 71890 }, { "epoch": 0.3590601513146396, "grad_norm": 0.09154070168733597, "learning_rate": 1.9276327317329595e-05, "loss": 9.2266, "step": 71900 }, { "epoch": 0.359110090139579, "grad_norm": 0.09227220714092255, "learning_rate": 1.9274825402388045e-05, "loss": 9.2306, "step": 71910 }, { "epoch": 0.3591600289645185, "grad_norm": 0.09317903965711594, "learning_rate": 1.9273323487446496e-05, "loss": 9.217, "step": 71920 }, { "epoch": 0.3592099677894579, "grad_norm": 0.09271214157342911, "learning_rate": 1.9271821572504942e-05, "loss": 9.2249, "step": 71930 }, { "epoch": 0.3592599066143974, "grad_norm": 0.10173588246107101, "learning_rate": 1.9270319657563393e-05, "loss": 9.2237, "step": 71940 }, { "epoch": 0.3593098454393368, "grad_norm": 0.09192373603582382, "learning_rate": 1.9268817742621843e-05, "loss": 9.2223, "step": 71950 }, { "epoch": 0.3593597842642763, "grad_norm": 0.09208924323320389, "learning_rate": 1.9267315827680293e-05, "loss": 9.2111, "step": 71960 }, { "epoch": 0.3594097230892157, "grad_norm": 0.09781036525964737, "learning_rate": 1.9265813912738743e-05, "loss": 9.2236, "step": 71970 }, { "epoch": 0.3594596619141552, "grad_norm": 0.09488288313150406, "learning_rate": 1.926431199779719e-05, "loss": 9.2249, "step": 71980 }, { "epoch": 0.3595096007390946, "grad_norm": 0.08927453309297562, "learning_rate": 1.9262810082855643e-05, "loss": 9.2323, "step": 71990 }, { "epoch": 0.3595595395640341, "grad_norm": 0.09006758779287338, "learning_rate": 1.926130816791409e-05, "loss": 9.2224, "step": 72000 }, { "epoch": 0.3596094783889735, "grad_norm": 0.09184703975915909, "learning_rate": 1.925980625297254e-05, "loss": 9.2305, "step": 72010 }, { "epoch": 0.359659417213913, "grad_norm": 0.0887824296951294, "learning_rate": 1.925830433803099e-05, "loss": 9.2437, "step": 72020 }, { "epoch": 0.3597093560388524, "grad_norm": 0.09438683092594147, "learning_rate": 1.9256802423089437e-05, "loss": 9.2207, "step": 72030 }, { "epoch": 0.3597592948637919, "grad_norm": 0.0963621437549591, "learning_rate": 1.925530050814789e-05, "loss": 9.2236, "step": 72040 }, { "epoch": 0.3598092336887313, "grad_norm": 0.09187499433755875, "learning_rate": 1.9253798593206338e-05, "loss": 9.2143, "step": 72050 }, { "epoch": 0.3598591725136708, "grad_norm": 0.0932173877954483, "learning_rate": 1.9252296678264788e-05, "loss": 9.228, "step": 72060 }, { "epoch": 0.3599091113386102, "grad_norm": 0.09906915575265884, "learning_rate": 1.9250794763323238e-05, "loss": 9.2218, "step": 72070 }, { "epoch": 0.3599590501635497, "grad_norm": 0.09343940764665604, "learning_rate": 1.9249292848381685e-05, "loss": 9.2335, "step": 72080 }, { "epoch": 0.3600089889884891, "grad_norm": 0.09147544950246811, "learning_rate": 1.924779093344014e-05, "loss": 9.2215, "step": 72090 }, { "epoch": 0.3600589278134286, "grad_norm": 0.0935380831360817, "learning_rate": 1.9246289018498585e-05, "loss": 9.217, "step": 72100 }, { "epoch": 0.360108866638368, "grad_norm": 0.09664879739284515, "learning_rate": 1.9244787103557035e-05, "loss": 9.2295, "step": 72110 }, { "epoch": 0.3601588054633075, "grad_norm": 0.09369748085737228, "learning_rate": 1.9243285188615486e-05, "loss": 9.2209, "step": 72120 }, { "epoch": 0.3602087442882469, "grad_norm": 0.09158635139465332, "learning_rate": 1.9241783273673936e-05, "loss": 9.2208, "step": 72130 }, { "epoch": 0.3602586831131864, "grad_norm": 0.08802355080842972, "learning_rate": 1.9240281358732386e-05, "loss": 9.2261, "step": 72140 }, { "epoch": 0.3603086219381258, "grad_norm": 0.09446180611848831, "learning_rate": 1.9238779443790833e-05, "loss": 9.2287, "step": 72150 }, { "epoch": 0.36035856076306527, "grad_norm": 0.10025842487812042, "learning_rate": 1.9237277528849283e-05, "loss": 9.2188, "step": 72160 }, { "epoch": 0.3604084995880047, "grad_norm": 0.09552531689405441, "learning_rate": 1.9235775613907733e-05, "loss": 9.213, "step": 72170 }, { "epoch": 0.36045843841294417, "grad_norm": 0.09716292470693588, "learning_rate": 1.9234273698966183e-05, "loss": 9.2195, "step": 72180 }, { "epoch": 0.3605083772378836, "grad_norm": 0.09107545018196106, "learning_rate": 1.9232771784024633e-05, "loss": 9.2349, "step": 72190 }, { "epoch": 0.360558316062823, "grad_norm": 0.09314329922199249, "learning_rate": 1.923126986908308e-05, "loss": 9.2195, "step": 72200 }, { "epoch": 0.3606082548877625, "grad_norm": 0.09497959911823273, "learning_rate": 1.922976795414153e-05, "loss": 9.216, "step": 72210 }, { "epoch": 0.3606581937127019, "grad_norm": 0.09450450539588928, "learning_rate": 1.922826603919998e-05, "loss": 9.2196, "step": 72220 }, { "epoch": 0.3607081325376414, "grad_norm": 0.09029270708560944, "learning_rate": 1.922676412425843e-05, "loss": 9.2221, "step": 72230 }, { "epoch": 0.3607580713625808, "grad_norm": 0.09402526915073395, "learning_rate": 1.922526220931688e-05, "loss": 9.2112, "step": 72240 }, { "epoch": 0.3608080101875203, "grad_norm": 0.09604809433221817, "learning_rate": 1.9223760294375328e-05, "loss": 9.2266, "step": 72250 }, { "epoch": 0.3608579490124597, "grad_norm": 0.09894632548093796, "learning_rate": 1.9222258379433778e-05, "loss": 9.2271, "step": 72260 }, { "epoch": 0.3609078878373992, "grad_norm": 0.0926297977566719, "learning_rate": 1.9220756464492228e-05, "loss": 9.219, "step": 72270 }, { "epoch": 0.3609578266623386, "grad_norm": 0.08637505769729614, "learning_rate": 1.9219254549550678e-05, "loss": 9.2202, "step": 72280 }, { "epoch": 0.3610077654872781, "grad_norm": 0.09499508887529373, "learning_rate": 1.921775263460913e-05, "loss": 9.2172, "step": 72290 }, { "epoch": 0.3610577043122175, "grad_norm": 0.09541734308004379, "learning_rate": 1.9216250719667575e-05, "loss": 9.2201, "step": 72300 }, { "epoch": 0.361107643137157, "grad_norm": 0.09449708461761475, "learning_rate": 1.921474880472603e-05, "loss": 9.2271, "step": 72310 }, { "epoch": 0.3611575819620964, "grad_norm": 0.1025526151061058, "learning_rate": 1.9213246889784476e-05, "loss": 9.2293, "step": 72320 }, { "epoch": 0.3612075207870359, "grad_norm": 0.08863754570484161, "learning_rate": 1.9211744974842926e-05, "loss": 9.2345, "step": 72330 }, { "epoch": 0.3612574596119753, "grad_norm": 0.09427140653133392, "learning_rate": 1.9210243059901376e-05, "loss": 9.2335, "step": 72340 }, { "epoch": 0.3613073984369148, "grad_norm": 0.09397964179515839, "learning_rate": 1.9208741144959823e-05, "loss": 9.2185, "step": 72350 }, { "epoch": 0.3613573372618542, "grad_norm": 0.09295367449522018, "learning_rate": 1.9207239230018276e-05, "loss": 9.2255, "step": 72360 }, { "epoch": 0.3614072760867937, "grad_norm": 0.09041722118854523, "learning_rate": 1.9205737315076723e-05, "loss": 9.2303, "step": 72370 }, { "epoch": 0.3614572149117331, "grad_norm": 0.08931518346071243, "learning_rate": 1.9204235400135173e-05, "loss": 9.2179, "step": 72380 }, { "epoch": 0.3615071537366726, "grad_norm": 0.0969778448343277, "learning_rate": 1.9202733485193623e-05, "loss": 9.2101, "step": 72390 }, { "epoch": 0.361557092561612, "grad_norm": 0.09464286267757416, "learning_rate": 1.920123157025207e-05, "loss": 9.2113, "step": 72400 }, { "epoch": 0.3616070313865515, "grad_norm": 0.09430237114429474, "learning_rate": 1.9199729655310524e-05, "loss": 9.2103, "step": 72410 }, { "epoch": 0.3616569702114909, "grad_norm": 0.0944664403796196, "learning_rate": 1.919822774036897e-05, "loss": 9.2215, "step": 72420 }, { "epoch": 0.3617069090364304, "grad_norm": 0.09468348324298859, "learning_rate": 1.919672582542742e-05, "loss": 9.2222, "step": 72430 }, { "epoch": 0.3617568478613698, "grad_norm": 0.08907091617584229, "learning_rate": 1.919522391048587e-05, "loss": 9.2238, "step": 72440 }, { "epoch": 0.3618067866863093, "grad_norm": 0.09387124329805374, "learning_rate": 1.9193721995544318e-05, "loss": 9.2185, "step": 72450 }, { "epoch": 0.3618567255112487, "grad_norm": 0.09770091623067856, "learning_rate": 1.919222008060277e-05, "loss": 9.2177, "step": 72460 }, { "epoch": 0.3619066643361882, "grad_norm": 0.09893268346786499, "learning_rate": 1.9190718165661218e-05, "loss": 9.2117, "step": 72470 }, { "epoch": 0.3619566031611276, "grad_norm": 0.09433605521917343, "learning_rate": 1.9189216250719668e-05, "loss": 9.2256, "step": 72480 }, { "epoch": 0.3620065419860671, "grad_norm": 0.09570042043924332, "learning_rate": 1.918771433577812e-05, "loss": 9.2137, "step": 72490 }, { "epoch": 0.3620564808110065, "grad_norm": 0.09163347631692886, "learning_rate": 1.9186212420836565e-05, "loss": 9.2188, "step": 72500 }, { "epoch": 0.362106419635946, "grad_norm": 0.08918924629688263, "learning_rate": 1.918471050589502e-05, "loss": 9.215, "step": 72510 }, { "epoch": 0.3621563584608854, "grad_norm": 0.10020863264799118, "learning_rate": 1.9183208590953466e-05, "loss": 9.2027, "step": 72520 }, { "epoch": 0.3622062972858249, "grad_norm": 0.09962757676839828, "learning_rate": 1.9181706676011916e-05, "loss": 9.2011, "step": 72530 }, { "epoch": 0.3622562361107643, "grad_norm": 0.09085911512374878, "learning_rate": 1.9180204761070366e-05, "loss": 9.2144, "step": 72540 }, { "epoch": 0.3623061749357038, "grad_norm": 0.0901787281036377, "learning_rate": 1.9178702846128813e-05, "loss": 9.2207, "step": 72550 }, { "epoch": 0.3623561137606432, "grad_norm": 0.09482782334089279, "learning_rate": 1.9177200931187266e-05, "loss": 9.2262, "step": 72560 }, { "epoch": 0.36240605258558267, "grad_norm": 0.09666303545236588, "learning_rate": 1.9175699016245713e-05, "loss": 9.2132, "step": 72570 }, { "epoch": 0.3624559914105221, "grad_norm": 0.09145186096429825, "learning_rate": 1.9174197101304163e-05, "loss": 9.219, "step": 72580 }, { "epoch": 0.36250593023546157, "grad_norm": 0.09651076048612595, "learning_rate": 1.9172695186362613e-05, "loss": 9.2178, "step": 72590 }, { "epoch": 0.362555869060401, "grad_norm": 0.0937245786190033, "learning_rate": 1.917119327142106e-05, "loss": 9.2151, "step": 72600 }, { "epoch": 0.36260580788534047, "grad_norm": 0.09232796728610992, "learning_rate": 1.9169691356479514e-05, "loss": 9.2195, "step": 72610 }, { "epoch": 0.3626557467102799, "grad_norm": 0.09451702237129211, "learning_rate": 1.916818944153796e-05, "loss": 9.2251, "step": 72620 }, { "epoch": 0.36270568553521937, "grad_norm": 0.08963721245527267, "learning_rate": 1.9166687526596414e-05, "loss": 9.2175, "step": 72630 }, { "epoch": 0.3627556243601588, "grad_norm": 0.09087999910116196, "learning_rate": 1.916518561165486e-05, "loss": 9.2081, "step": 72640 }, { "epoch": 0.36280556318509827, "grad_norm": 0.09726609289646149, "learning_rate": 1.9163683696713308e-05, "loss": 9.2042, "step": 72650 }, { "epoch": 0.3628555020100377, "grad_norm": 0.09195300191640854, "learning_rate": 1.916218178177176e-05, "loss": 9.2071, "step": 72660 }, { "epoch": 0.36290544083497717, "grad_norm": 0.09186240285634995, "learning_rate": 1.9160679866830208e-05, "loss": 9.1994, "step": 72670 }, { "epoch": 0.3629553796599166, "grad_norm": 0.09509220719337463, "learning_rate": 1.9159177951888662e-05, "loss": 9.2143, "step": 72680 }, { "epoch": 0.36300531848485607, "grad_norm": 0.10255935788154602, "learning_rate": 1.915767603694711e-05, "loss": 9.2106, "step": 72690 }, { "epoch": 0.3630552573097955, "grad_norm": 0.09038600325584412, "learning_rate": 1.9156174122005555e-05, "loss": 9.2043, "step": 72700 }, { "epoch": 0.36310519613473496, "grad_norm": 0.09224878251552582, "learning_rate": 1.915467220706401e-05, "loss": 9.201, "step": 72710 }, { "epoch": 0.3631551349596744, "grad_norm": 0.0966305211186409, "learning_rate": 1.9153170292122456e-05, "loss": 9.2141, "step": 72720 }, { "epoch": 0.36320507378461386, "grad_norm": 0.0945696085691452, "learning_rate": 1.915166837718091e-05, "loss": 9.2121, "step": 72730 }, { "epoch": 0.3632550126095533, "grad_norm": 0.09432712197303772, "learning_rate": 1.9150166462239356e-05, "loss": 9.2173, "step": 72740 }, { "epoch": 0.36330495143449276, "grad_norm": 0.09079490602016449, "learning_rate": 1.9148664547297803e-05, "loss": 9.2089, "step": 72750 }, { "epoch": 0.3633548902594322, "grad_norm": 0.09984689205884933, "learning_rate": 1.9147162632356256e-05, "loss": 9.2111, "step": 72760 }, { "epoch": 0.36340482908437166, "grad_norm": 0.09602593630552292, "learning_rate": 1.9145660717414703e-05, "loss": 9.207, "step": 72770 }, { "epoch": 0.3634547679093111, "grad_norm": 0.09502044320106506, "learning_rate": 1.9144158802473157e-05, "loss": 9.2066, "step": 72780 }, { "epoch": 0.36350470673425056, "grad_norm": 0.0925392359495163, "learning_rate": 1.9142656887531603e-05, "loss": 9.2145, "step": 72790 }, { "epoch": 0.36355464555919, "grad_norm": 0.09208685904741287, "learning_rate": 1.914115497259005e-05, "loss": 9.229, "step": 72800 }, { "epoch": 0.36360458438412946, "grad_norm": 0.09518954902887344, "learning_rate": 1.9139653057648504e-05, "loss": 9.2098, "step": 72810 }, { "epoch": 0.3636545232090689, "grad_norm": 0.09336472302675247, "learning_rate": 1.913815114270695e-05, "loss": 9.2115, "step": 72820 }, { "epoch": 0.36370446203400836, "grad_norm": 0.09499195218086243, "learning_rate": 1.9136649227765404e-05, "loss": 9.2051, "step": 72830 }, { "epoch": 0.3637544008589478, "grad_norm": 0.09333008527755737, "learning_rate": 1.913514731282385e-05, "loss": 9.2088, "step": 72840 }, { "epoch": 0.36380433968388726, "grad_norm": 0.0993448942899704, "learning_rate": 1.9133645397882298e-05, "loss": 9.2097, "step": 72850 }, { "epoch": 0.3638542785088267, "grad_norm": 0.0906200036406517, "learning_rate": 1.913214348294075e-05, "loss": 9.2196, "step": 72860 }, { "epoch": 0.36390421733376616, "grad_norm": 0.09111246466636658, "learning_rate": 1.9130641567999198e-05, "loss": 9.21, "step": 72870 }, { "epoch": 0.3639541561587056, "grad_norm": 0.09346730262041092, "learning_rate": 1.9129139653057652e-05, "loss": 9.2001, "step": 72880 }, { "epoch": 0.36400409498364505, "grad_norm": 0.09085837006568909, "learning_rate": 1.91276377381161e-05, "loss": 9.2215, "step": 72890 }, { "epoch": 0.3640540338085845, "grad_norm": 0.09417754411697388, "learning_rate": 1.9126135823174545e-05, "loss": 9.2197, "step": 72900 }, { "epoch": 0.36410397263352395, "grad_norm": 0.09656990319490433, "learning_rate": 1.9124633908233e-05, "loss": 9.2044, "step": 72910 }, { "epoch": 0.3641539114584634, "grad_norm": 0.0960773453116417, "learning_rate": 1.9123131993291446e-05, "loss": 9.2099, "step": 72920 }, { "epoch": 0.36420385028340285, "grad_norm": 0.10151112079620361, "learning_rate": 1.91216300783499e-05, "loss": 9.2108, "step": 72930 }, { "epoch": 0.3642537891083423, "grad_norm": 0.09444685280323029, "learning_rate": 1.9120128163408346e-05, "loss": 9.2017, "step": 72940 }, { "epoch": 0.36430372793328175, "grad_norm": 0.09510553628206253, "learning_rate": 1.9118626248466796e-05, "loss": 9.2096, "step": 72950 }, { "epoch": 0.3643536667582212, "grad_norm": 0.09737194329500198, "learning_rate": 1.9117124333525246e-05, "loss": 9.2117, "step": 72960 }, { "epoch": 0.36440360558316065, "grad_norm": 0.09484457224607468, "learning_rate": 1.9115622418583693e-05, "loss": 9.1955, "step": 72970 }, { "epoch": 0.36445354440810007, "grad_norm": 0.09121875464916229, "learning_rate": 1.9114120503642147e-05, "loss": 9.2148, "step": 72980 }, { "epoch": 0.36450348323303955, "grad_norm": 0.09583168476819992, "learning_rate": 1.9112618588700594e-05, "loss": 9.2091, "step": 72990 }, { "epoch": 0.36455342205797897, "grad_norm": 0.09783457964658737, "learning_rate": 1.9111116673759044e-05, "loss": 9.1992, "step": 73000 }, { "epoch": 0.36460336088291845, "grad_norm": 0.09332898259162903, "learning_rate": 1.9109614758817494e-05, "loss": 9.2091, "step": 73010 }, { "epoch": 0.36465329970785787, "grad_norm": 0.09087220579385757, "learning_rate": 1.910811284387594e-05, "loss": 9.2208, "step": 73020 }, { "epoch": 0.36470323853279735, "grad_norm": 0.09480462223291397, "learning_rate": 1.9106610928934394e-05, "loss": 9.2173, "step": 73030 }, { "epoch": 0.36475317735773677, "grad_norm": 0.0977669283747673, "learning_rate": 1.910510901399284e-05, "loss": 9.2026, "step": 73040 }, { "epoch": 0.36480311618267625, "grad_norm": 0.08492804318666458, "learning_rate": 1.910360709905129e-05, "loss": 9.2201, "step": 73050 }, { "epoch": 0.36485305500761567, "grad_norm": 0.09246183931827545, "learning_rate": 1.910210518410974e-05, "loss": 9.2205, "step": 73060 }, { "epoch": 0.36490299383255514, "grad_norm": 0.08867323398590088, "learning_rate": 1.9100603269168188e-05, "loss": 9.2047, "step": 73070 }, { "epoch": 0.36495293265749457, "grad_norm": 0.09597288072109222, "learning_rate": 1.9099101354226642e-05, "loss": 9.2167, "step": 73080 }, { "epoch": 0.36500287148243404, "grad_norm": 0.0952095165848732, "learning_rate": 1.909759943928509e-05, "loss": 9.2064, "step": 73090 }, { "epoch": 0.36505281030737347, "grad_norm": 0.08946932852268219, "learning_rate": 1.909609752434354e-05, "loss": 9.2084, "step": 73100 }, { "epoch": 0.36510274913231294, "grad_norm": 0.09637124091386795, "learning_rate": 1.909459560940199e-05, "loss": 9.2031, "step": 73110 }, { "epoch": 0.36515268795725236, "grad_norm": 0.09121710807085037, "learning_rate": 1.9093093694460436e-05, "loss": 9.2069, "step": 73120 }, { "epoch": 0.36520262678219184, "grad_norm": 0.09637744724750519, "learning_rate": 1.909159177951889e-05, "loss": 9.2288, "step": 73130 }, { "epoch": 0.36525256560713126, "grad_norm": 0.09214238077402115, "learning_rate": 1.9090089864577336e-05, "loss": 9.2124, "step": 73140 }, { "epoch": 0.36530250443207074, "grad_norm": 0.08832499384880066, "learning_rate": 1.9088587949635786e-05, "loss": 9.2153, "step": 73150 }, { "epoch": 0.36535244325701016, "grad_norm": 0.09127902239561081, "learning_rate": 1.9087086034694236e-05, "loss": 9.2171, "step": 73160 }, { "epoch": 0.36540238208194964, "grad_norm": 0.09237400442361832, "learning_rate": 1.9085584119752683e-05, "loss": 9.2115, "step": 73170 }, { "epoch": 0.36545232090688906, "grad_norm": 0.09848447889089584, "learning_rate": 1.9084082204811137e-05, "loss": 9.2101, "step": 73180 }, { "epoch": 0.3655022597318285, "grad_norm": 0.09511636197566986, "learning_rate": 1.9082580289869584e-05, "loss": 9.2122, "step": 73190 }, { "epoch": 0.36555219855676796, "grad_norm": 0.09176871925592422, "learning_rate": 1.9081078374928034e-05, "loss": 9.2076, "step": 73200 }, { "epoch": 0.3656021373817074, "grad_norm": 0.0967327430844307, "learning_rate": 1.9079576459986484e-05, "loss": 9.213, "step": 73210 }, { "epoch": 0.36565207620664686, "grad_norm": 0.09490523487329483, "learning_rate": 1.907807454504493e-05, "loss": 9.2131, "step": 73220 }, { "epoch": 0.3657020150315863, "grad_norm": 0.09480072557926178, "learning_rate": 1.9076572630103384e-05, "loss": 9.2003, "step": 73230 }, { "epoch": 0.36575195385652576, "grad_norm": 0.08888121694326401, "learning_rate": 1.907507071516183e-05, "loss": 9.1996, "step": 73240 }, { "epoch": 0.3658018926814652, "grad_norm": 0.09302229434251785, "learning_rate": 1.907356880022028e-05, "loss": 9.2104, "step": 73250 }, { "epoch": 0.36585183150640466, "grad_norm": 0.09518302977085114, "learning_rate": 1.907206688527873e-05, "loss": 9.1888, "step": 73260 }, { "epoch": 0.3659017703313441, "grad_norm": 0.09398709237575531, "learning_rate": 1.907056497033718e-05, "loss": 9.2083, "step": 73270 }, { "epoch": 0.36595170915628356, "grad_norm": 0.09403207898139954, "learning_rate": 1.9069063055395632e-05, "loss": 9.2055, "step": 73280 }, { "epoch": 0.366001647981223, "grad_norm": 0.09166137129068375, "learning_rate": 1.906756114045408e-05, "loss": 9.2105, "step": 73290 }, { "epoch": 0.36605158680616245, "grad_norm": 0.08635640889406204, "learning_rate": 1.906605922551253e-05, "loss": 9.2103, "step": 73300 }, { "epoch": 0.3661015256311019, "grad_norm": 0.09352033585309982, "learning_rate": 1.906455731057098e-05, "loss": 9.2081, "step": 73310 }, { "epoch": 0.36615146445604135, "grad_norm": 0.09585576504468918, "learning_rate": 1.906305539562943e-05, "loss": 9.2063, "step": 73320 }, { "epoch": 0.3662014032809808, "grad_norm": 0.10186425596475601, "learning_rate": 1.906155348068788e-05, "loss": 9.2024, "step": 73330 }, { "epoch": 0.36625134210592025, "grad_norm": 0.08858046680688858, "learning_rate": 1.9060051565746326e-05, "loss": 9.2075, "step": 73340 }, { "epoch": 0.3663012809308597, "grad_norm": 0.09600647538900375, "learning_rate": 1.9058549650804776e-05, "loss": 9.1993, "step": 73350 }, { "epoch": 0.36635121975579915, "grad_norm": 0.093485526740551, "learning_rate": 1.9057047735863226e-05, "loss": 9.2022, "step": 73360 }, { "epoch": 0.3664011585807386, "grad_norm": 0.09988955408334732, "learning_rate": 1.9055545820921677e-05, "loss": 9.2141, "step": 73370 }, { "epoch": 0.36645109740567805, "grad_norm": 0.10079696774482727, "learning_rate": 1.9054043905980127e-05, "loss": 9.2063, "step": 73380 }, { "epoch": 0.36650103623061747, "grad_norm": 0.09347499907016754, "learning_rate": 1.9052541991038574e-05, "loss": 9.2044, "step": 73390 }, { "epoch": 0.36655097505555695, "grad_norm": 0.09521867334842682, "learning_rate": 1.9051040076097024e-05, "loss": 9.2019, "step": 73400 }, { "epoch": 0.36660091388049637, "grad_norm": 0.09266289323568344, "learning_rate": 1.9049538161155474e-05, "loss": 9.2043, "step": 73410 }, { "epoch": 0.36665085270543585, "grad_norm": 0.09217309206724167, "learning_rate": 1.9048036246213924e-05, "loss": 9.2132, "step": 73420 }, { "epoch": 0.36670079153037527, "grad_norm": 0.0919642373919487, "learning_rate": 1.9046534331272374e-05, "loss": 9.2002, "step": 73430 }, { "epoch": 0.36675073035531475, "grad_norm": 0.0903974175453186, "learning_rate": 1.904503241633082e-05, "loss": 9.2031, "step": 73440 }, { "epoch": 0.36680066918025417, "grad_norm": 0.09081780910491943, "learning_rate": 1.904353050138927e-05, "loss": 9.2017, "step": 73450 }, { "epoch": 0.36685060800519365, "grad_norm": 0.09484659135341644, "learning_rate": 1.904202858644772e-05, "loss": 9.2111, "step": 73460 }, { "epoch": 0.36690054683013307, "grad_norm": 0.09380007535219193, "learning_rate": 1.904052667150617e-05, "loss": 9.2005, "step": 73470 }, { "epoch": 0.36695048565507254, "grad_norm": 0.09407030791044235, "learning_rate": 1.9039024756564622e-05, "loss": 9.2089, "step": 73480 }, { "epoch": 0.36700042448001197, "grad_norm": 0.09412670135498047, "learning_rate": 1.903752284162307e-05, "loss": 9.2009, "step": 73490 }, { "epoch": 0.36705036330495144, "grad_norm": 0.09622817486524582, "learning_rate": 1.903602092668152e-05, "loss": 9.2022, "step": 73500 }, { "epoch": 0.36710030212989087, "grad_norm": 0.09574905782938004, "learning_rate": 1.903451901173997e-05, "loss": 9.2067, "step": 73510 }, { "epoch": 0.36715024095483034, "grad_norm": 0.09512118250131607, "learning_rate": 1.903301709679842e-05, "loss": 9.2085, "step": 73520 }, { "epoch": 0.36720017977976976, "grad_norm": 0.08740708976984024, "learning_rate": 1.903151518185687e-05, "loss": 9.2168, "step": 73530 }, { "epoch": 0.36725011860470924, "grad_norm": 0.091730996966362, "learning_rate": 1.9030013266915316e-05, "loss": 9.2058, "step": 73540 }, { "epoch": 0.36730005742964866, "grad_norm": 0.09345594793558121, "learning_rate": 1.9028511351973766e-05, "loss": 9.1933, "step": 73550 }, { "epoch": 0.36734999625458814, "grad_norm": 0.1008530855178833, "learning_rate": 1.9027009437032216e-05, "loss": 9.195, "step": 73560 }, { "epoch": 0.36739993507952756, "grad_norm": 0.10389885306358337, "learning_rate": 1.9025507522090667e-05, "loss": 9.2012, "step": 73570 }, { "epoch": 0.36744987390446704, "grad_norm": 0.09878107160329819, "learning_rate": 1.9024005607149117e-05, "loss": 9.2016, "step": 73580 }, { "epoch": 0.36749981272940646, "grad_norm": 0.09092985093593597, "learning_rate": 1.9022503692207564e-05, "loss": 9.2007, "step": 73590 }, { "epoch": 0.36754975155434594, "grad_norm": 0.09142585843801498, "learning_rate": 1.9021001777266014e-05, "loss": 9.2079, "step": 73600 }, { "epoch": 0.36759969037928536, "grad_norm": 0.09261012822389603, "learning_rate": 1.9019499862324464e-05, "loss": 9.2041, "step": 73610 }, { "epoch": 0.36764962920422484, "grad_norm": 0.09699272364377975, "learning_rate": 1.9017997947382914e-05, "loss": 9.2008, "step": 73620 }, { "epoch": 0.36769956802916426, "grad_norm": 0.0889492779970169, "learning_rate": 1.9016496032441364e-05, "loss": 9.1979, "step": 73630 }, { "epoch": 0.36774950685410374, "grad_norm": 0.09963024407625198, "learning_rate": 1.9014994117499814e-05, "loss": 9.2071, "step": 73640 }, { "epoch": 0.36779944567904316, "grad_norm": 0.08926968276500702, "learning_rate": 1.901349220255826e-05, "loss": 9.2077, "step": 73650 }, { "epoch": 0.36784938450398263, "grad_norm": 0.10151121020317078, "learning_rate": 1.901199028761671e-05, "loss": 9.1939, "step": 73660 }, { "epoch": 0.36789932332892206, "grad_norm": 0.09443957358598709, "learning_rate": 1.901048837267516e-05, "loss": 9.2078, "step": 73670 }, { "epoch": 0.36794926215386153, "grad_norm": 0.09295155853033066, "learning_rate": 1.9008986457733612e-05, "loss": 9.2032, "step": 73680 }, { "epoch": 0.36799920097880096, "grad_norm": 0.09365091472864151, "learning_rate": 1.9007484542792062e-05, "loss": 9.1929, "step": 73690 }, { "epoch": 0.36804913980374043, "grad_norm": 0.09538646787405014, "learning_rate": 1.900598262785051e-05, "loss": 9.1853, "step": 73700 }, { "epoch": 0.36809907862867985, "grad_norm": 0.09404728561639786, "learning_rate": 1.900448071290896e-05, "loss": 9.2012, "step": 73710 }, { "epoch": 0.36814901745361933, "grad_norm": 0.09444870799779892, "learning_rate": 1.900297879796741e-05, "loss": 9.1991, "step": 73720 }, { "epoch": 0.36819895627855875, "grad_norm": 0.09309567511081696, "learning_rate": 1.900147688302586e-05, "loss": 9.1996, "step": 73730 }, { "epoch": 0.36824889510349823, "grad_norm": 0.09412892907857895, "learning_rate": 1.899997496808431e-05, "loss": 9.1956, "step": 73740 }, { "epoch": 0.36829883392843765, "grad_norm": 0.09056131541728973, "learning_rate": 1.8998473053142756e-05, "loss": 9.2004, "step": 73750 }, { "epoch": 0.36834877275337713, "grad_norm": 0.09614895284175873, "learning_rate": 1.8996971138201206e-05, "loss": 9.2077, "step": 73760 }, { "epoch": 0.36839871157831655, "grad_norm": 0.09568426012992859, "learning_rate": 1.8995469223259657e-05, "loss": 9.196, "step": 73770 }, { "epoch": 0.36844865040325603, "grad_norm": 0.09588686376810074, "learning_rate": 1.8993967308318107e-05, "loss": 9.1894, "step": 73780 }, { "epoch": 0.36849858922819545, "grad_norm": 0.09887325763702393, "learning_rate": 1.8992465393376557e-05, "loss": 9.1864, "step": 73790 }, { "epoch": 0.3685485280531349, "grad_norm": 0.09332797676324844, "learning_rate": 1.8990963478435004e-05, "loss": 9.1968, "step": 73800 }, { "epoch": 0.36859846687807435, "grad_norm": 0.10205885022878647, "learning_rate": 1.8989461563493454e-05, "loss": 9.1965, "step": 73810 }, { "epoch": 0.3686484057030138, "grad_norm": 0.09981803596019745, "learning_rate": 1.8987959648551904e-05, "loss": 9.195, "step": 73820 }, { "epoch": 0.36869834452795325, "grad_norm": 0.09201083332300186, "learning_rate": 1.8986457733610354e-05, "loss": 9.2003, "step": 73830 }, { "epoch": 0.3687482833528927, "grad_norm": 0.09341354668140411, "learning_rate": 1.8984955818668804e-05, "loss": 9.2033, "step": 73840 }, { "epoch": 0.36879822217783215, "grad_norm": 0.0915553867816925, "learning_rate": 1.898345390372725e-05, "loss": 9.2004, "step": 73850 }, { "epoch": 0.3688481610027716, "grad_norm": 0.09335311502218246, "learning_rate": 1.89819519887857e-05, "loss": 9.2035, "step": 73860 }, { "epoch": 0.36889809982771105, "grad_norm": 0.10016678273677826, "learning_rate": 1.898045007384415e-05, "loss": 9.196, "step": 73870 }, { "epoch": 0.3689480386526505, "grad_norm": 0.09652851521968842, "learning_rate": 1.8978948158902602e-05, "loss": 9.202, "step": 73880 }, { "epoch": 0.36899797747758994, "grad_norm": 0.09326925128698349, "learning_rate": 1.8977446243961052e-05, "loss": 9.1939, "step": 73890 }, { "epoch": 0.3690479163025294, "grad_norm": 0.09441187977790833, "learning_rate": 1.89759443290195e-05, "loss": 9.1978, "step": 73900 }, { "epoch": 0.36909785512746884, "grad_norm": 0.09244664758443832, "learning_rate": 1.897444241407795e-05, "loss": 9.1851, "step": 73910 }, { "epoch": 0.3691477939524083, "grad_norm": 0.09576987475156784, "learning_rate": 1.89729404991364e-05, "loss": 9.1974, "step": 73920 }, { "epoch": 0.36919773277734774, "grad_norm": 0.10163576900959015, "learning_rate": 1.897143858419485e-05, "loss": 9.1942, "step": 73930 }, { "epoch": 0.3692476716022872, "grad_norm": 0.09293472766876221, "learning_rate": 1.89699366692533e-05, "loss": 9.2004, "step": 73940 }, { "epoch": 0.36929761042722664, "grad_norm": 0.09498841315507889, "learning_rate": 1.8968434754311746e-05, "loss": 9.2015, "step": 73950 }, { "epoch": 0.3693475492521661, "grad_norm": 0.0949832871556282, "learning_rate": 1.89669328393702e-05, "loss": 9.1872, "step": 73960 }, { "epoch": 0.36939748807710554, "grad_norm": 0.09256528317928314, "learning_rate": 1.8965430924428647e-05, "loss": 9.1972, "step": 73970 }, { "epoch": 0.369447426902045, "grad_norm": 0.0952831581234932, "learning_rate": 1.8963929009487097e-05, "loss": 9.1852, "step": 73980 }, { "epoch": 0.36949736572698444, "grad_norm": 0.10047879070043564, "learning_rate": 1.8962427094545547e-05, "loss": 9.2014, "step": 73990 }, { "epoch": 0.3695473045519239, "grad_norm": 0.09470636397600174, "learning_rate": 1.8960925179603994e-05, "loss": 9.1897, "step": 74000 }, { "epoch": 0.36959724337686334, "grad_norm": 0.08998063206672668, "learning_rate": 1.8959423264662447e-05, "loss": 9.2008, "step": 74010 }, { "epoch": 0.3696471822018028, "grad_norm": 0.09682891517877579, "learning_rate": 1.8957921349720894e-05, "loss": 9.1968, "step": 74020 }, { "epoch": 0.36969712102674224, "grad_norm": 0.093925341963768, "learning_rate": 1.8956419434779344e-05, "loss": 9.2174, "step": 74030 }, { "epoch": 0.3697470598516817, "grad_norm": 0.09512005746364594, "learning_rate": 1.8954917519837794e-05, "loss": 9.194, "step": 74040 }, { "epoch": 0.36979699867662114, "grad_norm": 0.09474792331457138, "learning_rate": 1.895341560489624e-05, "loss": 9.1955, "step": 74050 }, { "epoch": 0.3698469375015606, "grad_norm": 0.08794379979372025, "learning_rate": 1.8951913689954695e-05, "loss": 9.1912, "step": 74060 }, { "epoch": 0.36989687632650003, "grad_norm": 0.09021158516407013, "learning_rate": 1.895041177501314e-05, "loss": 9.1967, "step": 74070 }, { "epoch": 0.3699468151514395, "grad_norm": 0.09125207364559174, "learning_rate": 1.8948909860071592e-05, "loss": 9.1879, "step": 74080 }, { "epoch": 0.36999675397637893, "grad_norm": 0.09770797193050385, "learning_rate": 1.8947407945130042e-05, "loss": 9.1984, "step": 74090 }, { "epoch": 0.3700466928013184, "grad_norm": 0.09228277951478958, "learning_rate": 1.894590603018849e-05, "loss": 9.1979, "step": 74100 }, { "epoch": 0.37009663162625783, "grad_norm": 0.09251270443201065, "learning_rate": 1.8944404115246942e-05, "loss": 9.199, "step": 74110 }, { "epoch": 0.3701465704511973, "grad_norm": 0.09587238729000092, "learning_rate": 1.894290220030539e-05, "loss": 9.1933, "step": 74120 }, { "epoch": 0.37019650927613673, "grad_norm": 0.08899569511413574, "learning_rate": 1.894140028536384e-05, "loss": 9.1968, "step": 74130 }, { "epoch": 0.3702464481010762, "grad_norm": 0.09335777163505554, "learning_rate": 1.893989837042229e-05, "loss": 9.1918, "step": 74140 }, { "epoch": 0.37029638692601563, "grad_norm": 0.09356404095888138, "learning_rate": 1.8938396455480736e-05, "loss": 9.1845, "step": 74150 }, { "epoch": 0.3703463257509551, "grad_norm": 0.09852045774459839, "learning_rate": 1.893689454053919e-05, "loss": 9.1924, "step": 74160 }, { "epoch": 0.37039626457589453, "grad_norm": 0.09660044312477112, "learning_rate": 1.8935392625597637e-05, "loss": 9.2063, "step": 74170 }, { "epoch": 0.37044620340083395, "grad_norm": 0.09085432440042496, "learning_rate": 1.8933890710656087e-05, "loss": 9.1791, "step": 74180 }, { "epoch": 0.37049614222577343, "grad_norm": 0.09493508189916611, "learning_rate": 1.8932388795714537e-05, "loss": 9.1994, "step": 74190 }, { "epoch": 0.37054608105071285, "grad_norm": 0.09557243436574936, "learning_rate": 1.8930886880772984e-05, "loss": 9.1895, "step": 74200 }, { "epoch": 0.3705960198756523, "grad_norm": 0.09346624463796616, "learning_rate": 1.8929384965831437e-05, "loss": 9.1871, "step": 74210 }, { "epoch": 0.37064595870059175, "grad_norm": 0.09579519182443619, "learning_rate": 1.8927883050889884e-05, "loss": 9.1834, "step": 74220 }, { "epoch": 0.3706958975255312, "grad_norm": 0.10192973166704178, "learning_rate": 1.8926381135948334e-05, "loss": 9.1982, "step": 74230 }, { "epoch": 0.37074583635047065, "grad_norm": 0.09080871194601059, "learning_rate": 1.8924879221006784e-05, "loss": 9.2006, "step": 74240 }, { "epoch": 0.3707957751754101, "grad_norm": 0.09512729942798615, "learning_rate": 1.892337730606523e-05, "loss": 9.191, "step": 74250 }, { "epoch": 0.37084571400034955, "grad_norm": 0.09607906639575958, "learning_rate": 1.8921875391123685e-05, "loss": 9.1898, "step": 74260 }, { "epoch": 0.370895652825289, "grad_norm": 0.09581431746482849, "learning_rate": 1.892037347618213e-05, "loss": 9.1822, "step": 74270 }, { "epoch": 0.37094559165022845, "grad_norm": 0.09435351938009262, "learning_rate": 1.8918871561240585e-05, "loss": 9.1853, "step": 74280 }, { "epoch": 0.3709955304751679, "grad_norm": 0.09405918419361115, "learning_rate": 1.8917369646299032e-05, "loss": 9.1877, "step": 74290 }, { "epoch": 0.37104546930010734, "grad_norm": 0.09423495829105377, "learning_rate": 1.891586773135748e-05, "loss": 9.2003, "step": 74300 }, { "epoch": 0.3710954081250468, "grad_norm": 0.0974857285618782, "learning_rate": 1.8914365816415932e-05, "loss": 9.1888, "step": 74310 }, { "epoch": 0.37114534694998624, "grad_norm": 0.09585097432136536, "learning_rate": 1.891286390147438e-05, "loss": 9.2001, "step": 74320 }, { "epoch": 0.3711952857749257, "grad_norm": 0.09716727584600449, "learning_rate": 1.8911361986532833e-05, "loss": 9.1998, "step": 74330 }, { "epoch": 0.37124522459986514, "grad_norm": 0.09428082406520844, "learning_rate": 1.890986007159128e-05, "loss": 9.1946, "step": 74340 }, { "epoch": 0.3712951634248046, "grad_norm": 0.09271190315485, "learning_rate": 1.8908358156649726e-05, "loss": 9.1989, "step": 74350 }, { "epoch": 0.37134510224974404, "grad_norm": 0.09324190765619278, "learning_rate": 1.890685624170818e-05, "loss": 9.1995, "step": 74360 }, { "epoch": 0.3713950410746835, "grad_norm": 0.09561111032962799, "learning_rate": 1.8905354326766627e-05, "loss": 9.201, "step": 74370 }, { "epoch": 0.37144497989962294, "grad_norm": 0.09302332252264023, "learning_rate": 1.890385241182508e-05, "loss": 9.1906, "step": 74380 }, { "epoch": 0.3714949187245624, "grad_norm": 0.09225195646286011, "learning_rate": 1.8902350496883527e-05, "loss": 9.1941, "step": 74390 }, { "epoch": 0.37154485754950184, "grad_norm": 0.09414453059434891, "learning_rate": 1.8900848581941974e-05, "loss": 9.1922, "step": 74400 }, { "epoch": 0.3715947963744413, "grad_norm": 0.1040930524468422, "learning_rate": 1.8899346667000427e-05, "loss": 9.1835, "step": 74410 }, { "epoch": 0.37164473519938074, "grad_norm": 0.10101134330034256, "learning_rate": 1.8897844752058874e-05, "loss": 9.1919, "step": 74420 }, { "epoch": 0.3716946740243202, "grad_norm": 0.09623407572507858, "learning_rate": 1.8896342837117328e-05, "loss": 9.1749, "step": 74430 }, { "epoch": 0.37174461284925964, "grad_norm": 0.0893046110868454, "learning_rate": 1.8894840922175775e-05, "loss": 9.191, "step": 74440 }, { "epoch": 0.3717945516741991, "grad_norm": 0.09399503469467163, "learning_rate": 1.889333900723422e-05, "loss": 9.1985, "step": 74450 }, { "epoch": 0.37184449049913854, "grad_norm": 0.09701071679592133, "learning_rate": 1.8891837092292675e-05, "loss": 9.1864, "step": 74460 }, { "epoch": 0.371894429324078, "grad_norm": 0.09602676331996918, "learning_rate": 1.889033517735112e-05, "loss": 9.1936, "step": 74470 }, { "epoch": 0.37194436814901743, "grad_norm": 0.09337245672941208, "learning_rate": 1.8888833262409575e-05, "loss": 9.1728, "step": 74480 }, { "epoch": 0.3719943069739569, "grad_norm": 0.09136126190423965, "learning_rate": 1.8887331347468022e-05, "loss": 9.1816, "step": 74490 }, { "epoch": 0.37204424579889633, "grad_norm": 0.09118318557739258, "learning_rate": 1.888582943252647e-05, "loss": 9.1867, "step": 74500 }, { "epoch": 0.3720941846238358, "grad_norm": 0.09339746087789536, "learning_rate": 1.8884327517584922e-05, "loss": 9.1911, "step": 74510 }, { "epoch": 0.37214412344877523, "grad_norm": 0.09358755499124527, "learning_rate": 1.888282560264337e-05, "loss": 9.1926, "step": 74520 }, { "epoch": 0.3721940622737147, "grad_norm": 0.09610449522733688, "learning_rate": 1.8881323687701823e-05, "loss": 9.188, "step": 74530 }, { "epoch": 0.37224400109865413, "grad_norm": 0.09586481004953384, "learning_rate": 1.887982177276027e-05, "loss": 9.1889, "step": 74540 }, { "epoch": 0.3722939399235936, "grad_norm": 0.0982070192694664, "learning_rate": 1.8878319857818716e-05, "loss": 9.1799, "step": 74550 }, { "epoch": 0.37234387874853303, "grad_norm": 0.09355320036411285, "learning_rate": 1.887681794287717e-05, "loss": 9.191, "step": 74560 }, { "epoch": 0.3723938175734725, "grad_norm": 0.09481959044933319, "learning_rate": 1.8875316027935617e-05, "loss": 9.186, "step": 74570 }, { "epoch": 0.37244375639841193, "grad_norm": 0.09273957461118698, "learning_rate": 1.887381411299407e-05, "loss": 9.1892, "step": 74580 }, { "epoch": 0.3724936952233514, "grad_norm": 0.09700888395309448, "learning_rate": 1.8872312198052517e-05, "loss": 9.1935, "step": 74590 }, { "epoch": 0.37254363404829083, "grad_norm": 0.09876874834299088, "learning_rate": 1.8870810283110967e-05, "loss": 9.1894, "step": 74600 }, { "epoch": 0.3725935728732303, "grad_norm": 0.09617974609136581, "learning_rate": 1.8869308368169417e-05, "loss": 9.1823, "step": 74610 }, { "epoch": 0.3726435116981697, "grad_norm": 0.09874873608350754, "learning_rate": 1.8867806453227864e-05, "loss": 9.198, "step": 74620 }, { "epoch": 0.3726934505231092, "grad_norm": 0.0900963768362999, "learning_rate": 1.8866304538286318e-05, "loss": 9.1833, "step": 74630 }, { "epoch": 0.3727433893480486, "grad_norm": 0.09320524334907532, "learning_rate": 1.8864802623344765e-05, "loss": 9.1847, "step": 74640 }, { "epoch": 0.3727933281729881, "grad_norm": 0.09384310245513916, "learning_rate": 1.8863300708403215e-05, "loss": 9.182, "step": 74650 }, { "epoch": 0.3728432669979275, "grad_norm": 0.09499943256378174, "learning_rate": 1.8861798793461665e-05, "loss": 9.1918, "step": 74660 }, { "epoch": 0.372893205822867, "grad_norm": 0.09199872612953186, "learning_rate": 1.886029687852011e-05, "loss": 9.181, "step": 74670 }, { "epoch": 0.3729431446478064, "grad_norm": 0.0918089747428894, "learning_rate": 1.8858794963578565e-05, "loss": 9.1903, "step": 74680 }, { "epoch": 0.3729930834727459, "grad_norm": 0.09253137558698654, "learning_rate": 1.8857293048637012e-05, "loss": 9.1935, "step": 74690 }, { "epoch": 0.3730430222976853, "grad_norm": 0.09675570577383041, "learning_rate": 1.8855791133695462e-05, "loss": 9.1777, "step": 74700 }, { "epoch": 0.3730929611226248, "grad_norm": 0.09218452125787735, "learning_rate": 1.8854289218753912e-05, "loss": 9.1843, "step": 74710 }, { "epoch": 0.3731428999475642, "grad_norm": 0.09730592370033264, "learning_rate": 1.885278730381236e-05, "loss": 9.1803, "step": 74720 }, { "epoch": 0.3731928387725037, "grad_norm": 0.09571095556020737, "learning_rate": 1.8851285388870813e-05, "loss": 9.1813, "step": 74730 }, { "epoch": 0.3732427775974431, "grad_norm": 0.09244304150342941, "learning_rate": 1.884978347392926e-05, "loss": 9.1753, "step": 74740 }, { "epoch": 0.3732927164223826, "grad_norm": 0.08950365334749222, "learning_rate": 1.884828155898771e-05, "loss": 9.1876, "step": 74750 }, { "epoch": 0.373342655247322, "grad_norm": 0.0952899232506752, "learning_rate": 1.884677964404616e-05, "loss": 9.1857, "step": 74760 }, { "epoch": 0.3733925940722615, "grad_norm": 0.0960029885172844, "learning_rate": 1.8845277729104607e-05, "loss": 9.1913, "step": 74770 }, { "epoch": 0.3734425328972009, "grad_norm": 0.08860934525728226, "learning_rate": 1.884377581416306e-05, "loss": 9.1803, "step": 74780 }, { "epoch": 0.3734924717221404, "grad_norm": 0.09400645643472672, "learning_rate": 1.8842273899221507e-05, "loss": 9.1803, "step": 74790 }, { "epoch": 0.3735424105470798, "grad_norm": 0.09583377838134766, "learning_rate": 1.8840771984279957e-05, "loss": 9.1885, "step": 74800 }, { "epoch": 0.3735923493720193, "grad_norm": 0.09530255943536758, "learning_rate": 1.8839270069338407e-05, "loss": 9.1873, "step": 74810 }, { "epoch": 0.3736422881969587, "grad_norm": 0.09696003049612045, "learning_rate": 1.8837768154396854e-05, "loss": 9.1831, "step": 74820 }, { "epoch": 0.3736922270218982, "grad_norm": 0.09576781839132309, "learning_rate": 1.8836266239455308e-05, "loss": 9.1916, "step": 74830 }, { "epoch": 0.3737421658468376, "grad_norm": 0.08903126418590546, "learning_rate": 1.8834764324513755e-05, "loss": 9.1917, "step": 74840 }, { "epoch": 0.3737921046717771, "grad_norm": 0.09006597101688385, "learning_rate": 1.8833262409572205e-05, "loss": 9.174, "step": 74850 }, { "epoch": 0.3738420434967165, "grad_norm": 0.09339811652898788, "learning_rate": 1.8831760494630655e-05, "loss": 9.1918, "step": 74860 }, { "epoch": 0.373891982321656, "grad_norm": 0.09909702837467194, "learning_rate": 1.88302585796891e-05, "loss": 9.1877, "step": 74870 }, { "epoch": 0.3739419211465954, "grad_norm": 0.09441126883029938, "learning_rate": 1.8828756664747555e-05, "loss": 9.1836, "step": 74880 }, { "epoch": 0.3739918599715349, "grad_norm": 0.09800399839878082, "learning_rate": 1.8827254749806002e-05, "loss": 9.1844, "step": 74890 }, { "epoch": 0.3740417987964743, "grad_norm": 0.10018418729305267, "learning_rate": 1.8825752834864452e-05, "loss": 9.1881, "step": 74900 }, { "epoch": 0.3740917376214138, "grad_norm": 0.0955013558268547, "learning_rate": 1.8824250919922902e-05, "loss": 9.1919, "step": 74910 }, { "epoch": 0.3741416764463532, "grad_norm": 0.09473446756601334, "learning_rate": 1.8822749004981353e-05, "loss": 9.191, "step": 74920 }, { "epoch": 0.3741916152712927, "grad_norm": 0.09358023852109909, "learning_rate": 1.8821247090039803e-05, "loss": 9.1891, "step": 74930 }, { "epoch": 0.3742415540962321, "grad_norm": 0.09420783072710037, "learning_rate": 1.881974517509825e-05, "loss": 9.1874, "step": 74940 }, { "epoch": 0.3742914929211716, "grad_norm": 0.09367396682500839, "learning_rate": 1.88182432601567e-05, "loss": 9.1976, "step": 74950 }, { "epoch": 0.374341431746111, "grad_norm": 0.09339455515146255, "learning_rate": 1.881674134521515e-05, "loss": 9.1843, "step": 74960 }, { "epoch": 0.3743913705710505, "grad_norm": 0.09701774269342422, "learning_rate": 1.88152394302736e-05, "loss": 9.1973, "step": 74970 }, { "epoch": 0.3744413093959899, "grad_norm": 0.09740804880857468, "learning_rate": 1.881373751533205e-05, "loss": 9.1823, "step": 74980 }, { "epoch": 0.3744912482209294, "grad_norm": 0.09477312117815018, "learning_rate": 1.8812235600390497e-05, "loss": 9.1887, "step": 74990 }, { "epoch": 0.3745411870458688, "grad_norm": 0.09397511929273605, "learning_rate": 1.8810733685448947e-05, "loss": 9.178, "step": 75000 }, { "epoch": 0.3745911258708083, "grad_norm": 0.093665711581707, "learning_rate": 1.8809231770507397e-05, "loss": 9.1876, "step": 75010 }, { "epoch": 0.3746410646957477, "grad_norm": 0.09372160583734512, "learning_rate": 1.8807729855565848e-05, "loss": 9.1882, "step": 75020 }, { "epoch": 0.3746910035206872, "grad_norm": 0.09285315126180649, "learning_rate": 1.8806227940624298e-05, "loss": 9.1869, "step": 75030 }, { "epoch": 0.3747409423456266, "grad_norm": 0.10530349612236023, "learning_rate": 1.8804726025682745e-05, "loss": 9.1785, "step": 75040 }, { "epoch": 0.3747908811705661, "grad_norm": 0.09486869722604752, "learning_rate": 1.8803224110741195e-05, "loss": 9.1772, "step": 75050 }, { "epoch": 0.3748408199955055, "grad_norm": 0.09095942974090576, "learning_rate": 1.8801722195799645e-05, "loss": 9.1886, "step": 75060 }, { "epoch": 0.374890758820445, "grad_norm": 0.09552841633558273, "learning_rate": 1.8800220280858095e-05, "loss": 9.1746, "step": 75070 }, { "epoch": 0.3749406976453844, "grad_norm": 0.09439145773649216, "learning_rate": 1.8798718365916545e-05, "loss": 9.1689, "step": 75080 }, { "epoch": 0.3749906364703239, "grad_norm": 0.0905107706785202, "learning_rate": 1.8797216450974992e-05, "loss": 9.1867, "step": 75090 }, { "epoch": 0.3750405752952633, "grad_norm": 0.0866839811205864, "learning_rate": 1.8795714536033442e-05, "loss": 9.1908, "step": 75100 }, { "epoch": 0.3750905141202028, "grad_norm": 0.09543773531913757, "learning_rate": 1.8794212621091892e-05, "loss": 9.1941, "step": 75110 }, { "epoch": 0.3751404529451422, "grad_norm": 0.09861119091510773, "learning_rate": 1.8792710706150343e-05, "loss": 9.186, "step": 75120 }, { "epoch": 0.3751903917700817, "grad_norm": 0.09284248948097229, "learning_rate": 1.8791208791208793e-05, "loss": 9.1845, "step": 75130 }, { "epoch": 0.3752403305950211, "grad_norm": 0.0985037088394165, "learning_rate": 1.878970687626724e-05, "loss": 9.172, "step": 75140 }, { "epoch": 0.3752902694199606, "grad_norm": 0.09096460044384003, "learning_rate": 1.878820496132569e-05, "loss": 9.1894, "step": 75150 }, { "epoch": 0.3753402082449, "grad_norm": 0.09545445442199707, "learning_rate": 1.878670304638414e-05, "loss": 9.1824, "step": 75160 }, { "epoch": 0.3753901470698394, "grad_norm": 0.08448511362075806, "learning_rate": 1.878520113144259e-05, "loss": 9.1783, "step": 75170 }, { "epoch": 0.3754400858947789, "grad_norm": 0.09297330677509308, "learning_rate": 1.878369921650104e-05, "loss": 9.1779, "step": 75180 }, { "epoch": 0.3754900247197183, "grad_norm": 0.09360940754413605, "learning_rate": 1.8782197301559487e-05, "loss": 9.1796, "step": 75190 }, { "epoch": 0.3755399635446578, "grad_norm": 0.09942979365587234, "learning_rate": 1.8780695386617937e-05, "loss": 9.1824, "step": 75200 }, { "epoch": 0.3755899023695972, "grad_norm": 0.08949796110391617, "learning_rate": 1.8779193471676387e-05, "loss": 9.1709, "step": 75210 }, { "epoch": 0.3756398411945367, "grad_norm": 0.09671638906002045, "learning_rate": 1.8777691556734838e-05, "loss": 9.1738, "step": 75220 }, { "epoch": 0.3756897800194761, "grad_norm": 0.09702275693416595, "learning_rate": 1.8776189641793288e-05, "loss": 9.19, "step": 75230 }, { "epoch": 0.3757397188444156, "grad_norm": 0.09984516352415085, "learning_rate": 1.8774687726851738e-05, "loss": 9.1752, "step": 75240 }, { "epoch": 0.375789657669355, "grad_norm": 0.08986981213092804, "learning_rate": 1.8773185811910185e-05, "loss": 9.1787, "step": 75250 }, { "epoch": 0.3758395964942945, "grad_norm": 0.09705835580825806, "learning_rate": 1.8771683896968635e-05, "loss": 9.1834, "step": 75260 }, { "epoch": 0.3758895353192339, "grad_norm": 0.09416690468788147, "learning_rate": 1.8770181982027085e-05, "loss": 9.1762, "step": 75270 }, { "epoch": 0.3759394741441734, "grad_norm": 0.09269827604293823, "learning_rate": 1.8768680067085535e-05, "loss": 9.173, "step": 75280 }, { "epoch": 0.3759894129691128, "grad_norm": 0.08903074264526367, "learning_rate": 1.8767178152143985e-05, "loss": 9.183, "step": 75290 }, { "epoch": 0.3760393517940523, "grad_norm": 0.09470517188310623, "learning_rate": 1.8765676237202432e-05, "loss": 9.1779, "step": 75300 }, { "epoch": 0.3760892906189917, "grad_norm": 0.09774582087993622, "learning_rate": 1.8764174322260882e-05, "loss": 9.1639, "step": 75310 }, { "epoch": 0.3761392294439312, "grad_norm": 0.0924445167183876, "learning_rate": 1.8762672407319333e-05, "loss": 9.1775, "step": 75320 }, { "epoch": 0.3761891682688706, "grad_norm": 0.09741353988647461, "learning_rate": 1.8761170492377783e-05, "loss": 9.1726, "step": 75330 }, { "epoch": 0.3762391070938101, "grad_norm": 0.0985332578420639, "learning_rate": 1.8759668577436233e-05, "loss": 9.1825, "step": 75340 }, { "epoch": 0.3762890459187495, "grad_norm": 0.09728459268808365, "learning_rate": 1.875816666249468e-05, "loss": 9.1818, "step": 75350 }, { "epoch": 0.376338984743689, "grad_norm": 0.09466975927352905, "learning_rate": 1.875666474755313e-05, "loss": 9.1808, "step": 75360 }, { "epoch": 0.3763889235686284, "grad_norm": 0.09836606681346893, "learning_rate": 1.875516283261158e-05, "loss": 9.1755, "step": 75370 }, { "epoch": 0.3764388623935679, "grad_norm": 0.09210119396448135, "learning_rate": 1.875366091767003e-05, "loss": 9.1742, "step": 75380 }, { "epoch": 0.3764888012185073, "grad_norm": 0.09598752856254578, "learning_rate": 1.875215900272848e-05, "loss": 9.1785, "step": 75390 }, { "epoch": 0.3765387400434468, "grad_norm": 0.09594704955816269, "learning_rate": 1.8750657087786927e-05, "loss": 9.17, "step": 75400 }, { "epoch": 0.3765886788683862, "grad_norm": 0.09366414695978165, "learning_rate": 1.8749155172845377e-05, "loss": 9.1825, "step": 75410 }, { "epoch": 0.3766386176933257, "grad_norm": 0.09399878233671188, "learning_rate": 1.8747653257903828e-05, "loss": 9.1663, "step": 75420 }, { "epoch": 0.3766885565182651, "grad_norm": 0.09145677834749222, "learning_rate": 1.8746151342962278e-05, "loss": 9.1777, "step": 75430 }, { "epoch": 0.3767384953432046, "grad_norm": 0.0970328152179718, "learning_rate": 1.8744649428020728e-05, "loss": 9.1692, "step": 75440 }, { "epoch": 0.376788434168144, "grad_norm": 0.09038615226745605, "learning_rate": 1.8743147513079175e-05, "loss": 9.1891, "step": 75450 }, { "epoch": 0.3768383729930835, "grad_norm": 0.09302138537168503, "learning_rate": 1.8741645598137625e-05, "loss": 9.1833, "step": 75460 }, { "epoch": 0.3768883118180229, "grad_norm": 0.09453801065683365, "learning_rate": 1.8740143683196075e-05, "loss": 9.177, "step": 75470 }, { "epoch": 0.3769382506429624, "grad_norm": 0.09908052533864975, "learning_rate": 1.8738641768254525e-05, "loss": 9.182, "step": 75480 }, { "epoch": 0.3769881894679018, "grad_norm": 0.08896263688802719, "learning_rate": 1.8737139853312975e-05, "loss": 9.1729, "step": 75490 }, { "epoch": 0.3770381282928413, "grad_norm": 0.09187068045139313, "learning_rate": 1.8735637938371422e-05, "loss": 9.1645, "step": 75500 }, { "epoch": 0.3770880671177807, "grad_norm": 0.09145993739366531, "learning_rate": 1.8734136023429872e-05, "loss": 9.1855, "step": 75510 }, { "epoch": 0.3771380059427202, "grad_norm": 0.09123537689447403, "learning_rate": 1.8732634108488323e-05, "loss": 9.1817, "step": 75520 }, { "epoch": 0.3771879447676596, "grad_norm": 0.09334246069192886, "learning_rate": 1.8731132193546773e-05, "loss": 9.1796, "step": 75530 }, { "epoch": 0.3772378835925991, "grad_norm": 0.09891509264707565, "learning_rate": 1.8729630278605223e-05, "loss": 9.1878, "step": 75540 }, { "epoch": 0.3772878224175385, "grad_norm": 0.0893426239490509, "learning_rate": 1.872812836366367e-05, "loss": 9.1732, "step": 75550 }, { "epoch": 0.377337761242478, "grad_norm": 0.09921654313802719, "learning_rate": 1.8726626448722123e-05, "loss": 9.1717, "step": 75560 }, { "epoch": 0.3773877000674174, "grad_norm": 0.0956064984202385, "learning_rate": 1.872512453378057e-05, "loss": 9.1776, "step": 75570 }, { "epoch": 0.3774376388923569, "grad_norm": 0.09236710518598557, "learning_rate": 1.872362261883902e-05, "loss": 9.1823, "step": 75580 }, { "epoch": 0.3774875777172963, "grad_norm": 0.09194951504468918, "learning_rate": 1.872212070389747e-05, "loss": 9.1675, "step": 75590 }, { "epoch": 0.3775375165422358, "grad_norm": 0.09250642359256744, "learning_rate": 1.8720618788955917e-05, "loss": 9.1711, "step": 75600 }, { "epoch": 0.3775874553671752, "grad_norm": 0.09264052659273148, "learning_rate": 1.871911687401437e-05, "loss": 9.1799, "step": 75610 }, { "epoch": 0.3776373941921147, "grad_norm": 0.09335634112358093, "learning_rate": 1.8717614959072818e-05, "loss": 9.1752, "step": 75620 }, { "epoch": 0.3776873330170541, "grad_norm": 0.10500754415988922, "learning_rate": 1.8716113044131268e-05, "loss": 9.1719, "step": 75630 }, { "epoch": 0.37773727184199357, "grad_norm": 0.09396055340766907, "learning_rate": 1.8714611129189718e-05, "loss": 9.1832, "step": 75640 }, { "epoch": 0.377787210666933, "grad_norm": 0.09786514937877655, "learning_rate": 1.8713109214248165e-05, "loss": 9.1689, "step": 75650 }, { "epoch": 0.37783714949187247, "grad_norm": 0.09014048427343369, "learning_rate": 1.871160729930662e-05, "loss": 9.1699, "step": 75660 }, { "epoch": 0.3778870883168119, "grad_norm": 0.09452909231185913, "learning_rate": 1.8710105384365065e-05, "loss": 9.1733, "step": 75670 }, { "epoch": 0.37793702714175137, "grad_norm": 0.09486046433448792, "learning_rate": 1.8708603469423515e-05, "loss": 9.1663, "step": 75680 }, { "epoch": 0.3779869659666908, "grad_norm": 0.09308832138776779, "learning_rate": 1.8707101554481965e-05, "loss": 9.1795, "step": 75690 }, { "epoch": 0.37803690479163027, "grad_norm": 0.09890872985124588, "learning_rate": 1.8705599639540412e-05, "loss": 9.1685, "step": 75700 }, { "epoch": 0.3780868436165697, "grad_norm": 0.09047377854585648, "learning_rate": 1.8704097724598866e-05, "loss": 9.1856, "step": 75710 }, { "epoch": 0.37813678244150917, "grad_norm": 0.09121832996606827, "learning_rate": 1.8702595809657313e-05, "loss": 9.1825, "step": 75720 }, { "epoch": 0.3781867212664486, "grad_norm": 0.09841986000537872, "learning_rate": 1.8701093894715763e-05, "loss": 9.1776, "step": 75730 }, { "epoch": 0.37823666009138807, "grad_norm": 0.09331878274679184, "learning_rate": 1.8699591979774213e-05, "loss": 9.1805, "step": 75740 }, { "epoch": 0.3782865989163275, "grad_norm": 0.09422630816698074, "learning_rate": 1.869809006483266e-05, "loss": 9.1732, "step": 75750 }, { "epoch": 0.37833653774126697, "grad_norm": 0.09663139283657074, "learning_rate": 1.8696588149891113e-05, "loss": 9.1744, "step": 75760 }, { "epoch": 0.3783864765662064, "grad_norm": 0.0949576273560524, "learning_rate": 1.869508623494956e-05, "loss": 9.1771, "step": 75770 }, { "epoch": 0.37843641539114586, "grad_norm": 0.09695305675268173, "learning_rate": 1.869358432000801e-05, "loss": 9.1587, "step": 75780 }, { "epoch": 0.3784863542160853, "grad_norm": 0.09372842311859131, "learning_rate": 1.869208240506646e-05, "loss": 9.1616, "step": 75790 }, { "epoch": 0.37853629304102476, "grad_norm": 0.09371675550937653, "learning_rate": 1.8690580490124907e-05, "loss": 9.1689, "step": 75800 }, { "epoch": 0.3785862318659642, "grad_norm": 0.09579101204872131, "learning_rate": 1.868907857518336e-05, "loss": 9.1754, "step": 75810 }, { "epoch": 0.37863617069090366, "grad_norm": 0.09259112924337387, "learning_rate": 1.8687576660241808e-05, "loss": 9.1816, "step": 75820 }, { "epoch": 0.3786861095158431, "grad_norm": 0.09288639575242996, "learning_rate": 1.8686074745300258e-05, "loss": 9.1795, "step": 75830 }, { "epoch": 0.37873604834078256, "grad_norm": 0.09235890954732895, "learning_rate": 1.8684572830358708e-05, "loss": 9.1722, "step": 75840 }, { "epoch": 0.378785987165722, "grad_norm": 0.0942944809794426, "learning_rate": 1.8683070915417155e-05, "loss": 9.1728, "step": 75850 }, { "epoch": 0.37883592599066146, "grad_norm": 0.09688469022512436, "learning_rate": 1.868156900047561e-05, "loss": 9.1777, "step": 75860 }, { "epoch": 0.3788858648156009, "grad_norm": 0.08886689692735672, "learning_rate": 1.8680067085534055e-05, "loss": 9.1681, "step": 75870 }, { "epoch": 0.37893580364054036, "grad_norm": 0.09597429633140564, "learning_rate": 1.867856517059251e-05, "loss": 9.1737, "step": 75880 }, { "epoch": 0.3789857424654798, "grad_norm": 0.08873099833726883, "learning_rate": 1.8677063255650955e-05, "loss": 9.1717, "step": 75890 }, { "epoch": 0.37903568129041926, "grad_norm": 0.08912356197834015, "learning_rate": 1.8675561340709402e-05, "loss": 9.1843, "step": 75900 }, { "epoch": 0.3790856201153587, "grad_norm": 0.09434056282043457, "learning_rate": 1.8674059425767856e-05, "loss": 9.1582, "step": 75910 }, { "epoch": 0.37913555894029816, "grad_norm": 0.08778839558362961, "learning_rate": 1.8672557510826303e-05, "loss": 9.1786, "step": 75920 }, { "epoch": 0.3791854977652376, "grad_norm": 0.09374192357063293, "learning_rate": 1.8671055595884756e-05, "loss": 9.1732, "step": 75930 }, { "epoch": 0.37923543659017706, "grad_norm": 0.0933762714266777, "learning_rate": 1.8669553680943203e-05, "loss": 9.1647, "step": 75940 }, { "epoch": 0.3792853754151165, "grad_norm": 0.09484566748142242, "learning_rate": 1.866805176600165e-05, "loss": 9.1594, "step": 75950 }, { "epoch": 0.37933531424005595, "grad_norm": 0.09006204456090927, "learning_rate": 1.8666549851060103e-05, "loss": 9.1784, "step": 75960 }, { "epoch": 0.3793852530649954, "grad_norm": 0.09235017746686935, "learning_rate": 1.866504793611855e-05, "loss": 9.164, "step": 75970 }, { "epoch": 0.37943519188993485, "grad_norm": 0.09987694025039673, "learning_rate": 1.8663546021177004e-05, "loss": 9.1695, "step": 75980 }, { "epoch": 0.3794851307148743, "grad_norm": 0.08617845177650452, "learning_rate": 1.866204410623545e-05, "loss": 9.182, "step": 75990 }, { "epoch": 0.37953506953981375, "grad_norm": 0.08854833245277405, "learning_rate": 1.8660542191293897e-05, "loss": 9.1611, "step": 76000 }, { "epoch": 0.3795850083647532, "grad_norm": 0.09249111264944077, "learning_rate": 1.865904027635235e-05, "loss": 9.1808, "step": 76010 }, { "epoch": 0.37963494718969265, "grad_norm": 0.09673107415437698, "learning_rate": 1.8657538361410798e-05, "loss": 9.1706, "step": 76020 }, { "epoch": 0.3796848860146321, "grad_norm": 0.09875845909118652, "learning_rate": 1.865603644646925e-05, "loss": 9.1723, "step": 76030 }, { "epoch": 0.37973482483957155, "grad_norm": 0.09128276258707047, "learning_rate": 1.8654534531527698e-05, "loss": 9.1666, "step": 76040 }, { "epoch": 0.37978476366451097, "grad_norm": 0.09420790523290634, "learning_rate": 1.8653032616586145e-05, "loss": 9.163, "step": 76050 }, { "epoch": 0.37983470248945045, "grad_norm": 0.09466836601495743, "learning_rate": 1.86515307016446e-05, "loss": 9.1646, "step": 76060 }, { "epoch": 0.37988464131438987, "grad_norm": 0.10341818630695343, "learning_rate": 1.8650028786703045e-05, "loss": 9.1677, "step": 76070 }, { "epoch": 0.37993458013932935, "grad_norm": 0.0988142341375351, "learning_rate": 1.86485268717615e-05, "loss": 9.1715, "step": 76080 }, { "epoch": 0.37998451896426877, "grad_norm": 0.09664300084114075, "learning_rate": 1.8647024956819946e-05, "loss": 9.1588, "step": 76090 }, { "epoch": 0.38003445778920825, "grad_norm": 0.09185276925563812, "learning_rate": 1.8645523041878392e-05, "loss": 9.1747, "step": 76100 }, { "epoch": 0.38008439661414767, "grad_norm": 0.09593552350997925, "learning_rate": 1.8644021126936846e-05, "loss": 9.1682, "step": 76110 }, { "epoch": 0.38013433543908715, "grad_norm": 0.09431128203868866, "learning_rate": 1.8642519211995293e-05, "loss": 9.1772, "step": 76120 }, { "epoch": 0.38018427426402657, "grad_norm": 0.09099570661783218, "learning_rate": 1.8641017297053746e-05, "loss": 9.1571, "step": 76130 }, { "epoch": 0.38023421308896604, "grad_norm": 0.08938246220350266, "learning_rate": 1.8639515382112193e-05, "loss": 9.1721, "step": 76140 }, { "epoch": 0.38028415191390547, "grad_norm": 0.09171757102012634, "learning_rate": 1.863801346717064e-05, "loss": 9.1665, "step": 76150 }, { "epoch": 0.3803340907388449, "grad_norm": 0.09394662827253342, "learning_rate": 1.8636511552229093e-05, "loss": 9.1786, "step": 76160 }, { "epoch": 0.38038402956378436, "grad_norm": 0.0892520397901535, "learning_rate": 1.863500963728754e-05, "loss": 9.1615, "step": 76170 }, { "epoch": 0.3804339683887238, "grad_norm": 0.09431727975606918, "learning_rate": 1.8633507722345994e-05, "loss": 9.1807, "step": 76180 }, { "epoch": 0.38048390721366326, "grad_norm": 0.09187456965446472, "learning_rate": 1.863200580740444e-05, "loss": 9.1699, "step": 76190 }, { "epoch": 0.3805338460386027, "grad_norm": 0.09355001896619797, "learning_rate": 1.863050389246289e-05, "loss": 9.1677, "step": 76200 }, { "epoch": 0.38058378486354216, "grad_norm": 0.09874553978443146, "learning_rate": 1.862900197752134e-05, "loss": 9.1619, "step": 76210 }, { "epoch": 0.3806337236884816, "grad_norm": 0.0971277505159378, "learning_rate": 1.8627500062579788e-05, "loss": 9.157, "step": 76220 }, { "epoch": 0.38068366251342106, "grad_norm": 0.09368085116147995, "learning_rate": 1.862599814763824e-05, "loss": 9.1691, "step": 76230 }, { "epoch": 0.3807336013383605, "grad_norm": 0.08936905115842819, "learning_rate": 1.8624496232696688e-05, "loss": 9.1715, "step": 76240 }, { "epoch": 0.38078354016329996, "grad_norm": 0.09351339191198349, "learning_rate": 1.8622994317755138e-05, "loss": 9.1657, "step": 76250 }, { "epoch": 0.3808334789882394, "grad_norm": 0.09461808949708939, "learning_rate": 1.862149240281359e-05, "loss": 9.1624, "step": 76260 }, { "epoch": 0.38088341781317886, "grad_norm": 0.08949335664510727, "learning_rate": 1.8619990487872035e-05, "loss": 9.1684, "step": 76270 }, { "epoch": 0.3809333566381183, "grad_norm": 0.08932676911354065, "learning_rate": 1.861848857293049e-05, "loss": 9.1631, "step": 76280 }, { "epoch": 0.38098329546305776, "grad_norm": 0.09498759359121323, "learning_rate": 1.8616986657988936e-05, "loss": 9.163, "step": 76290 }, { "epoch": 0.3810332342879972, "grad_norm": 0.09363845735788345, "learning_rate": 1.8615484743047386e-05, "loss": 9.174, "step": 76300 }, { "epoch": 0.38108317311293666, "grad_norm": 0.08573130518198013, "learning_rate": 1.8613982828105836e-05, "loss": 9.1665, "step": 76310 }, { "epoch": 0.3811331119378761, "grad_norm": 0.09437863528728485, "learning_rate": 1.8612480913164283e-05, "loss": 9.1553, "step": 76320 }, { "epoch": 0.38118305076281556, "grad_norm": 0.09514939785003662, "learning_rate": 1.8610978998222736e-05, "loss": 9.161, "step": 76330 }, { "epoch": 0.381232989587755, "grad_norm": 0.0890861302614212, "learning_rate": 1.8609477083281183e-05, "loss": 9.1585, "step": 76340 }, { "epoch": 0.38128292841269446, "grad_norm": 0.09901072829961777, "learning_rate": 1.8607975168339633e-05, "loss": 9.1656, "step": 76350 }, { "epoch": 0.3813328672376339, "grad_norm": 0.09135191142559052, "learning_rate": 1.8606473253398083e-05, "loss": 9.1679, "step": 76360 }, { "epoch": 0.38138280606257335, "grad_norm": 0.08994290977716446, "learning_rate": 1.860497133845653e-05, "loss": 9.1553, "step": 76370 }, { "epoch": 0.3814327448875128, "grad_norm": 0.09007209539413452, "learning_rate": 1.8603469423514984e-05, "loss": 9.1617, "step": 76380 }, { "epoch": 0.38148268371245225, "grad_norm": 0.09258635342121124, "learning_rate": 1.860196750857343e-05, "loss": 9.1664, "step": 76390 }, { "epoch": 0.3815326225373917, "grad_norm": 0.09930216521024704, "learning_rate": 1.860046559363188e-05, "loss": 9.1606, "step": 76400 }, { "epoch": 0.38158256136233115, "grad_norm": 0.10146905481815338, "learning_rate": 1.859896367869033e-05, "loss": 9.1603, "step": 76410 }, { "epoch": 0.3816325001872706, "grad_norm": 0.09039684385061264, "learning_rate": 1.8597461763748778e-05, "loss": 9.1433, "step": 76420 }, { "epoch": 0.38168243901221005, "grad_norm": 0.09020677208900452, "learning_rate": 1.859595984880723e-05, "loss": 9.1615, "step": 76430 }, { "epoch": 0.3817323778371495, "grad_norm": 0.0926944836974144, "learning_rate": 1.8594457933865678e-05, "loss": 9.1614, "step": 76440 }, { "epoch": 0.38178231666208895, "grad_norm": 0.08853311091661453, "learning_rate": 1.8592956018924128e-05, "loss": 9.1708, "step": 76450 }, { "epoch": 0.38183225548702837, "grad_norm": 0.09793388843536377, "learning_rate": 1.859145410398258e-05, "loss": 9.1545, "step": 76460 }, { "epoch": 0.38188219431196785, "grad_norm": 0.08943363279104233, "learning_rate": 1.8589952189041025e-05, "loss": 9.1679, "step": 76470 }, { "epoch": 0.38193213313690727, "grad_norm": 0.09909660369157791, "learning_rate": 1.858845027409948e-05, "loss": 9.1526, "step": 76480 }, { "epoch": 0.38198207196184675, "grad_norm": 0.09491763263940811, "learning_rate": 1.8586948359157926e-05, "loss": 9.1674, "step": 76490 }, { "epoch": 0.38203201078678617, "grad_norm": 0.0981326624751091, "learning_rate": 1.8585446444216376e-05, "loss": 9.1596, "step": 76500 }, { "epoch": 0.38208194961172565, "grad_norm": 0.09292647987604141, "learning_rate": 1.8583944529274826e-05, "loss": 9.1512, "step": 76510 }, { "epoch": 0.38213188843666507, "grad_norm": 0.0934506356716156, "learning_rate": 1.8582442614333273e-05, "loss": 9.1633, "step": 76520 }, { "epoch": 0.38218182726160455, "grad_norm": 0.09270615875720978, "learning_rate": 1.8580940699391726e-05, "loss": 9.155, "step": 76530 }, { "epoch": 0.38223176608654397, "grad_norm": 0.09436081349849701, "learning_rate": 1.8579438784450173e-05, "loss": 9.1638, "step": 76540 }, { "epoch": 0.38228170491148344, "grad_norm": 0.09928996115922928, "learning_rate": 1.8577936869508623e-05, "loss": 9.1611, "step": 76550 }, { "epoch": 0.38233164373642287, "grad_norm": 0.09387561678886414, "learning_rate": 1.8576434954567073e-05, "loss": 9.1742, "step": 76560 }, { "epoch": 0.38238158256136234, "grad_norm": 0.09134583920240402, "learning_rate": 1.8574933039625524e-05, "loss": 9.1692, "step": 76570 }, { "epoch": 0.38243152138630176, "grad_norm": 0.09804858267307281, "learning_rate": 1.8573431124683974e-05, "loss": 9.1597, "step": 76580 }, { "epoch": 0.38248146021124124, "grad_norm": 0.09265462309122086, "learning_rate": 1.857192920974242e-05, "loss": 9.1541, "step": 76590 }, { "epoch": 0.38253139903618066, "grad_norm": 0.0962829664349556, "learning_rate": 1.8570427294800874e-05, "loss": 9.1548, "step": 76600 }, { "epoch": 0.38258133786112014, "grad_norm": 0.10263549536466599, "learning_rate": 1.856892537985932e-05, "loss": 9.1655, "step": 76610 }, { "epoch": 0.38263127668605956, "grad_norm": 0.0963212177157402, "learning_rate": 1.856742346491777e-05, "loss": 9.161, "step": 76620 }, { "epoch": 0.38268121551099904, "grad_norm": 0.08831122517585754, "learning_rate": 1.856592154997622e-05, "loss": 9.1554, "step": 76630 }, { "epoch": 0.38273115433593846, "grad_norm": 0.0948970839381218, "learning_rate": 1.8564419635034668e-05, "loss": 9.1628, "step": 76640 }, { "epoch": 0.38278109316087794, "grad_norm": 0.09297934174537659, "learning_rate": 1.856291772009312e-05, "loss": 9.1581, "step": 76650 }, { "epoch": 0.38283103198581736, "grad_norm": 0.09483528882265091, "learning_rate": 1.856141580515157e-05, "loss": 9.1561, "step": 76660 }, { "epoch": 0.38288097081075684, "grad_norm": 0.09543635696172714, "learning_rate": 1.855991389021002e-05, "loss": 9.1531, "step": 76670 }, { "epoch": 0.38293090963569626, "grad_norm": 0.09006334096193314, "learning_rate": 1.855841197526847e-05, "loss": 9.157, "step": 76680 }, { "epoch": 0.38298084846063574, "grad_norm": 0.09074097126722336, "learning_rate": 1.8556910060326916e-05, "loss": 9.1644, "step": 76690 }, { "epoch": 0.38303078728557516, "grad_norm": 0.09905076771974564, "learning_rate": 1.855540814538537e-05, "loss": 9.1571, "step": 76700 }, { "epoch": 0.38308072611051464, "grad_norm": 0.09774630516767502, "learning_rate": 1.8553906230443816e-05, "loss": 9.1457, "step": 76710 }, { "epoch": 0.38313066493545406, "grad_norm": 0.09291476756334305, "learning_rate": 1.8552404315502266e-05, "loss": 9.144, "step": 76720 }, { "epoch": 0.38318060376039353, "grad_norm": 0.09428045153617859, "learning_rate": 1.8550902400560716e-05, "loss": 9.1604, "step": 76730 }, { "epoch": 0.38323054258533296, "grad_norm": 0.09279389679431915, "learning_rate": 1.8549400485619163e-05, "loss": 9.1636, "step": 76740 }, { "epoch": 0.38328048141027243, "grad_norm": 0.10000916570425034, "learning_rate": 1.8547898570677617e-05, "loss": 9.1772, "step": 76750 }, { "epoch": 0.38333042023521186, "grad_norm": 0.09311681985855103, "learning_rate": 1.8546396655736063e-05, "loss": 9.1693, "step": 76760 }, { "epoch": 0.38338035906015133, "grad_norm": 0.09120059013366699, "learning_rate": 1.8544894740794514e-05, "loss": 9.1541, "step": 76770 }, { "epoch": 0.38343029788509075, "grad_norm": 0.08788907527923584, "learning_rate": 1.8543392825852964e-05, "loss": 9.1603, "step": 76780 }, { "epoch": 0.38348023671003023, "grad_norm": 0.09764920175075531, "learning_rate": 1.854189091091141e-05, "loss": 9.1641, "step": 76790 }, { "epoch": 0.38353017553496965, "grad_norm": 0.098087377846241, "learning_rate": 1.8540388995969864e-05, "loss": 9.1472, "step": 76800 }, { "epoch": 0.38358011435990913, "grad_norm": 0.09712155908346176, "learning_rate": 1.853888708102831e-05, "loss": 9.1688, "step": 76810 }, { "epoch": 0.38363005318484855, "grad_norm": 0.09204491227865219, "learning_rate": 1.853738516608676e-05, "loss": 9.1757, "step": 76820 }, { "epoch": 0.38367999200978803, "grad_norm": 0.09970824420452118, "learning_rate": 1.853588325114521e-05, "loss": 9.171, "step": 76830 }, { "epoch": 0.38372993083472745, "grad_norm": 0.09648123383522034, "learning_rate": 1.8534381336203658e-05, "loss": 9.1693, "step": 76840 }, { "epoch": 0.38377986965966693, "grad_norm": 0.09382256120443344, "learning_rate": 1.853287942126211e-05, "loss": 9.1641, "step": 76850 }, { "epoch": 0.38382980848460635, "grad_norm": 0.09341836720705032, "learning_rate": 1.853137750632056e-05, "loss": 9.1648, "step": 76860 }, { "epoch": 0.3838797473095458, "grad_norm": 0.09189660847187042, "learning_rate": 1.852987559137901e-05, "loss": 9.1553, "step": 76870 }, { "epoch": 0.38392968613448525, "grad_norm": 0.09186588227748871, "learning_rate": 1.852837367643746e-05, "loss": 9.1624, "step": 76880 }, { "epoch": 0.3839796249594247, "grad_norm": 0.09395985305309296, "learning_rate": 1.852687176149591e-05, "loss": 9.1741, "step": 76890 }, { "epoch": 0.38402956378436415, "grad_norm": 0.09236618131399155, "learning_rate": 1.852536984655436e-05, "loss": 9.1473, "step": 76900 }, { "epoch": 0.3840795026093036, "grad_norm": 0.1052381843328476, "learning_rate": 1.8523867931612806e-05, "loss": 9.1679, "step": 76910 }, { "epoch": 0.38412944143424305, "grad_norm": 0.09067463874816895, "learning_rate": 1.8522366016671256e-05, "loss": 9.1509, "step": 76920 }, { "epoch": 0.3841793802591825, "grad_norm": 0.09203337877988815, "learning_rate": 1.8520864101729706e-05, "loss": 9.1688, "step": 76930 }, { "epoch": 0.38422931908412195, "grad_norm": 0.09451109915971756, "learning_rate": 1.8519362186788156e-05, "loss": 9.1508, "step": 76940 }, { "epoch": 0.3842792579090614, "grad_norm": 0.09311412274837494, "learning_rate": 1.8517860271846607e-05, "loss": 9.1549, "step": 76950 }, { "epoch": 0.38432919673400084, "grad_norm": 0.08877348154783249, "learning_rate": 1.8516358356905053e-05, "loss": 9.1582, "step": 76960 }, { "epoch": 0.3843791355589403, "grad_norm": 0.09304699301719666, "learning_rate": 1.8514856441963504e-05, "loss": 9.1632, "step": 76970 }, { "epoch": 0.38442907438387974, "grad_norm": 0.09285631775856018, "learning_rate": 1.8513354527021954e-05, "loss": 9.1592, "step": 76980 }, { "epoch": 0.3844790132088192, "grad_norm": 0.09747754037380219, "learning_rate": 1.8511852612080404e-05, "loss": 9.1499, "step": 76990 }, { "epoch": 0.38452895203375864, "grad_norm": 0.09608352929353714, "learning_rate": 1.8510350697138854e-05, "loss": 9.1616, "step": 77000 }, { "epoch": 0.3845788908586981, "grad_norm": 0.09752684831619263, "learning_rate": 1.85088487821973e-05, "loss": 9.1647, "step": 77010 }, { "epoch": 0.38462882968363754, "grad_norm": 0.09097377210855484, "learning_rate": 1.850734686725575e-05, "loss": 9.1597, "step": 77020 }, { "epoch": 0.384678768508577, "grad_norm": 0.09708648175001144, "learning_rate": 1.85058449523142e-05, "loss": 9.158, "step": 77030 }, { "epoch": 0.38472870733351644, "grad_norm": 0.09771420806646347, "learning_rate": 1.850434303737265e-05, "loss": 9.1618, "step": 77040 }, { "epoch": 0.3847786461584559, "grad_norm": 0.09642086923122406, "learning_rate": 1.85028411224311e-05, "loss": 9.1574, "step": 77050 }, { "epoch": 0.38482858498339534, "grad_norm": 0.09355588257312775, "learning_rate": 1.850133920748955e-05, "loss": 9.1562, "step": 77060 }, { "epoch": 0.3848785238083348, "grad_norm": 0.09392425417900085, "learning_rate": 1.8499837292548e-05, "loss": 9.1579, "step": 77070 }, { "epoch": 0.38492846263327424, "grad_norm": 0.09295708686113358, "learning_rate": 1.849833537760645e-05, "loss": 9.1519, "step": 77080 }, { "epoch": 0.3849784014582137, "grad_norm": 0.09548468142747879, "learning_rate": 1.84968334626649e-05, "loss": 9.1605, "step": 77090 }, { "epoch": 0.38502834028315314, "grad_norm": 0.09481420367956161, "learning_rate": 1.849533154772335e-05, "loss": 9.1501, "step": 77100 }, { "epoch": 0.3850782791080926, "grad_norm": 0.10212825238704681, "learning_rate": 1.8493829632781796e-05, "loss": 9.1495, "step": 77110 }, { "epoch": 0.38512821793303204, "grad_norm": 0.09605475515127182, "learning_rate": 1.8492327717840246e-05, "loss": 9.1723, "step": 77120 }, { "epoch": 0.3851781567579715, "grad_norm": 0.09045825153589249, "learning_rate": 1.8490825802898696e-05, "loss": 9.1645, "step": 77130 }, { "epoch": 0.38522809558291093, "grad_norm": 0.0943480134010315, "learning_rate": 1.8489323887957146e-05, "loss": 9.1471, "step": 77140 }, { "epoch": 0.38527803440785036, "grad_norm": 0.09214089065790176, "learning_rate": 1.8487821973015597e-05, "loss": 9.1521, "step": 77150 }, { "epoch": 0.38532797323278983, "grad_norm": 0.09132665395736694, "learning_rate": 1.8486320058074043e-05, "loss": 9.1588, "step": 77160 }, { "epoch": 0.38537791205772925, "grad_norm": 0.09081696718931198, "learning_rate": 1.8484818143132494e-05, "loss": 9.1676, "step": 77170 }, { "epoch": 0.38542785088266873, "grad_norm": 0.09519261121749878, "learning_rate": 1.8483316228190944e-05, "loss": 9.1709, "step": 77180 }, { "epoch": 0.38547778970760815, "grad_norm": 0.090855672955513, "learning_rate": 1.8481814313249394e-05, "loss": 9.1543, "step": 77190 }, { "epoch": 0.38552772853254763, "grad_norm": 0.09267590194940567, "learning_rate": 1.8480312398307844e-05, "loss": 9.1494, "step": 77200 }, { "epoch": 0.38557766735748705, "grad_norm": 0.1009821891784668, "learning_rate": 1.8478810483366294e-05, "loss": 9.1579, "step": 77210 }, { "epoch": 0.38562760618242653, "grad_norm": 0.09545056521892548, "learning_rate": 1.847730856842474e-05, "loss": 9.1533, "step": 77220 }, { "epoch": 0.38567754500736595, "grad_norm": 0.09436902403831482, "learning_rate": 1.847580665348319e-05, "loss": 9.1525, "step": 77230 }, { "epoch": 0.38572748383230543, "grad_norm": 0.09197907149791718, "learning_rate": 1.847430473854164e-05, "loss": 9.1414, "step": 77240 }, { "epoch": 0.38577742265724485, "grad_norm": 0.08811882883310318, "learning_rate": 1.847280282360009e-05, "loss": 9.1626, "step": 77250 }, { "epoch": 0.38582736148218433, "grad_norm": 0.09159471839666367, "learning_rate": 1.8471300908658542e-05, "loss": 9.1453, "step": 77260 }, { "epoch": 0.38587730030712375, "grad_norm": 0.08988473564386368, "learning_rate": 1.846979899371699e-05, "loss": 9.1385, "step": 77270 }, { "epoch": 0.3859272391320632, "grad_norm": 0.09255439788103104, "learning_rate": 1.846829707877544e-05, "loss": 9.1462, "step": 77280 }, { "epoch": 0.38597717795700265, "grad_norm": 0.09517369419336319, "learning_rate": 1.846679516383389e-05, "loss": 9.1512, "step": 77290 }, { "epoch": 0.3860271167819421, "grad_norm": 0.09043008089065552, "learning_rate": 1.846529324889234e-05, "loss": 9.1543, "step": 77300 }, { "epoch": 0.38607705560688155, "grad_norm": 0.0920136496424675, "learning_rate": 1.846379133395079e-05, "loss": 9.1581, "step": 77310 }, { "epoch": 0.386126994431821, "grad_norm": 0.09350711852312088, "learning_rate": 1.8462289419009236e-05, "loss": 9.1577, "step": 77320 }, { "epoch": 0.38617693325676045, "grad_norm": 0.09980452060699463, "learning_rate": 1.8460787504067686e-05, "loss": 9.1488, "step": 77330 }, { "epoch": 0.3862268720816999, "grad_norm": 0.09318140149116516, "learning_rate": 1.8459285589126136e-05, "loss": 9.1622, "step": 77340 }, { "epoch": 0.38627681090663935, "grad_norm": 0.08900347352027893, "learning_rate": 1.8457783674184587e-05, "loss": 9.1495, "step": 77350 }, { "epoch": 0.3863267497315788, "grad_norm": 0.09255148470401764, "learning_rate": 1.8456281759243037e-05, "loss": 9.1642, "step": 77360 }, { "epoch": 0.38637668855651824, "grad_norm": 0.09274660795927048, "learning_rate": 1.8454779844301484e-05, "loss": 9.1568, "step": 77370 }, { "epoch": 0.3864266273814577, "grad_norm": 0.09435738623142242, "learning_rate": 1.8453277929359934e-05, "loss": 9.1571, "step": 77380 }, { "epoch": 0.38647656620639714, "grad_norm": 0.09425579011440277, "learning_rate": 1.8451776014418384e-05, "loss": 9.1503, "step": 77390 }, { "epoch": 0.3865265050313366, "grad_norm": 0.09888068586587906, "learning_rate": 1.8450274099476834e-05, "loss": 9.1483, "step": 77400 }, { "epoch": 0.38657644385627604, "grad_norm": 0.0868912860751152, "learning_rate": 1.8448772184535284e-05, "loss": 9.1477, "step": 77410 }, { "epoch": 0.3866263826812155, "grad_norm": 0.09297952800989151, "learning_rate": 1.844727026959373e-05, "loss": 9.1485, "step": 77420 }, { "epoch": 0.38667632150615494, "grad_norm": 0.09065989404916763, "learning_rate": 1.844576835465218e-05, "loss": 9.1491, "step": 77430 }, { "epoch": 0.3867262603310944, "grad_norm": 0.0999770238995552, "learning_rate": 1.844426643971063e-05, "loss": 9.1413, "step": 77440 }, { "epoch": 0.38677619915603384, "grad_norm": 0.09342008084058762, "learning_rate": 1.844276452476908e-05, "loss": 9.1529, "step": 77450 }, { "epoch": 0.3868261379809733, "grad_norm": 0.09146399050951004, "learning_rate": 1.8441262609827532e-05, "loss": 9.1423, "step": 77460 }, { "epoch": 0.38687607680591274, "grad_norm": 0.09426107257604599, "learning_rate": 1.843976069488598e-05, "loss": 9.1583, "step": 77470 }, { "epoch": 0.3869260156308522, "grad_norm": 0.0957350954413414, "learning_rate": 1.843825877994443e-05, "loss": 9.1493, "step": 77480 }, { "epoch": 0.38697595445579164, "grad_norm": 0.09453484416007996, "learning_rate": 1.843675686500288e-05, "loss": 9.1603, "step": 77490 }, { "epoch": 0.3870258932807311, "grad_norm": 0.09307164698839188, "learning_rate": 1.843525495006133e-05, "loss": 9.1468, "step": 77500 }, { "epoch": 0.38707583210567054, "grad_norm": 0.09427579492330551, "learning_rate": 1.843375303511978e-05, "loss": 9.1512, "step": 77510 }, { "epoch": 0.38712577093061, "grad_norm": 0.09382929652929306, "learning_rate": 1.8432251120178226e-05, "loss": 9.154, "step": 77520 }, { "epoch": 0.38717570975554944, "grad_norm": 0.0964818224310875, "learning_rate": 1.843074920523668e-05, "loss": 9.1498, "step": 77530 }, { "epoch": 0.3872256485804889, "grad_norm": 0.09739819914102554, "learning_rate": 1.8429247290295127e-05, "loss": 9.1422, "step": 77540 }, { "epoch": 0.38727558740542833, "grad_norm": 0.0981798991560936, "learning_rate": 1.8427745375353577e-05, "loss": 9.1577, "step": 77550 }, { "epoch": 0.3873255262303678, "grad_norm": 0.09459348767995834, "learning_rate": 1.8426243460412027e-05, "loss": 9.1525, "step": 77560 }, { "epoch": 0.38737546505530723, "grad_norm": 0.08874860405921936, "learning_rate": 1.8424741545470474e-05, "loss": 9.1339, "step": 77570 }, { "epoch": 0.3874254038802467, "grad_norm": 0.0875902771949768, "learning_rate": 1.8423239630528927e-05, "loss": 9.1537, "step": 77580 }, { "epoch": 0.38747534270518613, "grad_norm": 0.09034288674592972, "learning_rate": 1.8421737715587374e-05, "loss": 9.1553, "step": 77590 }, { "epoch": 0.3875252815301256, "grad_norm": 0.09590810537338257, "learning_rate": 1.8420235800645824e-05, "loss": 9.162, "step": 77600 }, { "epoch": 0.38757522035506503, "grad_norm": 0.0925174131989479, "learning_rate": 1.8418733885704274e-05, "loss": 9.1575, "step": 77610 }, { "epoch": 0.3876251591800045, "grad_norm": 0.09790638089179993, "learning_rate": 1.841723197076272e-05, "loss": 9.1518, "step": 77620 }, { "epoch": 0.38767509800494393, "grad_norm": 0.09194985032081604, "learning_rate": 1.8415730055821175e-05, "loss": 9.1374, "step": 77630 }, { "epoch": 0.3877250368298834, "grad_norm": 0.09064792096614838, "learning_rate": 1.841422814087962e-05, "loss": 9.1472, "step": 77640 }, { "epoch": 0.38777497565482283, "grad_norm": 0.09329324215650558, "learning_rate": 1.841272622593807e-05, "loss": 9.1595, "step": 77650 }, { "epoch": 0.3878249144797623, "grad_norm": 0.09056134521961212, "learning_rate": 1.8411224310996522e-05, "loss": 9.1453, "step": 77660 }, { "epoch": 0.3878748533047017, "grad_norm": 0.08973592519760132, "learning_rate": 1.840972239605497e-05, "loss": 9.1536, "step": 77670 }, { "epoch": 0.3879247921296412, "grad_norm": 0.09086141735315323, "learning_rate": 1.8408220481113422e-05, "loss": 9.1471, "step": 77680 }, { "epoch": 0.3879747309545806, "grad_norm": 0.09189939498901367, "learning_rate": 1.840671856617187e-05, "loss": 9.1613, "step": 77690 }, { "epoch": 0.3880246697795201, "grad_norm": 0.09157737344503403, "learning_rate": 1.840521665123032e-05, "loss": 9.1504, "step": 77700 }, { "epoch": 0.3880746086044595, "grad_norm": 0.09373574703931808, "learning_rate": 1.840371473628877e-05, "loss": 9.1432, "step": 77710 }, { "epoch": 0.388124547429399, "grad_norm": 0.09311201423406601, "learning_rate": 1.8402212821347216e-05, "loss": 9.1433, "step": 77720 }, { "epoch": 0.3881744862543384, "grad_norm": 0.09637656062841415, "learning_rate": 1.840071090640567e-05, "loss": 9.1406, "step": 77730 }, { "epoch": 0.3882244250792779, "grad_norm": 0.09410546720027924, "learning_rate": 1.8399208991464117e-05, "loss": 9.1557, "step": 77740 }, { "epoch": 0.3882743639042173, "grad_norm": 0.09236341714859009, "learning_rate": 1.8397707076522567e-05, "loss": 9.151, "step": 77750 }, { "epoch": 0.3883243027291568, "grad_norm": 0.10012514889240265, "learning_rate": 1.8396205161581017e-05, "loss": 9.1482, "step": 77760 }, { "epoch": 0.3883742415540962, "grad_norm": 0.09507282823324203, "learning_rate": 1.8394703246639464e-05, "loss": 9.1616, "step": 77770 }, { "epoch": 0.3884241803790357, "grad_norm": 0.09531018882989883, "learning_rate": 1.8393201331697917e-05, "loss": 9.1508, "step": 77780 }, { "epoch": 0.3884741192039751, "grad_norm": 0.09432236105203629, "learning_rate": 1.8391699416756364e-05, "loss": 9.1536, "step": 77790 }, { "epoch": 0.3885240580289146, "grad_norm": 0.09611290693283081, "learning_rate": 1.8390197501814814e-05, "loss": 9.1479, "step": 77800 }, { "epoch": 0.388573996853854, "grad_norm": 0.09352581202983856, "learning_rate": 1.8388695586873264e-05, "loss": 9.157, "step": 77810 }, { "epoch": 0.3886239356787935, "grad_norm": 0.09698393195867538, "learning_rate": 1.838719367193171e-05, "loss": 9.1378, "step": 77820 }, { "epoch": 0.3886738745037329, "grad_norm": 0.09182106703519821, "learning_rate": 1.8385691756990165e-05, "loss": 9.1501, "step": 77830 }, { "epoch": 0.3887238133286724, "grad_norm": 0.09386096894741058, "learning_rate": 1.838418984204861e-05, "loss": 9.1381, "step": 77840 }, { "epoch": 0.3887737521536118, "grad_norm": 0.09277167916297913, "learning_rate": 1.8382687927107065e-05, "loss": 9.1395, "step": 77850 }, { "epoch": 0.3888236909785513, "grad_norm": 0.09407127648591995, "learning_rate": 1.8381186012165512e-05, "loss": 9.1482, "step": 77860 }, { "epoch": 0.3888736298034907, "grad_norm": 0.09299039095640182, "learning_rate": 1.837968409722396e-05, "loss": 9.1526, "step": 77870 }, { "epoch": 0.3889235686284302, "grad_norm": 0.09196474403142929, "learning_rate": 1.8378182182282412e-05, "loss": 9.1458, "step": 77880 }, { "epoch": 0.3889735074533696, "grad_norm": 0.09697432816028595, "learning_rate": 1.837668026734086e-05, "loss": 9.1328, "step": 77890 }, { "epoch": 0.3890234462783091, "grad_norm": 0.09332502633333206, "learning_rate": 1.8375178352399313e-05, "loss": 9.1421, "step": 77900 }, { "epoch": 0.3890733851032485, "grad_norm": 0.09647480398416519, "learning_rate": 1.837367643745776e-05, "loss": 9.1494, "step": 77910 }, { "epoch": 0.389123323928188, "grad_norm": 0.08914009481668472, "learning_rate": 1.8372174522516206e-05, "loss": 9.1537, "step": 77920 }, { "epoch": 0.3891732627531274, "grad_norm": 0.09153462946414948, "learning_rate": 1.837067260757466e-05, "loss": 9.1461, "step": 77930 }, { "epoch": 0.3892232015780669, "grad_norm": 0.09255599975585938, "learning_rate": 1.8369170692633107e-05, "loss": 9.1481, "step": 77940 }, { "epoch": 0.3892731404030063, "grad_norm": 0.10034976899623871, "learning_rate": 1.836766877769156e-05, "loss": 9.1427, "step": 77950 }, { "epoch": 0.3893230792279458, "grad_norm": 0.09407934546470642, "learning_rate": 1.8366166862750007e-05, "loss": 9.1505, "step": 77960 }, { "epoch": 0.3893730180528852, "grad_norm": 0.09382222592830658, "learning_rate": 1.8364664947808454e-05, "loss": 9.1392, "step": 77970 }, { "epoch": 0.3894229568778247, "grad_norm": 0.09767767041921616, "learning_rate": 1.8363163032866907e-05, "loss": 9.1463, "step": 77980 }, { "epoch": 0.3894728957027641, "grad_norm": 0.0940956324338913, "learning_rate": 1.8361661117925354e-05, "loss": 9.144, "step": 77990 }, { "epoch": 0.3895228345277036, "grad_norm": 0.09381180256605148, "learning_rate": 1.8360159202983808e-05, "loss": 9.1565, "step": 78000 }, { "epoch": 0.389572773352643, "grad_norm": 0.09209650754928589, "learning_rate": 1.8358657288042254e-05, "loss": 9.149, "step": 78010 }, { "epoch": 0.3896227121775825, "grad_norm": 0.09117661416530609, "learning_rate": 1.83571553731007e-05, "loss": 9.1426, "step": 78020 }, { "epoch": 0.3896726510025219, "grad_norm": 0.0906655564904213, "learning_rate": 1.8355653458159155e-05, "loss": 9.1527, "step": 78030 }, { "epoch": 0.3897225898274614, "grad_norm": 0.09310529381036758, "learning_rate": 1.83541515432176e-05, "loss": 9.1374, "step": 78040 }, { "epoch": 0.3897725286524008, "grad_norm": 0.08677937090396881, "learning_rate": 1.8352649628276055e-05, "loss": 9.1483, "step": 78050 }, { "epoch": 0.3898224674773403, "grad_norm": 0.09317050874233246, "learning_rate": 1.8351147713334502e-05, "loss": 9.1481, "step": 78060 }, { "epoch": 0.3898724063022797, "grad_norm": 0.09428145736455917, "learning_rate": 1.834964579839295e-05, "loss": 9.1525, "step": 78070 }, { "epoch": 0.3899223451272192, "grad_norm": 0.09383665770292282, "learning_rate": 1.8348143883451402e-05, "loss": 9.1374, "step": 78080 }, { "epoch": 0.3899722839521586, "grad_norm": 0.08776134252548218, "learning_rate": 1.834664196850985e-05, "loss": 9.1481, "step": 78090 }, { "epoch": 0.3900222227770981, "grad_norm": 0.09563107043504715, "learning_rate": 1.8345140053568303e-05, "loss": 9.1403, "step": 78100 }, { "epoch": 0.3900721616020375, "grad_norm": 0.09589152038097382, "learning_rate": 1.834363813862675e-05, "loss": 9.1492, "step": 78110 }, { "epoch": 0.390122100426977, "grad_norm": 0.09651998430490494, "learning_rate": 1.8342136223685196e-05, "loss": 9.1421, "step": 78120 }, { "epoch": 0.3901720392519164, "grad_norm": 0.0984501987695694, "learning_rate": 1.834063430874365e-05, "loss": 9.1399, "step": 78130 }, { "epoch": 0.3902219780768558, "grad_norm": 0.09631562978029251, "learning_rate": 1.8339132393802097e-05, "loss": 9.1386, "step": 78140 }, { "epoch": 0.3902719169017953, "grad_norm": 0.09599178284406662, "learning_rate": 1.833763047886055e-05, "loss": 9.1481, "step": 78150 }, { "epoch": 0.3903218557267347, "grad_norm": 0.0964631512761116, "learning_rate": 1.8336128563918997e-05, "loss": 9.14, "step": 78160 }, { "epoch": 0.3903717945516742, "grad_norm": 0.09247509390115738, "learning_rate": 1.8334626648977447e-05, "loss": 9.1432, "step": 78170 }, { "epoch": 0.3904217333766136, "grad_norm": 0.09320325404405594, "learning_rate": 1.8333124734035897e-05, "loss": 9.1439, "step": 78180 }, { "epoch": 0.3904716722015531, "grad_norm": 0.09139348566532135, "learning_rate": 1.8331622819094344e-05, "loss": 9.156, "step": 78190 }, { "epoch": 0.3905216110264925, "grad_norm": 0.09385591000318527, "learning_rate": 1.8330120904152798e-05, "loss": 9.1441, "step": 78200 }, { "epoch": 0.390571549851432, "grad_norm": 0.09762391448020935, "learning_rate": 1.8328618989211244e-05, "loss": 9.1494, "step": 78210 }, { "epoch": 0.3906214886763714, "grad_norm": 0.09472424536943436, "learning_rate": 1.8327117074269695e-05, "loss": 9.1339, "step": 78220 }, { "epoch": 0.3906714275013109, "grad_norm": 0.09121805429458618, "learning_rate": 1.8325615159328145e-05, "loss": 9.1461, "step": 78230 }, { "epoch": 0.3907213663262503, "grad_norm": 0.09944367408752441, "learning_rate": 1.832411324438659e-05, "loss": 9.1434, "step": 78240 }, { "epoch": 0.3907713051511898, "grad_norm": 0.08815374970436096, "learning_rate": 1.8322611329445045e-05, "loss": 9.138, "step": 78250 }, { "epoch": 0.3908212439761292, "grad_norm": 0.09055221825838089, "learning_rate": 1.8321109414503492e-05, "loss": 9.1388, "step": 78260 }, { "epoch": 0.3908711828010687, "grad_norm": 0.09463029354810715, "learning_rate": 1.8319607499561942e-05, "loss": 9.1477, "step": 78270 }, { "epoch": 0.3909211216260081, "grad_norm": 0.09334297478199005, "learning_rate": 1.8318105584620392e-05, "loss": 9.1358, "step": 78280 }, { "epoch": 0.3909710604509476, "grad_norm": 0.09212377667427063, "learning_rate": 1.831660366967884e-05, "loss": 9.1363, "step": 78290 }, { "epoch": 0.391020999275887, "grad_norm": 0.09191081672906876, "learning_rate": 1.8315101754737293e-05, "loss": 9.1443, "step": 78300 }, { "epoch": 0.3910709381008265, "grad_norm": 0.0968773290514946, "learning_rate": 1.831359983979574e-05, "loss": 9.1507, "step": 78310 }, { "epoch": 0.3911208769257659, "grad_norm": 0.09789808839559555, "learning_rate": 1.831209792485419e-05, "loss": 9.1365, "step": 78320 }, { "epoch": 0.3911708157507054, "grad_norm": 0.09304534643888474, "learning_rate": 1.831059600991264e-05, "loss": 9.1417, "step": 78330 }, { "epoch": 0.3912207545756448, "grad_norm": 0.09302885830402374, "learning_rate": 1.8309094094971087e-05, "loss": 9.1402, "step": 78340 }, { "epoch": 0.3912706934005843, "grad_norm": 0.10683131963014603, "learning_rate": 1.830759218002954e-05, "loss": 9.1388, "step": 78350 }, { "epoch": 0.3913206322255237, "grad_norm": 0.09414584934711456, "learning_rate": 1.8306090265087987e-05, "loss": 9.1488, "step": 78360 }, { "epoch": 0.3913705710504632, "grad_norm": 0.10074611753225327, "learning_rate": 1.8304588350146437e-05, "loss": 9.1423, "step": 78370 }, { "epoch": 0.3914205098754026, "grad_norm": 0.0921291634440422, "learning_rate": 1.8303086435204887e-05, "loss": 9.1431, "step": 78380 }, { "epoch": 0.3914704487003421, "grad_norm": 0.09323565661907196, "learning_rate": 1.8301584520263334e-05, "loss": 9.1458, "step": 78390 }, { "epoch": 0.3915203875252815, "grad_norm": 0.09994424134492874, "learning_rate": 1.8300082605321788e-05, "loss": 9.1407, "step": 78400 }, { "epoch": 0.391570326350221, "grad_norm": 0.09224946796894073, "learning_rate": 1.8298580690380234e-05, "loss": 9.1374, "step": 78410 }, { "epoch": 0.3916202651751604, "grad_norm": 0.09452834725379944, "learning_rate": 1.8297078775438685e-05, "loss": 9.145, "step": 78420 }, { "epoch": 0.3916702040000999, "grad_norm": 0.10009356588125229, "learning_rate": 1.8295576860497135e-05, "loss": 9.1396, "step": 78430 }, { "epoch": 0.3917201428250393, "grad_norm": 0.0899641364812851, "learning_rate": 1.829407494555558e-05, "loss": 9.1334, "step": 78440 }, { "epoch": 0.3917700816499788, "grad_norm": 0.09872201830148697, "learning_rate": 1.8292573030614035e-05, "loss": 9.1392, "step": 78450 }, { "epoch": 0.3918200204749182, "grad_norm": 0.09648354351520538, "learning_rate": 1.8291071115672482e-05, "loss": 9.1291, "step": 78460 }, { "epoch": 0.3918699592998577, "grad_norm": 0.09189938753843307, "learning_rate": 1.8289569200730932e-05, "loss": 9.1376, "step": 78470 }, { "epoch": 0.3919198981247971, "grad_norm": 0.0942487046122551, "learning_rate": 1.8288067285789382e-05, "loss": 9.1554, "step": 78480 }, { "epoch": 0.3919698369497366, "grad_norm": 0.0920250341296196, "learning_rate": 1.8286565370847832e-05, "loss": 9.1335, "step": 78490 }, { "epoch": 0.392019775774676, "grad_norm": 0.09978582710027695, "learning_rate": 1.8285063455906283e-05, "loss": 9.1339, "step": 78500 }, { "epoch": 0.3920697145996155, "grad_norm": 0.09699604660272598, "learning_rate": 1.828356154096473e-05, "loss": 9.146, "step": 78510 }, { "epoch": 0.3921196534245549, "grad_norm": 0.09093842655420303, "learning_rate": 1.828205962602318e-05, "loss": 9.1379, "step": 78520 }, { "epoch": 0.3921695922494944, "grad_norm": 0.09174724668264389, "learning_rate": 1.828055771108163e-05, "loss": 9.1321, "step": 78530 }, { "epoch": 0.3922195310744338, "grad_norm": 0.10035578906536102, "learning_rate": 1.827905579614008e-05, "loss": 9.1408, "step": 78540 }, { "epoch": 0.3922694698993733, "grad_norm": 0.09467300027608871, "learning_rate": 1.827755388119853e-05, "loss": 9.1563, "step": 78550 }, { "epoch": 0.3923194087243127, "grad_norm": 0.09329366683959961, "learning_rate": 1.8276051966256977e-05, "loss": 9.1288, "step": 78560 }, { "epoch": 0.3923693475492522, "grad_norm": 0.09315571188926697, "learning_rate": 1.8274550051315427e-05, "loss": 9.1358, "step": 78570 }, { "epoch": 0.3924192863741916, "grad_norm": 0.09450618177652359, "learning_rate": 1.8273048136373877e-05, "loss": 9.1401, "step": 78580 }, { "epoch": 0.3924692251991311, "grad_norm": 0.09535399079322815, "learning_rate": 1.8271546221432327e-05, "loss": 9.1415, "step": 78590 }, { "epoch": 0.3925191640240705, "grad_norm": 0.09569433331489563, "learning_rate": 1.8270044306490778e-05, "loss": 9.145, "step": 78600 }, { "epoch": 0.39256910284901, "grad_norm": 0.09955570846796036, "learning_rate": 1.8268542391549224e-05, "loss": 9.1322, "step": 78610 }, { "epoch": 0.3926190416739494, "grad_norm": 0.10329762101173401, "learning_rate": 1.8267040476607675e-05, "loss": 9.127, "step": 78620 }, { "epoch": 0.3926689804988889, "grad_norm": 0.09589072316884995, "learning_rate": 1.8265538561666125e-05, "loss": 9.1384, "step": 78630 }, { "epoch": 0.3927189193238283, "grad_norm": 0.09281908720731735, "learning_rate": 1.8264036646724575e-05, "loss": 9.143, "step": 78640 }, { "epoch": 0.3927688581487678, "grad_norm": 0.08548375964164734, "learning_rate": 1.8262534731783025e-05, "loss": 9.1432, "step": 78650 }, { "epoch": 0.3928187969737072, "grad_norm": 0.10105335712432861, "learning_rate": 1.8261032816841472e-05, "loss": 9.1397, "step": 78660 }, { "epoch": 0.3928687357986467, "grad_norm": 0.10008376091718674, "learning_rate": 1.8259530901899922e-05, "loss": 9.1335, "step": 78670 }, { "epoch": 0.3929186746235861, "grad_norm": 0.09541326016187668, "learning_rate": 1.8258028986958372e-05, "loss": 9.1324, "step": 78680 }, { "epoch": 0.39296861344852557, "grad_norm": 0.0945446565747261, "learning_rate": 1.8256527072016822e-05, "loss": 9.1329, "step": 78690 }, { "epoch": 0.393018552273465, "grad_norm": 0.09741361439228058, "learning_rate": 1.8255025157075273e-05, "loss": 9.1452, "step": 78700 }, { "epoch": 0.39306849109840447, "grad_norm": 0.09333933889865875, "learning_rate": 1.825352324213372e-05, "loss": 9.1364, "step": 78710 }, { "epoch": 0.3931184299233439, "grad_norm": 0.09559603035449982, "learning_rate": 1.825202132719217e-05, "loss": 9.1332, "step": 78720 }, { "epoch": 0.39316836874828337, "grad_norm": 0.09066354483366013, "learning_rate": 1.825051941225062e-05, "loss": 9.1269, "step": 78730 }, { "epoch": 0.3932183075732228, "grad_norm": 0.09495797008275986, "learning_rate": 1.824901749730907e-05, "loss": 9.1402, "step": 78740 }, { "epoch": 0.39326824639816227, "grad_norm": 0.08994494378566742, "learning_rate": 1.824751558236752e-05, "loss": 9.145, "step": 78750 }, { "epoch": 0.3933181852231017, "grad_norm": 0.08942119777202606, "learning_rate": 1.8246013667425967e-05, "loss": 9.1393, "step": 78760 }, { "epoch": 0.39336812404804117, "grad_norm": 0.09385786950588226, "learning_rate": 1.8244511752484417e-05, "loss": 9.141, "step": 78770 }, { "epoch": 0.3934180628729806, "grad_norm": 0.09488862007856369, "learning_rate": 1.8243009837542867e-05, "loss": 9.1458, "step": 78780 }, { "epoch": 0.39346800169792007, "grad_norm": 0.0987120270729065, "learning_rate": 1.8241507922601317e-05, "loss": 9.1272, "step": 78790 }, { "epoch": 0.3935179405228595, "grad_norm": 0.09095241129398346, "learning_rate": 1.8240006007659768e-05, "loss": 9.13, "step": 78800 }, { "epoch": 0.39356787934779897, "grad_norm": 0.08821319043636322, "learning_rate": 1.8238504092718218e-05, "loss": 9.1371, "step": 78810 }, { "epoch": 0.3936178181727384, "grad_norm": 0.09783562272787094, "learning_rate": 1.8237002177776665e-05, "loss": 9.1444, "step": 78820 }, { "epoch": 0.39366775699767786, "grad_norm": 0.09780313819646835, "learning_rate": 1.8235500262835115e-05, "loss": 9.1244, "step": 78830 }, { "epoch": 0.3937176958226173, "grad_norm": 0.0926724523305893, "learning_rate": 1.8233998347893565e-05, "loss": 9.134, "step": 78840 }, { "epoch": 0.39376763464755676, "grad_norm": 0.09907516092061996, "learning_rate": 1.8232496432952015e-05, "loss": 9.137, "step": 78850 }, { "epoch": 0.3938175734724962, "grad_norm": 0.09416226297616959, "learning_rate": 1.8230994518010465e-05, "loss": 9.1347, "step": 78860 }, { "epoch": 0.39386751229743566, "grad_norm": 0.09291341155767441, "learning_rate": 1.8229492603068912e-05, "loss": 9.1378, "step": 78870 }, { "epoch": 0.3939174511223751, "grad_norm": 0.09237024933099747, "learning_rate": 1.8227990688127362e-05, "loss": 9.135, "step": 78880 }, { "epoch": 0.39396738994731456, "grad_norm": 0.09598385542631149, "learning_rate": 1.8226488773185812e-05, "loss": 9.1253, "step": 78890 }, { "epoch": 0.394017328772254, "grad_norm": 0.09644147008657455, "learning_rate": 1.8224986858244263e-05, "loss": 9.1379, "step": 78900 }, { "epoch": 0.39406726759719346, "grad_norm": 0.09208100289106369, "learning_rate": 1.8223484943302713e-05, "loss": 9.1481, "step": 78910 }, { "epoch": 0.3941172064221329, "grad_norm": 0.091837577521801, "learning_rate": 1.822198302836116e-05, "loss": 9.142, "step": 78920 }, { "epoch": 0.39416714524707236, "grad_norm": 0.09068531543016434, "learning_rate": 1.822048111341961e-05, "loss": 9.1359, "step": 78930 }, { "epoch": 0.3942170840720118, "grad_norm": 0.09340940415859222, "learning_rate": 1.821897919847806e-05, "loss": 9.1407, "step": 78940 }, { "epoch": 0.39426702289695126, "grad_norm": 0.09838276356458664, "learning_rate": 1.821747728353651e-05, "loss": 9.1273, "step": 78950 }, { "epoch": 0.3943169617218907, "grad_norm": 0.09257405251264572, "learning_rate": 1.821597536859496e-05, "loss": 9.1309, "step": 78960 }, { "epoch": 0.39436690054683016, "grad_norm": 0.09464617073535919, "learning_rate": 1.8214473453653407e-05, "loss": 9.125, "step": 78970 }, { "epoch": 0.3944168393717696, "grad_norm": 0.09580957889556885, "learning_rate": 1.8212971538711857e-05, "loss": 9.1253, "step": 78980 }, { "epoch": 0.39446677819670906, "grad_norm": 0.09709136933088303, "learning_rate": 1.8211469623770308e-05, "loss": 9.1327, "step": 78990 }, { "epoch": 0.3945167170216485, "grad_norm": 0.10155075043439865, "learning_rate": 1.8209967708828758e-05, "loss": 9.1258, "step": 79000 }, { "epoch": 0.39456665584658795, "grad_norm": 0.08959164470434189, "learning_rate": 1.8208465793887208e-05, "loss": 9.1309, "step": 79010 }, { "epoch": 0.3946165946715274, "grad_norm": 0.10539384931325912, "learning_rate": 1.8206963878945655e-05, "loss": 9.1265, "step": 79020 }, { "epoch": 0.39466653349646685, "grad_norm": 0.09576883167028427, "learning_rate": 1.8205461964004105e-05, "loss": 9.1324, "step": 79030 }, { "epoch": 0.3947164723214063, "grad_norm": 0.09333497285842896, "learning_rate": 1.8203960049062555e-05, "loss": 9.1188, "step": 79040 }, { "epoch": 0.39476641114634575, "grad_norm": 0.10127103328704834, "learning_rate": 1.8202458134121005e-05, "loss": 9.1255, "step": 79050 }, { "epoch": 0.3948163499712852, "grad_norm": 0.09161249548196793, "learning_rate": 1.8200956219179455e-05, "loss": 9.1357, "step": 79060 }, { "epoch": 0.39486628879622465, "grad_norm": 0.08713789284229279, "learning_rate": 1.8199454304237902e-05, "loss": 9.1448, "step": 79070 }, { "epoch": 0.3949162276211641, "grad_norm": 0.09979274123907089, "learning_rate": 1.8197952389296352e-05, "loss": 9.1245, "step": 79080 }, { "epoch": 0.39496616644610355, "grad_norm": 0.09479266405105591, "learning_rate": 1.8196450474354803e-05, "loss": 9.1359, "step": 79090 }, { "epoch": 0.39501610527104297, "grad_norm": 0.0914844274520874, "learning_rate": 1.8194948559413253e-05, "loss": 9.1356, "step": 79100 }, { "epoch": 0.3950660440959824, "grad_norm": 0.09772706776857376, "learning_rate": 1.8193446644471703e-05, "loss": 9.1253, "step": 79110 }, { "epoch": 0.39511598292092187, "grad_norm": 0.08755208551883698, "learning_rate": 1.819194472953015e-05, "loss": 9.1353, "step": 79120 }, { "epoch": 0.3951659217458613, "grad_norm": 0.09900221973657608, "learning_rate": 1.8190442814588603e-05, "loss": 9.1268, "step": 79130 }, { "epoch": 0.39521586057080077, "grad_norm": 0.09705707430839539, "learning_rate": 1.818894089964705e-05, "loss": 9.1355, "step": 79140 }, { "epoch": 0.3952657993957402, "grad_norm": 0.09519544243812561, "learning_rate": 1.81874389847055e-05, "loss": 9.1456, "step": 79150 }, { "epoch": 0.39531573822067967, "grad_norm": 0.09430651366710663, "learning_rate": 1.818593706976395e-05, "loss": 9.1446, "step": 79160 }, { "epoch": 0.3953656770456191, "grad_norm": 0.0924086719751358, "learning_rate": 1.8184435154822397e-05, "loss": 9.1273, "step": 79170 }, { "epoch": 0.39541561587055857, "grad_norm": 0.09312140196561813, "learning_rate": 1.818293323988085e-05, "loss": 9.1378, "step": 79180 }, { "epoch": 0.395465554695498, "grad_norm": 0.08896911144256592, "learning_rate": 1.8181431324939298e-05, "loss": 9.1462, "step": 79190 }, { "epoch": 0.39551549352043747, "grad_norm": 0.09490480273962021, "learning_rate": 1.8179929409997748e-05, "loss": 9.1304, "step": 79200 }, { "epoch": 0.3955654323453769, "grad_norm": 0.09386398643255234, "learning_rate": 1.8178427495056198e-05, "loss": 9.1264, "step": 79210 }, { "epoch": 0.39561537117031637, "grad_norm": 0.09263758361339569, "learning_rate": 1.8176925580114645e-05, "loss": 9.1253, "step": 79220 }, { "epoch": 0.3956653099952558, "grad_norm": 0.09182369709014893, "learning_rate": 1.8175423665173098e-05, "loss": 9.1266, "step": 79230 }, { "epoch": 0.39571524882019526, "grad_norm": 0.09554483741521835, "learning_rate": 1.8173921750231545e-05, "loss": 9.1378, "step": 79240 }, { "epoch": 0.3957651876451347, "grad_norm": 0.09417613595724106, "learning_rate": 1.8172419835289995e-05, "loss": 9.1335, "step": 79250 }, { "epoch": 0.39581512647007416, "grad_norm": 0.09561773389577866, "learning_rate": 1.8170917920348445e-05, "loss": 9.1292, "step": 79260 }, { "epoch": 0.3958650652950136, "grad_norm": 0.09833815693855286, "learning_rate": 1.8169416005406892e-05, "loss": 9.1259, "step": 79270 }, { "epoch": 0.39591500411995306, "grad_norm": 0.09367582947015762, "learning_rate": 1.8167914090465346e-05, "loss": 9.1323, "step": 79280 }, { "epoch": 0.3959649429448925, "grad_norm": 0.09364238381385803, "learning_rate": 1.8166412175523793e-05, "loss": 9.1206, "step": 79290 }, { "epoch": 0.39601488176983196, "grad_norm": 0.09396982192993164, "learning_rate": 1.8164910260582243e-05, "loss": 9.1248, "step": 79300 }, { "epoch": 0.3960648205947714, "grad_norm": 0.09186158329248428, "learning_rate": 1.8163408345640693e-05, "loss": 9.1398, "step": 79310 }, { "epoch": 0.39611475941971086, "grad_norm": 0.09493670612573624, "learning_rate": 1.816190643069914e-05, "loss": 9.129, "step": 79320 }, { "epoch": 0.3961646982446503, "grad_norm": 0.0939958244562149, "learning_rate": 1.8160404515757593e-05, "loss": 9.1374, "step": 79330 }, { "epoch": 0.39621463706958976, "grad_norm": 0.09256591647863388, "learning_rate": 1.815890260081604e-05, "loss": 9.1247, "step": 79340 }, { "epoch": 0.3962645758945292, "grad_norm": 0.0958244577050209, "learning_rate": 1.815740068587449e-05, "loss": 9.1239, "step": 79350 }, { "epoch": 0.39631451471946866, "grad_norm": 0.09303276985883713, "learning_rate": 1.815589877093294e-05, "loss": 9.1319, "step": 79360 }, { "epoch": 0.3963644535444081, "grad_norm": 0.10332775115966797, "learning_rate": 1.8154396855991387e-05, "loss": 9.1269, "step": 79370 }, { "epoch": 0.39641439236934756, "grad_norm": 0.09653986990451813, "learning_rate": 1.815289494104984e-05, "loss": 9.1214, "step": 79380 }, { "epoch": 0.396464331194287, "grad_norm": 0.09338979423046112, "learning_rate": 1.8151393026108288e-05, "loss": 9.1155, "step": 79390 }, { "epoch": 0.39651427001922646, "grad_norm": 0.09556922316551208, "learning_rate": 1.8149891111166738e-05, "loss": 9.1303, "step": 79400 }, { "epoch": 0.3965642088441659, "grad_norm": 0.09039589762687683, "learning_rate": 1.8148389196225188e-05, "loss": 9.1323, "step": 79410 }, { "epoch": 0.39661414766910535, "grad_norm": 0.09440034627914429, "learning_rate": 1.8146887281283635e-05, "loss": 9.131, "step": 79420 }, { "epoch": 0.3966640864940448, "grad_norm": 0.0989178940653801, "learning_rate": 1.8145385366342088e-05, "loss": 9.1244, "step": 79430 }, { "epoch": 0.39671402531898425, "grad_norm": 0.0976061150431633, "learning_rate": 1.8143883451400535e-05, "loss": 9.1148, "step": 79440 }, { "epoch": 0.3967639641439237, "grad_norm": 0.09087694436311722, "learning_rate": 1.8142381536458985e-05, "loss": 9.1231, "step": 79450 }, { "epoch": 0.39681390296886315, "grad_norm": 0.08781003206968307, "learning_rate": 1.8140879621517435e-05, "loss": 9.1166, "step": 79460 }, { "epoch": 0.3968638417938026, "grad_norm": 0.08944723010063171, "learning_rate": 1.8139377706575882e-05, "loss": 9.1332, "step": 79470 }, { "epoch": 0.39691378061874205, "grad_norm": 0.09698821604251862, "learning_rate": 1.8137875791634336e-05, "loss": 9.123, "step": 79480 }, { "epoch": 0.3969637194436815, "grad_norm": 0.09338833391666412, "learning_rate": 1.8136373876692783e-05, "loss": 9.1365, "step": 79490 }, { "epoch": 0.39701365826862095, "grad_norm": 0.09354853630065918, "learning_rate": 1.8134871961751236e-05, "loss": 9.1218, "step": 79500 }, { "epoch": 0.39706359709356037, "grad_norm": 0.09762848913669586, "learning_rate": 1.8133370046809683e-05, "loss": 9.1297, "step": 79510 }, { "epoch": 0.39711353591849985, "grad_norm": 0.0980941578745842, "learning_rate": 1.813186813186813e-05, "loss": 9.1269, "step": 79520 }, { "epoch": 0.39716347474343927, "grad_norm": 0.09508627653121948, "learning_rate": 1.8130366216926583e-05, "loss": 9.1146, "step": 79530 }, { "epoch": 0.39721341356837875, "grad_norm": 0.09081006050109863, "learning_rate": 1.812886430198503e-05, "loss": 9.1403, "step": 79540 }, { "epoch": 0.39726335239331817, "grad_norm": 0.09213867783546448, "learning_rate": 1.8127362387043484e-05, "loss": 9.116, "step": 79550 }, { "epoch": 0.39731329121825765, "grad_norm": 0.09143271297216415, "learning_rate": 1.812586047210193e-05, "loss": 9.1232, "step": 79560 }, { "epoch": 0.39736323004319707, "grad_norm": 0.09485948085784912, "learning_rate": 1.8124358557160377e-05, "loss": 9.1199, "step": 79570 }, { "epoch": 0.39741316886813655, "grad_norm": 0.09463505446910858, "learning_rate": 1.812285664221883e-05, "loss": 9.1312, "step": 79580 }, { "epoch": 0.39746310769307597, "grad_norm": 0.09464465081691742, "learning_rate": 1.8121354727277278e-05, "loss": 9.1267, "step": 79590 }, { "epoch": 0.39751304651801544, "grad_norm": 0.09754239022731781, "learning_rate": 1.811985281233573e-05, "loss": 9.1344, "step": 79600 }, { "epoch": 0.39756298534295487, "grad_norm": 0.09760978072881699, "learning_rate": 1.8118350897394178e-05, "loss": 9.1294, "step": 79610 }, { "epoch": 0.39761292416789434, "grad_norm": 0.08997610211372375, "learning_rate": 1.8116848982452625e-05, "loss": 9.1341, "step": 79620 }, { "epoch": 0.39766286299283377, "grad_norm": 0.09305060654878616, "learning_rate": 1.8115347067511078e-05, "loss": 9.1429, "step": 79630 }, { "epoch": 0.39771280181777324, "grad_norm": 0.0855415090918541, "learning_rate": 1.8113845152569525e-05, "loss": 9.1285, "step": 79640 }, { "epoch": 0.39776274064271266, "grad_norm": 0.09722486883401871, "learning_rate": 1.811234323762798e-05, "loss": 9.1218, "step": 79650 }, { "epoch": 0.39781267946765214, "grad_norm": 0.09455563873052597, "learning_rate": 1.8110841322686425e-05, "loss": 9.1177, "step": 79660 }, { "epoch": 0.39786261829259156, "grad_norm": 0.09245886653661728, "learning_rate": 1.8109339407744872e-05, "loss": 9.1312, "step": 79670 }, { "epoch": 0.39791255711753104, "grad_norm": 0.09259022772312164, "learning_rate": 1.8107837492803326e-05, "loss": 9.1313, "step": 79680 }, { "epoch": 0.39796249594247046, "grad_norm": 0.08969958871603012, "learning_rate": 1.8106335577861773e-05, "loss": 9.1278, "step": 79690 }, { "epoch": 0.39801243476740994, "grad_norm": 0.08772096782922745, "learning_rate": 1.8104833662920226e-05, "loss": 9.132, "step": 79700 }, { "epoch": 0.39806237359234936, "grad_norm": 0.0951986312866211, "learning_rate": 1.8103331747978673e-05, "loss": 9.1208, "step": 79710 }, { "epoch": 0.39811231241728884, "grad_norm": 0.09764955192804337, "learning_rate": 1.810182983303712e-05, "loss": 9.1099, "step": 79720 }, { "epoch": 0.39816225124222826, "grad_norm": 0.09765150398015976, "learning_rate": 1.8100327918095573e-05, "loss": 9.1259, "step": 79730 }, { "epoch": 0.39821219006716774, "grad_norm": 0.09225156158208847, "learning_rate": 1.809882600315402e-05, "loss": 9.1319, "step": 79740 }, { "epoch": 0.39826212889210716, "grad_norm": 0.09323499351739883, "learning_rate": 1.8097324088212474e-05, "loss": 9.1295, "step": 79750 }, { "epoch": 0.39831206771704664, "grad_norm": 0.0989677682518959, "learning_rate": 1.809582217327092e-05, "loss": 9.1328, "step": 79760 }, { "epoch": 0.39836200654198606, "grad_norm": 0.09290597587823868, "learning_rate": 1.8094320258329367e-05, "loss": 9.1173, "step": 79770 }, { "epoch": 0.39841194536692554, "grad_norm": 0.09229425340890884, "learning_rate": 1.809281834338782e-05, "loss": 9.1144, "step": 79780 }, { "epoch": 0.39846188419186496, "grad_norm": 0.09045591205358505, "learning_rate": 1.8091316428446268e-05, "loss": 9.1248, "step": 79790 }, { "epoch": 0.39851182301680443, "grad_norm": 0.0939536839723587, "learning_rate": 1.808981451350472e-05, "loss": 9.1254, "step": 79800 }, { "epoch": 0.39856176184174386, "grad_norm": 0.09491851180791855, "learning_rate": 1.8088312598563168e-05, "loss": 9.123, "step": 79810 }, { "epoch": 0.39861170066668333, "grad_norm": 0.08969860523939133, "learning_rate": 1.8086810683621618e-05, "loss": 9.1183, "step": 79820 }, { "epoch": 0.39866163949162275, "grad_norm": 0.10173804312944412, "learning_rate": 1.8085308768680068e-05, "loss": 9.1313, "step": 79830 }, { "epoch": 0.39871157831656223, "grad_norm": 0.0922439694404602, "learning_rate": 1.8083806853738515e-05, "loss": 9.1186, "step": 79840 }, { "epoch": 0.39876151714150165, "grad_norm": 0.08746393769979477, "learning_rate": 1.808230493879697e-05, "loss": 9.1344, "step": 79850 }, { "epoch": 0.39881145596644113, "grad_norm": 0.095425546169281, "learning_rate": 1.8080803023855415e-05, "loss": 9.1175, "step": 79860 }, { "epoch": 0.39886139479138055, "grad_norm": 0.09297501295804977, "learning_rate": 1.8079301108913866e-05, "loss": 9.1252, "step": 79870 }, { "epoch": 0.39891133361632003, "grad_norm": 0.09445119649171829, "learning_rate": 1.8077799193972316e-05, "loss": 9.1152, "step": 79880 }, { "epoch": 0.39896127244125945, "grad_norm": 0.08981538563966751, "learning_rate": 1.8076297279030763e-05, "loss": 9.1123, "step": 79890 }, { "epoch": 0.39901121126619893, "grad_norm": 0.09903477132320404, "learning_rate": 1.8074795364089216e-05, "loss": 9.1247, "step": 79900 }, { "epoch": 0.39906115009113835, "grad_norm": 0.09373074769973755, "learning_rate": 1.8073293449147663e-05, "loss": 9.1176, "step": 79910 }, { "epoch": 0.3991110889160778, "grad_norm": 0.0910414308309555, "learning_rate": 1.8071791534206113e-05, "loss": 9.124, "step": 79920 }, { "epoch": 0.39916102774101725, "grad_norm": 0.09311582893133163, "learning_rate": 1.8070289619264563e-05, "loss": 9.111, "step": 79930 }, { "epoch": 0.3992109665659567, "grad_norm": 0.09247269481420517, "learning_rate": 1.806878770432301e-05, "loss": 9.1145, "step": 79940 }, { "epoch": 0.39926090539089615, "grad_norm": 0.09804600477218628, "learning_rate": 1.8067285789381464e-05, "loss": 9.1214, "step": 79950 }, { "epoch": 0.3993108442158356, "grad_norm": 0.09990818053483963, "learning_rate": 1.806578387443991e-05, "loss": 9.1349, "step": 79960 }, { "epoch": 0.39936078304077505, "grad_norm": 0.09712336957454681, "learning_rate": 1.806428195949836e-05, "loss": 9.1245, "step": 79970 }, { "epoch": 0.3994107218657145, "grad_norm": 0.09431684017181396, "learning_rate": 1.806278004455681e-05, "loss": 9.1178, "step": 79980 }, { "epoch": 0.39946066069065395, "grad_norm": 0.10282929986715317, "learning_rate": 1.8061278129615258e-05, "loss": 9.1093, "step": 79990 }, { "epoch": 0.3995105995155934, "grad_norm": 0.09497708082199097, "learning_rate": 1.805977621467371e-05, "loss": 9.1184, "step": 80000 }, { "epoch": 0.39956053834053284, "grad_norm": 0.09546716511249542, "learning_rate": 1.8058274299732158e-05, "loss": 9.1205, "step": 80010 }, { "epoch": 0.3996104771654723, "grad_norm": 0.0929088369011879, "learning_rate": 1.8056772384790608e-05, "loss": 9.1212, "step": 80020 }, { "epoch": 0.39966041599041174, "grad_norm": 0.09245330840349197, "learning_rate": 1.8055270469849058e-05, "loss": 9.1023, "step": 80030 }, { "epoch": 0.3997103548153512, "grad_norm": 0.09106311202049255, "learning_rate": 1.8053768554907505e-05, "loss": 9.1202, "step": 80040 }, { "epoch": 0.39976029364029064, "grad_norm": 0.10017859935760498, "learning_rate": 1.805226663996596e-05, "loss": 9.126, "step": 80050 }, { "epoch": 0.3998102324652301, "grad_norm": 0.09317342936992645, "learning_rate": 1.8050764725024405e-05, "loss": 9.1289, "step": 80060 }, { "epoch": 0.39986017129016954, "grad_norm": 0.0955187976360321, "learning_rate": 1.8049262810082856e-05, "loss": 9.1126, "step": 80070 }, { "epoch": 0.399910110115109, "grad_norm": 0.09439694136381149, "learning_rate": 1.8047760895141306e-05, "loss": 9.1324, "step": 80080 }, { "epoch": 0.39996004894004844, "grad_norm": 0.09123414754867554, "learning_rate": 1.8046258980199753e-05, "loss": 9.1342, "step": 80090 }, { "epoch": 0.40000998776498786, "grad_norm": 0.09292822331190109, "learning_rate": 1.8044757065258206e-05, "loss": 9.111, "step": 80100 }, { "epoch": 0.40005992658992734, "grad_norm": 0.09875541925430298, "learning_rate": 1.8043255150316653e-05, "loss": 9.1114, "step": 80110 }, { "epoch": 0.40010986541486676, "grad_norm": 0.09288553893566132, "learning_rate": 1.8041753235375103e-05, "loss": 9.1085, "step": 80120 }, { "epoch": 0.40015980423980624, "grad_norm": 0.09572578966617584, "learning_rate": 1.8040251320433553e-05, "loss": 9.1151, "step": 80130 }, { "epoch": 0.40020974306474566, "grad_norm": 0.09946201741695404, "learning_rate": 1.8038749405492003e-05, "loss": 9.1163, "step": 80140 }, { "epoch": 0.40025968188968514, "grad_norm": 0.09416273236274719, "learning_rate": 1.8037247490550454e-05, "loss": 9.1196, "step": 80150 }, { "epoch": 0.40030962071462456, "grad_norm": 0.09236134588718414, "learning_rate": 1.80357455756089e-05, "loss": 9.1084, "step": 80160 }, { "epoch": 0.40035955953956404, "grad_norm": 0.09351241588592529, "learning_rate": 1.803424366066735e-05, "loss": 9.1082, "step": 80170 }, { "epoch": 0.40040949836450346, "grad_norm": 0.09657840430736542, "learning_rate": 1.80327417457258e-05, "loss": 9.1077, "step": 80180 }, { "epoch": 0.40045943718944293, "grad_norm": 0.09256424009799957, "learning_rate": 1.803123983078425e-05, "loss": 9.1178, "step": 80190 }, { "epoch": 0.40050937601438236, "grad_norm": 0.09746617078781128, "learning_rate": 1.80297379158427e-05, "loss": 9.1223, "step": 80200 }, { "epoch": 0.40055931483932183, "grad_norm": 0.09331115335226059, "learning_rate": 1.8028236000901148e-05, "loss": 9.1215, "step": 80210 }, { "epoch": 0.40060925366426126, "grad_norm": 0.09966427087783813, "learning_rate": 1.8026734085959598e-05, "loss": 9.1028, "step": 80220 }, { "epoch": 0.40065919248920073, "grad_norm": 0.09679210931062698, "learning_rate": 1.8025232171018048e-05, "loss": 9.1186, "step": 80230 }, { "epoch": 0.40070913131414015, "grad_norm": 0.09198209643363953, "learning_rate": 1.80237302560765e-05, "loss": 9.1265, "step": 80240 }, { "epoch": 0.40075907013907963, "grad_norm": 0.09457174688577652, "learning_rate": 1.802222834113495e-05, "loss": 9.1212, "step": 80250 }, { "epoch": 0.40080900896401905, "grad_norm": 0.09352682530879974, "learning_rate": 1.8020726426193395e-05, "loss": 9.1148, "step": 80260 }, { "epoch": 0.40085894778895853, "grad_norm": 0.0890619084239006, "learning_rate": 1.8019224511251846e-05, "loss": 9.126, "step": 80270 }, { "epoch": 0.40090888661389795, "grad_norm": 0.09067501127719879, "learning_rate": 1.8017722596310296e-05, "loss": 9.1201, "step": 80280 }, { "epoch": 0.40095882543883743, "grad_norm": 0.09653503447771072, "learning_rate": 1.8016220681368746e-05, "loss": 9.1192, "step": 80290 }, { "epoch": 0.40100876426377685, "grad_norm": 0.09168026596307755, "learning_rate": 1.8014718766427196e-05, "loss": 9.1156, "step": 80300 }, { "epoch": 0.40105870308871633, "grad_norm": 0.09609059989452362, "learning_rate": 1.8013216851485643e-05, "loss": 9.1139, "step": 80310 }, { "epoch": 0.40110864191365575, "grad_norm": 0.09476880729198456, "learning_rate": 1.8011714936544093e-05, "loss": 9.1134, "step": 80320 }, { "epoch": 0.4011585807385952, "grad_norm": 0.10273495316505432, "learning_rate": 1.8010213021602543e-05, "loss": 9.1195, "step": 80330 }, { "epoch": 0.40120851956353465, "grad_norm": 0.09645021706819534, "learning_rate": 1.8008711106660993e-05, "loss": 9.1238, "step": 80340 }, { "epoch": 0.4012584583884741, "grad_norm": 0.09573419392108917, "learning_rate": 1.8007209191719444e-05, "loss": 9.1229, "step": 80350 }, { "epoch": 0.40130839721341355, "grad_norm": 0.09707140922546387, "learning_rate": 1.800570727677789e-05, "loss": 9.1014, "step": 80360 }, { "epoch": 0.401358336038353, "grad_norm": 0.08955365419387817, "learning_rate": 1.800420536183634e-05, "loss": 9.1242, "step": 80370 }, { "epoch": 0.40140827486329245, "grad_norm": 0.09678129851818085, "learning_rate": 1.800270344689479e-05, "loss": 9.1202, "step": 80380 }, { "epoch": 0.4014582136882319, "grad_norm": 0.09080725908279419, "learning_rate": 1.800120153195324e-05, "loss": 9.1198, "step": 80390 }, { "epoch": 0.40150815251317135, "grad_norm": 0.09258688241243362, "learning_rate": 1.799969961701169e-05, "loss": 9.1001, "step": 80400 }, { "epoch": 0.4015580913381108, "grad_norm": 0.09553803503513336, "learning_rate": 1.7998197702070138e-05, "loss": 9.1137, "step": 80410 }, { "epoch": 0.40160803016305024, "grad_norm": 0.09325069189071655, "learning_rate": 1.7996695787128588e-05, "loss": 9.1155, "step": 80420 }, { "epoch": 0.4016579689879897, "grad_norm": 0.09590888768434525, "learning_rate": 1.799519387218704e-05, "loss": 9.1142, "step": 80430 }, { "epoch": 0.40170790781292914, "grad_norm": 0.08667058497667313, "learning_rate": 1.799369195724549e-05, "loss": 9.1148, "step": 80440 }, { "epoch": 0.4017578466378686, "grad_norm": 0.088908351957798, "learning_rate": 1.799219004230394e-05, "loss": 9.1099, "step": 80450 }, { "epoch": 0.40180778546280804, "grad_norm": 0.09739526361227036, "learning_rate": 1.799068812736239e-05, "loss": 9.114, "step": 80460 }, { "epoch": 0.4018577242877475, "grad_norm": 0.09346520155668259, "learning_rate": 1.7989186212420836e-05, "loss": 9.1227, "step": 80470 }, { "epoch": 0.40190766311268694, "grad_norm": 0.09403660148382187, "learning_rate": 1.7987684297479286e-05, "loss": 9.123, "step": 80480 }, { "epoch": 0.4019576019376264, "grad_norm": 0.09151037782430649, "learning_rate": 1.7986182382537736e-05, "loss": 9.109, "step": 80490 }, { "epoch": 0.40200754076256584, "grad_norm": 0.09313507378101349, "learning_rate": 1.7984680467596186e-05, "loss": 9.102, "step": 80500 }, { "epoch": 0.4020574795875053, "grad_norm": 0.09583766758441925, "learning_rate": 1.7983178552654636e-05, "loss": 9.095, "step": 80510 }, { "epoch": 0.40210741841244474, "grad_norm": 0.10357358306646347, "learning_rate": 1.7981676637713083e-05, "loss": 9.1297, "step": 80520 }, { "epoch": 0.4021573572373842, "grad_norm": 0.09157849848270416, "learning_rate": 1.7980174722771533e-05, "loss": 9.0986, "step": 80530 }, { "epoch": 0.40220729606232364, "grad_norm": 0.0905398353934288, "learning_rate": 1.7978672807829984e-05, "loss": 9.1144, "step": 80540 }, { "epoch": 0.4022572348872631, "grad_norm": 0.09321150183677673, "learning_rate": 1.7977170892888434e-05, "loss": 9.1098, "step": 80550 }, { "epoch": 0.40230717371220254, "grad_norm": 0.09091385453939438, "learning_rate": 1.7975668977946884e-05, "loss": 9.1172, "step": 80560 }, { "epoch": 0.402357112537142, "grad_norm": 0.09568551182746887, "learning_rate": 1.797416706300533e-05, "loss": 9.1055, "step": 80570 }, { "epoch": 0.40240705136208144, "grad_norm": 0.08929819613695145, "learning_rate": 1.797266514806378e-05, "loss": 9.1045, "step": 80580 }, { "epoch": 0.4024569901870209, "grad_norm": 0.09343628585338593, "learning_rate": 1.797116323312223e-05, "loss": 9.0965, "step": 80590 }, { "epoch": 0.40250692901196033, "grad_norm": 0.0918562188744545, "learning_rate": 1.796966131818068e-05, "loss": 9.103, "step": 80600 }, { "epoch": 0.4025568678368998, "grad_norm": 0.09256646037101746, "learning_rate": 1.796815940323913e-05, "loss": 9.1031, "step": 80610 }, { "epoch": 0.40260680666183923, "grad_norm": 0.08834106475114822, "learning_rate": 1.7966657488297578e-05, "loss": 9.1066, "step": 80620 }, { "epoch": 0.4026567454867787, "grad_norm": 0.09646911174058914, "learning_rate": 1.796515557335603e-05, "loss": 9.1038, "step": 80630 }, { "epoch": 0.40270668431171813, "grad_norm": 0.09727106243371964, "learning_rate": 1.796365365841448e-05, "loss": 9.1046, "step": 80640 }, { "epoch": 0.4027566231366576, "grad_norm": 0.0980573520064354, "learning_rate": 1.796215174347293e-05, "loss": 9.1105, "step": 80650 }, { "epoch": 0.40280656196159703, "grad_norm": 0.0933801680803299, "learning_rate": 1.796064982853138e-05, "loss": 9.1102, "step": 80660 }, { "epoch": 0.4028565007865365, "grad_norm": 0.09309963136911392, "learning_rate": 1.7959147913589826e-05, "loss": 9.1067, "step": 80670 }, { "epoch": 0.40290643961147593, "grad_norm": 0.09479737281799316, "learning_rate": 1.7957645998648276e-05, "loss": 9.118, "step": 80680 }, { "epoch": 0.4029563784364154, "grad_norm": 0.08766873925924301, "learning_rate": 1.7956144083706726e-05, "loss": 9.1088, "step": 80690 }, { "epoch": 0.40300631726135483, "grad_norm": 0.10010071843862534, "learning_rate": 1.7954642168765176e-05, "loss": 9.1077, "step": 80700 }, { "epoch": 0.4030562560862943, "grad_norm": 0.09243807941675186, "learning_rate": 1.7953140253823626e-05, "loss": 9.1047, "step": 80710 }, { "epoch": 0.40310619491123373, "grad_norm": 0.0926956832408905, "learning_rate": 1.7951638338882073e-05, "loss": 9.1029, "step": 80720 }, { "epoch": 0.4031561337361732, "grad_norm": 0.09393865615129471, "learning_rate": 1.7950136423940523e-05, "loss": 9.1034, "step": 80730 }, { "epoch": 0.4032060725611126, "grad_norm": 0.09327403455972672, "learning_rate": 1.7948634508998974e-05, "loss": 9.1105, "step": 80740 }, { "epoch": 0.4032560113860521, "grad_norm": 0.09919864684343338, "learning_rate": 1.7947132594057424e-05, "loss": 9.1182, "step": 80750 }, { "epoch": 0.4033059502109915, "grad_norm": 0.09142395853996277, "learning_rate": 1.7945630679115874e-05, "loss": 9.1112, "step": 80760 }, { "epoch": 0.403355889035931, "grad_norm": 0.09137079864740372, "learning_rate": 1.794412876417432e-05, "loss": 9.1173, "step": 80770 }, { "epoch": 0.4034058278608704, "grad_norm": 0.09005033224821091, "learning_rate": 1.7942626849232774e-05, "loss": 9.116, "step": 80780 }, { "epoch": 0.4034557666858099, "grad_norm": 0.09195708483457565, "learning_rate": 1.794112493429122e-05, "loss": 9.1215, "step": 80790 }, { "epoch": 0.4035057055107493, "grad_norm": 0.09235309809446335, "learning_rate": 1.793962301934967e-05, "loss": 9.1151, "step": 80800 }, { "epoch": 0.4035556443356888, "grad_norm": 0.09343268722295761, "learning_rate": 1.793812110440812e-05, "loss": 9.1282, "step": 80810 }, { "epoch": 0.4036055831606282, "grad_norm": 0.09357087314128876, "learning_rate": 1.793661918946657e-05, "loss": 9.1123, "step": 80820 }, { "epoch": 0.4036555219855677, "grad_norm": 0.08820904046297073, "learning_rate": 1.7935117274525022e-05, "loss": 9.1184, "step": 80830 }, { "epoch": 0.4037054608105071, "grad_norm": 0.08932943642139435, "learning_rate": 1.793361535958347e-05, "loss": 9.1211, "step": 80840 }, { "epoch": 0.4037553996354466, "grad_norm": 0.09997081011533737, "learning_rate": 1.793211344464192e-05, "loss": 9.0972, "step": 80850 }, { "epoch": 0.403805338460386, "grad_norm": 0.09369473904371262, "learning_rate": 1.793061152970037e-05, "loss": 9.1223, "step": 80860 }, { "epoch": 0.4038552772853255, "grad_norm": 0.09159389138221741, "learning_rate": 1.792910961475882e-05, "loss": 9.1404, "step": 80870 }, { "epoch": 0.4039052161102649, "grad_norm": 0.09258721768856049, "learning_rate": 1.792760769981727e-05, "loss": 9.1047, "step": 80880 }, { "epoch": 0.4039551549352044, "grad_norm": 0.0894956886768341, "learning_rate": 1.7926105784875716e-05, "loss": 9.1137, "step": 80890 }, { "epoch": 0.4040050937601438, "grad_norm": 0.09068527817726135, "learning_rate": 1.7924603869934166e-05, "loss": 9.1043, "step": 80900 }, { "epoch": 0.4040550325850833, "grad_norm": 0.09284330904483795, "learning_rate": 1.7923101954992616e-05, "loss": 9.1099, "step": 80910 }, { "epoch": 0.4041049714100227, "grad_norm": 0.10179001837968826, "learning_rate": 1.7921600040051067e-05, "loss": 9.0992, "step": 80920 }, { "epoch": 0.4041549102349622, "grad_norm": 0.09713611751794815, "learning_rate": 1.7920098125109517e-05, "loss": 9.1143, "step": 80930 }, { "epoch": 0.4042048490599016, "grad_norm": 0.10156739503145218, "learning_rate": 1.7918596210167964e-05, "loss": 9.1069, "step": 80940 }, { "epoch": 0.4042547878848411, "grad_norm": 0.09405851364135742, "learning_rate": 1.7917094295226414e-05, "loss": 9.1072, "step": 80950 }, { "epoch": 0.4043047267097805, "grad_norm": 0.0946517214179039, "learning_rate": 1.7915592380284864e-05, "loss": 9.1207, "step": 80960 }, { "epoch": 0.40435466553472, "grad_norm": 0.09372934699058533, "learning_rate": 1.7914090465343314e-05, "loss": 9.1101, "step": 80970 }, { "epoch": 0.4044046043596594, "grad_norm": 0.08745318651199341, "learning_rate": 1.7912588550401764e-05, "loss": 9.1133, "step": 80980 }, { "epoch": 0.4044545431845989, "grad_norm": 0.09735770523548126, "learning_rate": 1.791108663546021e-05, "loss": 9.1006, "step": 80990 }, { "epoch": 0.4045044820095383, "grad_norm": 0.10448341071605682, "learning_rate": 1.790958472051866e-05, "loss": 9.0897, "step": 81000 }, { "epoch": 0.4045544208344778, "grad_norm": 0.09520471096038818, "learning_rate": 1.790808280557711e-05, "loss": 9.1102, "step": 81010 }, { "epoch": 0.4046043596594172, "grad_norm": 0.09410350024700165, "learning_rate": 1.790658089063556e-05, "loss": 9.1109, "step": 81020 }, { "epoch": 0.4046542984843567, "grad_norm": 0.09858789294958115, "learning_rate": 1.7905078975694012e-05, "loss": 9.1101, "step": 81030 }, { "epoch": 0.4047042373092961, "grad_norm": 0.09166577458381653, "learning_rate": 1.790357706075246e-05, "loss": 9.1214, "step": 81040 }, { "epoch": 0.4047541761342356, "grad_norm": 0.0967087671160698, "learning_rate": 1.790207514581091e-05, "loss": 9.1012, "step": 81050 }, { "epoch": 0.404804114959175, "grad_norm": 0.09214182943105698, "learning_rate": 1.790057323086936e-05, "loss": 9.1048, "step": 81060 }, { "epoch": 0.4048540537841145, "grad_norm": 0.09241543710231781, "learning_rate": 1.789907131592781e-05, "loss": 9.1192, "step": 81070 }, { "epoch": 0.4049039926090539, "grad_norm": 0.09916938096284866, "learning_rate": 1.789756940098626e-05, "loss": 9.101, "step": 81080 }, { "epoch": 0.40495393143399333, "grad_norm": 0.09278815984725952, "learning_rate": 1.7896067486044706e-05, "loss": 9.1098, "step": 81090 }, { "epoch": 0.4050038702589328, "grad_norm": 0.09353374689817429, "learning_rate": 1.789456557110316e-05, "loss": 9.1081, "step": 81100 }, { "epoch": 0.40505380908387223, "grad_norm": 0.09639580547809601, "learning_rate": 1.7893063656161606e-05, "loss": 9.0991, "step": 81110 }, { "epoch": 0.4051037479088117, "grad_norm": 0.09381458908319473, "learning_rate": 1.7891561741220057e-05, "loss": 9.1231, "step": 81120 }, { "epoch": 0.40515368673375113, "grad_norm": 0.09496497362852097, "learning_rate": 1.7890059826278507e-05, "loss": 9.1022, "step": 81130 }, { "epoch": 0.4052036255586906, "grad_norm": 0.09724625945091248, "learning_rate": 1.7888557911336954e-05, "loss": 9.1116, "step": 81140 }, { "epoch": 0.40525356438363, "grad_norm": 0.09216088056564331, "learning_rate": 1.7887055996395407e-05, "loss": 9.1035, "step": 81150 }, { "epoch": 0.4053035032085695, "grad_norm": 0.09402379393577576, "learning_rate": 1.7885554081453854e-05, "loss": 9.1074, "step": 81160 }, { "epoch": 0.4053534420335089, "grad_norm": 0.09243787080049515, "learning_rate": 1.7884052166512304e-05, "loss": 9.0979, "step": 81170 }, { "epoch": 0.4054033808584484, "grad_norm": 0.0954827070236206, "learning_rate": 1.7882550251570754e-05, "loss": 9.104, "step": 81180 }, { "epoch": 0.4054533196833878, "grad_norm": 0.09604278206825256, "learning_rate": 1.78810483366292e-05, "loss": 9.0999, "step": 81190 }, { "epoch": 0.4055032585083273, "grad_norm": 0.09881725907325745, "learning_rate": 1.7879546421687655e-05, "loss": 9.1049, "step": 81200 }, { "epoch": 0.4055531973332667, "grad_norm": 0.09090153127908707, "learning_rate": 1.78780445067461e-05, "loss": 9.1028, "step": 81210 }, { "epoch": 0.4056031361582062, "grad_norm": 0.09422878175973892, "learning_rate": 1.787654259180455e-05, "loss": 9.0912, "step": 81220 }, { "epoch": 0.4056530749831456, "grad_norm": 0.09033548831939697, "learning_rate": 1.7875040676863002e-05, "loss": 9.1124, "step": 81230 }, { "epoch": 0.4057030138080851, "grad_norm": 0.08588909357786179, "learning_rate": 1.787353876192145e-05, "loss": 9.0967, "step": 81240 }, { "epoch": 0.4057529526330245, "grad_norm": 0.0940273180603981, "learning_rate": 1.7872036846979902e-05, "loss": 9.1105, "step": 81250 }, { "epoch": 0.405802891457964, "grad_norm": 0.0950455591082573, "learning_rate": 1.787053493203835e-05, "loss": 9.1095, "step": 81260 }, { "epoch": 0.4058528302829034, "grad_norm": 0.09447299689054489, "learning_rate": 1.78690330170968e-05, "loss": 9.1007, "step": 81270 }, { "epoch": 0.4059027691078429, "grad_norm": 0.09144210070371628, "learning_rate": 1.786753110215525e-05, "loss": 9.1033, "step": 81280 }, { "epoch": 0.4059527079327823, "grad_norm": 0.08806926012039185, "learning_rate": 1.7866029187213696e-05, "loss": 9.105, "step": 81290 }, { "epoch": 0.4060026467577218, "grad_norm": 0.09366438537836075, "learning_rate": 1.786452727227215e-05, "loss": 9.0969, "step": 81300 }, { "epoch": 0.4060525855826612, "grad_norm": 0.0918484702706337, "learning_rate": 1.7863025357330596e-05, "loss": 9.1047, "step": 81310 }, { "epoch": 0.4061025244076007, "grad_norm": 0.09249420464038849, "learning_rate": 1.7861523442389047e-05, "loss": 9.0934, "step": 81320 }, { "epoch": 0.4061524632325401, "grad_norm": 0.09651041775941849, "learning_rate": 1.7860021527447497e-05, "loss": 9.1173, "step": 81330 }, { "epoch": 0.4062024020574796, "grad_norm": 0.09468764811754227, "learning_rate": 1.7858519612505944e-05, "loss": 9.0975, "step": 81340 }, { "epoch": 0.406252340882419, "grad_norm": 0.09159006178379059, "learning_rate": 1.7857017697564397e-05, "loss": 9.108, "step": 81350 }, { "epoch": 0.4063022797073585, "grad_norm": 0.08911147713661194, "learning_rate": 1.7855515782622844e-05, "loss": 9.1011, "step": 81360 }, { "epoch": 0.4063522185322979, "grad_norm": 0.09557855129241943, "learning_rate": 1.7854013867681294e-05, "loss": 9.1114, "step": 81370 }, { "epoch": 0.4064021573572374, "grad_norm": 0.09145300090312958, "learning_rate": 1.7852511952739744e-05, "loss": 9.0937, "step": 81380 }, { "epoch": 0.4064520961821768, "grad_norm": 0.09059559553861618, "learning_rate": 1.785101003779819e-05, "loss": 9.1113, "step": 81390 }, { "epoch": 0.4065020350071163, "grad_norm": 0.09117017686367035, "learning_rate": 1.7849508122856645e-05, "loss": 9.1032, "step": 81400 }, { "epoch": 0.4065519738320557, "grad_norm": 0.09077339619398117, "learning_rate": 1.784800620791509e-05, "loss": 9.102, "step": 81410 }, { "epoch": 0.4066019126569952, "grad_norm": 0.0927463248372078, "learning_rate": 1.7846504292973545e-05, "loss": 9.1111, "step": 81420 }, { "epoch": 0.4066518514819346, "grad_norm": 0.09298672527074814, "learning_rate": 1.7845002378031992e-05, "loss": 9.1062, "step": 81430 }, { "epoch": 0.4067017903068741, "grad_norm": 0.09268884360790253, "learning_rate": 1.784350046309044e-05, "loss": 9.0985, "step": 81440 }, { "epoch": 0.4067517291318135, "grad_norm": 0.09739190340042114, "learning_rate": 1.7841998548148892e-05, "loss": 9.1097, "step": 81450 }, { "epoch": 0.406801667956753, "grad_norm": 0.08956177532672882, "learning_rate": 1.784049663320734e-05, "loss": 9.104, "step": 81460 }, { "epoch": 0.4068516067816924, "grad_norm": 0.09477641433477402, "learning_rate": 1.7838994718265792e-05, "loss": 9.1018, "step": 81470 }, { "epoch": 0.4069015456066319, "grad_norm": 0.09133501350879669, "learning_rate": 1.783749280332424e-05, "loss": 9.1138, "step": 81480 }, { "epoch": 0.4069514844315713, "grad_norm": 0.09000437706708908, "learning_rate": 1.7835990888382686e-05, "loss": 9.1056, "step": 81490 }, { "epoch": 0.4070014232565108, "grad_norm": 0.09407982975244522, "learning_rate": 1.783448897344114e-05, "loss": 9.1159, "step": 81500 }, { "epoch": 0.4070513620814502, "grad_norm": 0.09220529347658157, "learning_rate": 1.7832987058499586e-05, "loss": 9.0999, "step": 81510 }, { "epoch": 0.4071013009063897, "grad_norm": 0.10050280392169952, "learning_rate": 1.783148514355804e-05, "loss": 9.0938, "step": 81520 }, { "epoch": 0.4071512397313291, "grad_norm": 0.09240719676017761, "learning_rate": 1.7829983228616487e-05, "loss": 9.1099, "step": 81530 }, { "epoch": 0.4072011785562686, "grad_norm": 0.09365271776914597, "learning_rate": 1.7828481313674934e-05, "loss": 9.105, "step": 81540 }, { "epoch": 0.407251117381208, "grad_norm": 0.0895438864827156, "learning_rate": 1.7826979398733387e-05, "loss": 9.0933, "step": 81550 }, { "epoch": 0.4073010562061475, "grad_norm": 0.09172806888818741, "learning_rate": 1.7825477483791834e-05, "loss": 9.0977, "step": 81560 }, { "epoch": 0.4073509950310869, "grad_norm": 0.09493505954742432, "learning_rate": 1.7823975568850287e-05, "loss": 9.1043, "step": 81570 }, { "epoch": 0.4074009338560264, "grad_norm": 0.08669907599687576, "learning_rate": 1.7822473653908734e-05, "loss": 9.0986, "step": 81580 }, { "epoch": 0.4074508726809658, "grad_norm": 0.09328383207321167, "learning_rate": 1.782097173896718e-05, "loss": 9.0989, "step": 81590 }, { "epoch": 0.4075008115059053, "grad_norm": 0.09149834513664246, "learning_rate": 1.7819469824025635e-05, "loss": 9.114, "step": 81600 }, { "epoch": 0.4075507503308447, "grad_norm": 0.08791434019804001, "learning_rate": 1.781796790908408e-05, "loss": 9.1065, "step": 81610 }, { "epoch": 0.4076006891557842, "grad_norm": 0.09626881778240204, "learning_rate": 1.7816465994142535e-05, "loss": 9.0977, "step": 81620 }, { "epoch": 0.4076506279807236, "grad_norm": 0.08901573717594147, "learning_rate": 1.7814964079200982e-05, "loss": 9.1001, "step": 81630 }, { "epoch": 0.4077005668056631, "grad_norm": 0.09143085032701492, "learning_rate": 1.781346216425943e-05, "loss": 9.0981, "step": 81640 }, { "epoch": 0.4077505056306025, "grad_norm": 0.09885165840387344, "learning_rate": 1.7811960249317882e-05, "loss": 9.0968, "step": 81650 }, { "epoch": 0.407800444455542, "grad_norm": 0.09109603613615036, "learning_rate": 1.781045833437633e-05, "loss": 9.0985, "step": 81660 }, { "epoch": 0.4078503832804814, "grad_norm": 0.09030817449092865, "learning_rate": 1.7808956419434783e-05, "loss": 9.1018, "step": 81670 }, { "epoch": 0.4079003221054209, "grad_norm": 0.09494989365339279, "learning_rate": 1.780745450449323e-05, "loss": 9.1095, "step": 81680 }, { "epoch": 0.4079502609303603, "grad_norm": 0.09289959818124771, "learning_rate": 1.7805952589551676e-05, "loss": 9.0857, "step": 81690 }, { "epoch": 0.4080001997552998, "grad_norm": 0.09349525719881058, "learning_rate": 1.780445067461013e-05, "loss": 9.097, "step": 81700 }, { "epoch": 0.4080501385802392, "grad_norm": 0.08992131799459457, "learning_rate": 1.7802948759668576e-05, "loss": 9.0873, "step": 81710 }, { "epoch": 0.4081000774051787, "grad_norm": 0.09782059490680695, "learning_rate": 1.780144684472703e-05, "loss": 9.0824, "step": 81720 }, { "epoch": 0.4081500162301181, "grad_norm": 0.09610580652952194, "learning_rate": 1.7799944929785477e-05, "loss": 9.1044, "step": 81730 }, { "epoch": 0.4081999550550576, "grad_norm": 0.09353209286928177, "learning_rate": 1.7798443014843927e-05, "loss": 9.0979, "step": 81740 }, { "epoch": 0.408249893879997, "grad_norm": 0.08947628736495972, "learning_rate": 1.7796941099902377e-05, "loss": 9.0948, "step": 81750 }, { "epoch": 0.40829983270493647, "grad_norm": 0.09303894639015198, "learning_rate": 1.7795439184960824e-05, "loss": 9.0996, "step": 81760 }, { "epoch": 0.4083497715298759, "grad_norm": 0.09388189017772675, "learning_rate": 1.7793937270019278e-05, "loss": 9.1086, "step": 81770 }, { "epoch": 0.40839971035481537, "grad_norm": 0.09600455313920975, "learning_rate": 1.7792435355077724e-05, "loss": 9.0987, "step": 81780 }, { "epoch": 0.4084496491797548, "grad_norm": 0.091989666223526, "learning_rate": 1.7790933440136174e-05, "loss": 9.1066, "step": 81790 }, { "epoch": 0.40849958800469427, "grad_norm": 0.09617997705936432, "learning_rate": 1.7789431525194625e-05, "loss": 9.1038, "step": 81800 }, { "epoch": 0.4085495268296337, "grad_norm": 0.09295359253883362, "learning_rate": 1.778792961025307e-05, "loss": 9.0986, "step": 81810 }, { "epoch": 0.40859946565457317, "grad_norm": 0.09218577295541763, "learning_rate": 1.7786427695311525e-05, "loss": 9.1062, "step": 81820 }, { "epoch": 0.4086494044795126, "grad_norm": 0.09626112878322601, "learning_rate": 1.7784925780369972e-05, "loss": 9.0865, "step": 81830 }, { "epoch": 0.40869934330445207, "grad_norm": 0.08941785991191864, "learning_rate": 1.7783423865428422e-05, "loss": 9.0962, "step": 81840 }, { "epoch": 0.4087492821293915, "grad_norm": 0.0907067134976387, "learning_rate": 1.7781921950486872e-05, "loss": 9.1065, "step": 81850 }, { "epoch": 0.40879922095433097, "grad_norm": 0.08822061866521835, "learning_rate": 1.778042003554532e-05, "loss": 9.1054, "step": 81860 }, { "epoch": 0.4088491597792704, "grad_norm": 0.08900550752878189, "learning_rate": 1.7778918120603773e-05, "loss": 9.1029, "step": 81870 }, { "epoch": 0.40889909860420987, "grad_norm": 0.09885650873184204, "learning_rate": 1.777741620566222e-05, "loss": 9.0903, "step": 81880 }, { "epoch": 0.4089490374291493, "grad_norm": 0.09264232963323593, "learning_rate": 1.777591429072067e-05, "loss": 9.0968, "step": 81890 }, { "epoch": 0.40899897625408876, "grad_norm": 0.0919133648276329, "learning_rate": 1.777441237577912e-05, "loss": 9.0921, "step": 81900 }, { "epoch": 0.4090489150790282, "grad_norm": 0.09178303927183151, "learning_rate": 1.7772910460837566e-05, "loss": 9.0978, "step": 81910 }, { "epoch": 0.40909885390396766, "grad_norm": 0.09064862877130508, "learning_rate": 1.777140854589602e-05, "loss": 9.1053, "step": 81920 }, { "epoch": 0.4091487927289071, "grad_norm": 0.0949510857462883, "learning_rate": 1.7769906630954467e-05, "loss": 9.1046, "step": 81930 }, { "epoch": 0.40919873155384656, "grad_norm": 0.09381455928087234, "learning_rate": 1.7768404716012917e-05, "loss": 9.1019, "step": 81940 }, { "epoch": 0.409248670378786, "grad_norm": 0.0917896181344986, "learning_rate": 1.7766902801071367e-05, "loss": 9.0914, "step": 81950 }, { "epoch": 0.40929860920372546, "grad_norm": 0.09546224027872086, "learning_rate": 1.7765400886129814e-05, "loss": 9.0897, "step": 81960 }, { "epoch": 0.4093485480286649, "grad_norm": 0.09134580194950104, "learning_rate": 1.7763898971188268e-05, "loss": 9.0924, "step": 81970 }, { "epoch": 0.40939848685360436, "grad_norm": 0.09422563761472702, "learning_rate": 1.7762397056246714e-05, "loss": 9.1046, "step": 81980 }, { "epoch": 0.4094484256785438, "grad_norm": 0.09437835216522217, "learning_rate": 1.7760895141305164e-05, "loss": 9.1111, "step": 81990 }, { "epoch": 0.40949836450348326, "grad_norm": 0.09388506412506104, "learning_rate": 1.7759393226363615e-05, "loss": 9.0865, "step": 82000 }, { "epoch": 0.4095483033284227, "grad_norm": 0.09779687970876694, "learning_rate": 1.775789131142206e-05, "loss": 9.0946, "step": 82010 }, { "epoch": 0.40959824215336216, "grad_norm": 0.08977722376585007, "learning_rate": 1.7756389396480515e-05, "loss": 9.1029, "step": 82020 }, { "epoch": 0.4096481809783016, "grad_norm": 0.09358174353837967, "learning_rate": 1.7754887481538962e-05, "loss": 9.0842, "step": 82030 }, { "epoch": 0.40969811980324106, "grad_norm": 0.09870479255914688, "learning_rate": 1.7753385566597412e-05, "loss": 9.0784, "step": 82040 }, { "epoch": 0.4097480586281805, "grad_norm": 0.09142067283391953, "learning_rate": 1.7751883651655862e-05, "loss": 9.0896, "step": 82050 }, { "epoch": 0.40979799745311996, "grad_norm": 0.08930368721485138, "learning_rate": 1.7750381736714312e-05, "loss": 9.1012, "step": 82060 }, { "epoch": 0.4098479362780594, "grad_norm": 0.09085031598806381, "learning_rate": 1.7748879821772763e-05, "loss": 9.0979, "step": 82070 }, { "epoch": 0.4098978751029988, "grad_norm": 0.09649605304002762, "learning_rate": 1.774737790683121e-05, "loss": 9.0945, "step": 82080 }, { "epoch": 0.4099478139279383, "grad_norm": 0.09378213435411453, "learning_rate": 1.774587599188966e-05, "loss": 9.1019, "step": 82090 }, { "epoch": 0.4099977527528777, "grad_norm": 0.09320469200611115, "learning_rate": 1.774437407694811e-05, "loss": 9.0815, "step": 82100 }, { "epoch": 0.4100476915778172, "grad_norm": 0.09025924652814865, "learning_rate": 1.774287216200656e-05, "loss": 9.1096, "step": 82110 }, { "epoch": 0.4100976304027566, "grad_norm": 0.09439290314912796, "learning_rate": 1.774137024706501e-05, "loss": 9.1072, "step": 82120 }, { "epoch": 0.4101475692276961, "grad_norm": 0.09332076460123062, "learning_rate": 1.7739868332123457e-05, "loss": 9.104, "step": 82130 }, { "epoch": 0.4101975080526355, "grad_norm": 0.10301212221384048, "learning_rate": 1.7738366417181907e-05, "loss": 9.0906, "step": 82140 }, { "epoch": 0.410247446877575, "grad_norm": 0.08951026201248169, "learning_rate": 1.7736864502240357e-05, "loss": 9.1012, "step": 82150 }, { "epoch": 0.4102973857025144, "grad_norm": 0.09387951344251633, "learning_rate": 1.7735362587298807e-05, "loss": 9.1009, "step": 82160 }, { "epoch": 0.41034732452745387, "grad_norm": 0.0935429260134697, "learning_rate": 1.7733860672357258e-05, "loss": 9.0949, "step": 82170 }, { "epoch": 0.4103972633523933, "grad_norm": 0.09872562438249588, "learning_rate": 1.7732358757415704e-05, "loss": 9.0956, "step": 82180 }, { "epoch": 0.41044720217733277, "grad_norm": 0.09391787648200989, "learning_rate": 1.7730856842474155e-05, "loss": 9.0856, "step": 82190 }, { "epoch": 0.4104971410022722, "grad_norm": 0.09664194285869598, "learning_rate": 1.7729354927532605e-05, "loss": 9.0931, "step": 82200 }, { "epoch": 0.41054707982721167, "grad_norm": 0.10238590836524963, "learning_rate": 1.7727853012591055e-05, "loss": 9.0853, "step": 82210 }, { "epoch": 0.4105970186521511, "grad_norm": 0.08652331680059433, "learning_rate": 1.7726351097649505e-05, "loss": 9.1032, "step": 82220 }, { "epoch": 0.41064695747709057, "grad_norm": 0.09455471485853195, "learning_rate": 1.7724849182707952e-05, "loss": 9.0888, "step": 82230 }, { "epoch": 0.41069689630203, "grad_norm": 0.09298815578222275, "learning_rate": 1.7723347267766402e-05, "loss": 9.0829, "step": 82240 }, { "epoch": 0.41074683512696947, "grad_norm": 0.09878503531217575, "learning_rate": 1.7721845352824852e-05, "loss": 9.0886, "step": 82250 }, { "epoch": 0.4107967739519089, "grad_norm": 0.09556316584348679, "learning_rate": 1.7720343437883302e-05, "loss": 9.0951, "step": 82260 }, { "epoch": 0.41084671277684837, "grad_norm": 0.094581738114357, "learning_rate": 1.7718841522941753e-05, "loss": 9.0941, "step": 82270 }, { "epoch": 0.4108966516017878, "grad_norm": 0.09531483799219131, "learning_rate": 1.77173396080002e-05, "loss": 9.0941, "step": 82280 }, { "epoch": 0.41094659042672727, "grad_norm": 0.09399186819791794, "learning_rate": 1.771583769305865e-05, "loss": 9.0851, "step": 82290 }, { "epoch": 0.4109965292516667, "grad_norm": 0.09608032554388046, "learning_rate": 1.77143357781171e-05, "loss": 9.0931, "step": 82300 }, { "epoch": 0.41104646807660616, "grad_norm": 0.0954754576086998, "learning_rate": 1.771283386317555e-05, "loss": 9.1091, "step": 82310 }, { "epoch": 0.4110964069015456, "grad_norm": 0.08752164989709854, "learning_rate": 1.7711331948234e-05, "loss": 9.0904, "step": 82320 }, { "epoch": 0.41114634572648506, "grad_norm": 0.09011632949113846, "learning_rate": 1.7709830033292447e-05, "loss": 9.1022, "step": 82330 }, { "epoch": 0.4111962845514245, "grad_norm": 0.08889009058475494, "learning_rate": 1.7708328118350897e-05, "loss": 9.0813, "step": 82340 }, { "epoch": 0.41124622337636396, "grad_norm": 0.0934472531080246, "learning_rate": 1.7706826203409347e-05, "loss": 9.084, "step": 82350 }, { "epoch": 0.4112961622013034, "grad_norm": 0.0992710068821907, "learning_rate": 1.7705324288467797e-05, "loss": 9.1012, "step": 82360 }, { "epoch": 0.41134610102624286, "grad_norm": 0.0951753556728363, "learning_rate": 1.7703822373526248e-05, "loss": 9.0896, "step": 82370 }, { "epoch": 0.4113960398511823, "grad_norm": 0.09202361106872559, "learning_rate": 1.7702320458584694e-05, "loss": 9.0929, "step": 82380 }, { "epoch": 0.41144597867612176, "grad_norm": 0.09348968416452408, "learning_rate": 1.7700818543643145e-05, "loss": 9.0928, "step": 82390 }, { "epoch": 0.4114959175010612, "grad_norm": 0.0965779572725296, "learning_rate": 1.7699316628701595e-05, "loss": 9.086, "step": 82400 }, { "epoch": 0.41154585632600066, "grad_norm": 0.09712937474250793, "learning_rate": 1.7697814713760045e-05, "loss": 9.085, "step": 82410 }, { "epoch": 0.4115957951509401, "grad_norm": 0.09527904540300369, "learning_rate": 1.7696312798818495e-05, "loss": 9.0898, "step": 82420 }, { "epoch": 0.41164573397587956, "grad_norm": 0.09039422124624252, "learning_rate": 1.7694810883876945e-05, "loss": 9.0958, "step": 82430 }, { "epoch": 0.411695672800819, "grad_norm": 0.09513916820287704, "learning_rate": 1.7693308968935392e-05, "loss": 9.085, "step": 82440 }, { "epoch": 0.41174561162575846, "grad_norm": 0.09290063381195068, "learning_rate": 1.7691807053993842e-05, "loss": 9.0877, "step": 82450 }, { "epoch": 0.4117955504506979, "grad_norm": 0.09197252988815308, "learning_rate": 1.7690305139052292e-05, "loss": 9.0885, "step": 82460 }, { "epoch": 0.41184548927563736, "grad_norm": 0.0950205847620964, "learning_rate": 1.7688803224110743e-05, "loss": 9.0964, "step": 82470 }, { "epoch": 0.4118954281005768, "grad_norm": 0.09569496661424637, "learning_rate": 1.7687301309169193e-05, "loss": 9.0962, "step": 82480 }, { "epoch": 0.41194536692551625, "grad_norm": 0.09163631498813629, "learning_rate": 1.768579939422764e-05, "loss": 9.101, "step": 82490 }, { "epoch": 0.4119953057504557, "grad_norm": 0.09723185002803802, "learning_rate": 1.768429747928609e-05, "loss": 9.0751, "step": 82500 }, { "epoch": 0.41204524457539515, "grad_norm": 0.09386081993579865, "learning_rate": 1.768279556434454e-05, "loss": 9.0936, "step": 82510 }, { "epoch": 0.4120951834003346, "grad_norm": 0.09162143617868423, "learning_rate": 1.768129364940299e-05, "loss": 9.1021, "step": 82520 }, { "epoch": 0.41214512222527405, "grad_norm": 0.09791895002126694, "learning_rate": 1.767979173446144e-05, "loss": 9.0871, "step": 82530 }, { "epoch": 0.4121950610502135, "grad_norm": 0.09404843300580978, "learning_rate": 1.7678289819519887e-05, "loss": 9.0875, "step": 82540 }, { "epoch": 0.41224499987515295, "grad_norm": 0.09097657352685928, "learning_rate": 1.7676787904578337e-05, "loss": 9.0881, "step": 82550 }, { "epoch": 0.4122949387000924, "grad_norm": 0.090410977602005, "learning_rate": 1.7675285989636787e-05, "loss": 9.0898, "step": 82560 }, { "epoch": 0.41234487752503185, "grad_norm": 0.09892985969781876, "learning_rate": 1.7673784074695238e-05, "loss": 9.0896, "step": 82570 }, { "epoch": 0.41239481634997127, "grad_norm": 0.09155625849962234, "learning_rate": 1.7672282159753688e-05, "loss": 9.0865, "step": 82580 }, { "epoch": 0.41244475517491075, "grad_norm": 0.09150595217943192, "learning_rate": 1.7670780244812135e-05, "loss": 9.0801, "step": 82590 }, { "epoch": 0.41249469399985017, "grad_norm": 0.09165318310260773, "learning_rate": 1.7669278329870585e-05, "loss": 9.0751, "step": 82600 }, { "epoch": 0.41254463282478965, "grad_norm": 0.09589849412441254, "learning_rate": 1.7667776414929035e-05, "loss": 9.0826, "step": 82610 }, { "epoch": 0.41259457164972907, "grad_norm": 0.10229449719190598, "learning_rate": 1.7666274499987485e-05, "loss": 9.0724, "step": 82620 }, { "epoch": 0.41264451047466855, "grad_norm": 0.09139035642147064, "learning_rate": 1.7664772585045935e-05, "loss": 9.091, "step": 82630 }, { "epoch": 0.41269444929960797, "grad_norm": 0.09130216389894485, "learning_rate": 1.7663270670104382e-05, "loss": 9.0797, "step": 82640 }, { "epoch": 0.41274438812454745, "grad_norm": 0.08872245997190475, "learning_rate": 1.7661768755162832e-05, "loss": 9.0946, "step": 82650 }, { "epoch": 0.41279432694948687, "grad_norm": 0.09483634680509567, "learning_rate": 1.7660266840221282e-05, "loss": 9.0982, "step": 82660 }, { "epoch": 0.41284426577442634, "grad_norm": 0.10040302574634552, "learning_rate": 1.7658764925279733e-05, "loss": 9.0882, "step": 82670 }, { "epoch": 0.41289420459936577, "grad_norm": 0.09902846813201904, "learning_rate": 1.7657263010338183e-05, "loss": 9.0838, "step": 82680 }, { "epoch": 0.41294414342430524, "grad_norm": 0.090021513402462, "learning_rate": 1.765576109539663e-05, "loss": 9.0854, "step": 82690 }, { "epoch": 0.41299408224924467, "grad_norm": 0.09512148797512054, "learning_rate": 1.765425918045508e-05, "loss": 9.0922, "step": 82700 }, { "epoch": 0.41304402107418414, "grad_norm": 0.09530332684516907, "learning_rate": 1.765275726551353e-05, "loss": 9.0939, "step": 82710 }, { "epoch": 0.41309395989912356, "grad_norm": 0.09510703384876251, "learning_rate": 1.765125535057198e-05, "loss": 9.0849, "step": 82720 }, { "epoch": 0.41314389872406304, "grad_norm": 0.09195013344287872, "learning_rate": 1.764975343563043e-05, "loss": 9.0738, "step": 82730 }, { "epoch": 0.41319383754900246, "grad_norm": 0.09670493006706238, "learning_rate": 1.7648251520688877e-05, "loss": 9.0946, "step": 82740 }, { "epoch": 0.41324377637394194, "grad_norm": 0.09696520864963531, "learning_rate": 1.764674960574733e-05, "loss": 9.0757, "step": 82750 }, { "epoch": 0.41329371519888136, "grad_norm": 0.09648612141609192, "learning_rate": 1.7645247690805777e-05, "loss": 9.0891, "step": 82760 }, { "epoch": 0.41334365402382084, "grad_norm": 0.09383885562419891, "learning_rate": 1.7643745775864228e-05, "loss": 9.0929, "step": 82770 }, { "epoch": 0.41339359284876026, "grad_norm": 0.09602250903844833, "learning_rate": 1.7642243860922678e-05, "loss": 9.0904, "step": 82780 }, { "epoch": 0.41344353167369974, "grad_norm": 0.08740779012441635, "learning_rate": 1.7640741945981125e-05, "loss": 9.0999, "step": 82790 }, { "epoch": 0.41349347049863916, "grad_norm": 0.10102899372577667, "learning_rate": 1.7639240031039578e-05, "loss": 9.0845, "step": 82800 }, { "epoch": 0.41354340932357864, "grad_norm": 0.10466084629297256, "learning_rate": 1.7637738116098025e-05, "loss": 9.0695, "step": 82810 }, { "epoch": 0.41359334814851806, "grad_norm": 0.09442269057035446, "learning_rate": 1.7636236201156475e-05, "loss": 9.0806, "step": 82820 }, { "epoch": 0.41364328697345754, "grad_norm": 0.09268420934677124, "learning_rate": 1.7634734286214925e-05, "loss": 9.1006, "step": 82830 }, { "epoch": 0.41369322579839696, "grad_norm": 0.09057000279426575, "learning_rate": 1.7633232371273372e-05, "loss": 9.0809, "step": 82840 }, { "epoch": 0.41374316462333643, "grad_norm": 0.0951695516705513, "learning_rate": 1.7631730456331826e-05, "loss": 9.0833, "step": 82850 }, { "epoch": 0.41379310344827586, "grad_norm": 0.09882236272096634, "learning_rate": 1.7630228541390272e-05, "loss": 9.0878, "step": 82860 }, { "epoch": 0.41384304227321533, "grad_norm": 0.09375642985105515, "learning_rate": 1.7628726626448723e-05, "loss": 9.0873, "step": 82870 }, { "epoch": 0.41389298109815476, "grad_norm": 0.08775999397039413, "learning_rate": 1.7627224711507173e-05, "loss": 9.1052, "step": 82880 }, { "epoch": 0.41394291992309423, "grad_norm": 0.09294060617685318, "learning_rate": 1.762572279656562e-05, "loss": 9.0823, "step": 82890 }, { "epoch": 0.41399285874803365, "grad_norm": 0.10115856677293777, "learning_rate": 1.7624220881624073e-05, "loss": 9.0824, "step": 82900 }, { "epoch": 0.41404279757297313, "grad_norm": 0.09972229599952698, "learning_rate": 1.762271896668252e-05, "loss": 9.0816, "step": 82910 }, { "epoch": 0.41409273639791255, "grad_norm": 0.0935303345322609, "learning_rate": 1.762121705174097e-05, "loss": 9.0848, "step": 82920 }, { "epoch": 0.41414267522285203, "grad_norm": 0.09236697852611542, "learning_rate": 1.761971513679942e-05, "loss": 9.091, "step": 82930 }, { "epoch": 0.41419261404779145, "grad_norm": 0.08565404266119003, "learning_rate": 1.7618213221857867e-05, "loss": 9.0823, "step": 82940 }, { "epoch": 0.41424255287273093, "grad_norm": 0.09559847414493561, "learning_rate": 1.761671130691632e-05, "loss": 9.0797, "step": 82950 }, { "epoch": 0.41429249169767035, "grad_norm": 0.09396830946207047, "learning_rate": 1.7615209391974767e-05, "loss": 9.088, "step": 82960 }, { "epoch": 0.41434243052260983, "grad_norm": 0.09308158606290817, "learning_rate": 1.7613707477033218e-05, "loss": 9.0753, "step": 82970 }, { "epoch": 0.41439236934754925, "grad_norm": 0.10932270437479019, "learning_rate": 1.7612205562091668e-05, "loss": 9.0863, "step": 82980 }, { "epoch": 0.4144423081724887, "grad_norm": 0.09359308332204819, "learning_rate": 1.7610703647150115e-05, "loss": 9.084, "step": 82990 }, { "epoch": 0.41449224699742815, "grad_norm": 0.09583938866853714, "learning_rate": 1.7609201732208568e-05, "loss": 9.075, "step": 83000 }, { "epoch": 0.4145421858223676, "grad_norm": 0.09451348334550858, "learning_rate": 1.7607699817267015e-05, "loss": 9.0976, "step": 83010 }, { "epoch": 0.41459212464730705, "grad_norm": 0.09366924315690994, "learning_rate": 1.7606197902325465e-05, "loss": 9.0789, "step": 83020 }, { "epoch": 0.4146420634722465, "grad_norm": 0.09320022910833359, "learning_rate": 1.7604695987383915e-05, "loss": 9.0808, "step": 83030 }, { "epoch": 0.41469200229718595, "grad_norm": 0.09090333431959152, "learning_rate": 1.7603194072442362e-05, "loss": 9.0839, "step": 83040 }, { "epoch": 0.4147419411221254, "grad_norm": 0.093370221555233, "learning_rate": 1.7601692157500816e-05, "loss": 9.0735, "step": 83050 }, { "epoch": 0.41479187994706485, "grad_norm": 0.09438198059797287, "learning_rate": 1.7600190242559262e-05, "loss": 9.085, "step": 83060 }, { "epoch": 0.41484181877200427, "grad_norm": 0.09503324329853058, "learning_rate": 1.7598688327617716e-05, "loss": 9.087, "step": 83070 }, { "epoch": 0.41489175759694374, "grad_norm": 0.09370449930429459, "learning_rate": 1.7597186412676163e-05, "loss": 9.0811, "step": 83080 }, { "epoch": 0.41494169642188317, "grad_norm": 0.09288004785776138, "learning_rate": 1.759568449773461e-05, "loss": 9.0904, "step": 83090 }, { "epoch": 0.41499163524682264, "grad_norm": 0.09713056683540344, "learning_rate": 1.7594182582793063e-05, "loss": 9.0817, "step": 83100 }, { "epoch": 0.41504157407176206, "grad_norm": 0.09737755358219147, "learning_rate": 1.759268066785151e-05, "loss": 9.071, "step": 83110 }, { "epoch": 0.41509151289670154, "grad_norm": 0.10059583187103271, "learning_rate": 1.7591178752909963e-05, "loss": 9.0756, "step": 83120 }, { "epoch": 0.41514145172164096, "grad_norm": 0.0912448912858963, "learning_rate": 1.758967683796841e-05, "loss": 9.0848, "step": 83130 }, { "epoch": 0.41519139054658044, "grad_norm": 0.09705382585525513, "learning_rate": 1.7588174923026857e-05, "loss": 9.0865, "step": 83140 }, { "epoch": 0.41524132937151986, "grad_norm": 0.08912192285060883, "learning_rate": 1.758667300808531e-05, "loss": 9.0884, "step": 83150 }, { "epoch": 0.41529126819645934, "grad_norm": 0.09693977981805801, "learning_rate": 1.7585171093143757e-05, "loss": 9.081, "step": 83160 }, { "epoch": 0.41534120702139876, "grad_norm": 0.09407562017440796, "learning_rate": 1.758366917820221e-05, "loss": 9.0875, "step": 83170 }, { "epoch": 0.41539114584633824, "grad_norm": 0.0872037410736084, "learning_rate": 1.7582167263260658e-05, "loss": 9.0934, "step": 83180 }, { "epoch": 0.41544108467127766, "grad_norm": 0.09205826371908188, "learning_rate": 1.7580665348319105e-05, "loss": 9.0891, "step": 83190 }, { "epoch": 0.41549102349621714, "grad_norm": 0.09072985500097275, "learning_rate": 1.7579163433377558e-05, "loss": 9.0847, "step": 83200 }, { "epoch": 0.41554096232115656, "grad_norm": 0.08977384865283966, "learning_rate": 1.7577661518436005e-05, "loss": 9.0795, "step": 83210 }, { "epoch": 0.41559090114609604, "grad_norm": 0.0946430042386055, "learning_rate": 1.757615960349446e-05, "loss": 9.0804, "step": 83220 }, { "epoch": 0.41564083997103546, "grad_norm": 0.09836602956056595, "learning_rate": 1.7574657688552905e-05, "loss": 9.0745, "step": 83230 }, { "epoch": 0.41569077879597494, "grad_norm": 0.09133485704660416, "learning_rate": 1.7573155773611352e-05, "loss": 9.0921, "step": 83240 }, { "epoch": 0.41574071762091436, "grad_norm": 0.093567855656147, "learning_rate": 1.7571653858669806e-05, "loss": 9.0733, "step": 83250 }, { "epoch": 0.41579065644585383, "grad_norm": 0.09800833463668823, "learning_rate": 1.7570151943728252e-05, "loss": 9.0626, "step": 83260 }, { "epoch": 0.41584059527079326, "grad_norm": 0.1007581278681755, "learning_rate": 1.7568650028786706e-05, "loss": 9.0695, "step": 83270 }, { "epoch": 0.41589053409573273, "grad_norm": 0.09314744919538498, "learning_rate": 1.7567148113845153e-05, "loss": 9.0963, "step": 83280 }, { "epoch": 0.41594047292067216, "grad_norm": 0.09255973249673843, "learning_rate": 1.75656461989036e-05, "loss": 9.0841, "step": 83290 }, { "epoch": 0.41599041174561163, "grad_norm": 0.0947415828704834, "learning_rate": 1.7564144283962053e-05, "loss": 9.0759, "step": 83300 }, { "epoch": 0.41604035057055105, "grad_norm": 0.09376344829797745, "learning_rate": 1.75626423690205e-05, "loss": 9.0787, "step": 83310 }, { "epoch": 0.41609028939549053, "grad_norm": 0.09048786014318466, "learning_rate": 1.7561140454078954e-05, "loss": 9.0731, "step": 83320 }, { "epoch": 0.41614022822042995, "grad_norm": 0.09429807960987091, "learning_rate": 1.75596385391374e-05, "loss": 9.0738, "step": 83330 }, { "epoch": 0.41619016704536943, "grad_norm": 0.0935714915394783, "learning_rate": 1.7558136624195847e-05, "loss": 9.065, "step": 83340 }, { "epoch": 0.41624010587030885, "grad_norm": 0.09685614705085754, "learning_rate": 1.75566347092543e-05, "loss": 9.0758, "step": 83350 }, { "epoch": 0.41629004469524833, "grad_norm": 0.09270419180393219, "learning_rate": 1.7555132794312747e-05, "loss": 9.0771, "step": 83360 }, { "epoch": 0.41633998352018775, "grad_norm": 0.09448056668043137, "learning_rate": 1.75536308793712e-05, "loss": 9.0852, "step": 83370 }, { "epoch": 0.41638992234512723, "grad_norm": 0.09013166278600693, "learning_rate": 1.7552128964429648e-05, "loss": 9.0791, "step": 83380 }, { "epoch": 0.41643986117006665, "grad_norm": 0.09527698159217834, "learning_rate": 1.7550627049488098e-05, "loss": 9.0773, "step": 83390 }, { "epoch": 0.4164897999950061, "grad_norm": 0.09563423693180084, "learning_rate": 1.7549125134546548e-05, "loss": 9.0723, "step": 83400 }, { "epoch": 0.41653973881994555, "grad_norm": 0.09608939290046692, "learning_rate": 1.7547623219604995e-05, "loss": 9.0768, "step": 83410 }, { "epoch": 0.416589677644885, "grad_norm": 0.09871455281972885, "learning_rate": 1.754612130466345e-05, "loss": 9.0737, "step": 83420 }, { "epoch": 0.41663961646982445, "grad_norm": 0.09225298464298248, "learning_rate": 1.7544619389721895e-05, "loss": 9.0819, "step": 83430 }, { "epoch": 0.4166895552947639, "grad_norm": 0.09640655666589737, "learning_rate": 1.7543117474780345e-05, "loss": 9.0798, "step": 83440 }, { "epoch": 0.41673949411970335, "grad_norm": 0.09529406577348709, "learning_rate": 1.7541615559838796e-05, "loss": 9.0874, "step": 83450 }, { "epoch": 0.4167894329446428, "grad_norm": 0.09108598530292511, "learning_rate": 1.7540113644897242e-05, "loss": 9.071, "step": 83460 }, { "epoch": 0.41683937176958225, "grad_norm": 0.09113084524869919, "learning_rate": 1.7538611729955696e-05, "loss": 9.0852, "step": 83470 }, { "epoch": 0.4168893105945217, "grad_norm": 0.09776317328214645, "learning_rate": 1.7537109815014143e-05, "loss": 9.0882, "step": 83480 }, { "epoch": 0.41693924941946114, "grad_norm": 0.09335512667894363, "learning_rate": 1.7535607900072593e-05, "loss": 9.077, "step": 83490 }, { "epoch": 0.4169891882444006, "grad_norm": 0.09077412635087967, "learning_rate": 1.7534105985131043e-05, "loss": 9.0792, "step": 83500 }, { "epoch": 0.41703912706934004, "grad_norm": 0.1023026630282402, "learning_rate": 1.753260407018949e-05, "loss": 9.0743, "step": 83510 }, { "epoch": 0.4170890658942795, "grad_norm": 0.09414954483509064, "learning_rate": 1.7531102155247944e-05, "loss": 9.0811, "step": 83520 }, { "epoch": 0.41713900471921894, "grad_norm": 0.09618096053600311, "learning_rate": 1.752960024030639e-05, "loss": 9.0919, "step": 83530 }, { "epoch": 0.4171889435441584, "grad_norm": 0.0886245146393776, "learning_rate": 1.752809832536484e-05, "loss": 9.0966, "step": 83540 }, { "epoch": 0.41723888236909784, "grad_norm": 0.09386541694402695, "learning_rate": 1.752659641042329e-05, "loss": 9.0889, "step": 83550 }, { "epoch": 0.4172888211940373, "grad_norm": 0.08851998299360275, "learning_rate": 1.7525094495481737e-05, "loss": 9.0748, "step": 83560 }, { "epoch": 0.41733876001897674, "grad_norm": 0.1024288460612297, "learning_rate": 1.752359258054019e-05, "loss": 9.0864, "step": 83570 }, { "epoch": 0.4173886988439162, "grad_norm": 0.09188997000455856, "learning_rate": 1.7522090665598638e-05, "loss": 9.0743, "step": 83580 }, { "epoch": 0.41743863766885564, "grad_norm": 0.09162962436676025, "learning_rate": 1.7520588750657088e-05, "loss": 9.0847, "step": 83590 }, { "epoch": 0.4174885764937951, "grad_norm": 0.0950562134385109, "learning_rate": 1.7519086835715538e-05, "loss": 9.0715, "step": 83600 }, { "epoch": 0.41753851531873454, "grad_norm": 0.09186748415231705, "learning_rate": 1.7517584920773985e-05, "loss": 9.072, "step": 83610 }, { "epoch": 0.417588454143674, "grad_norm": 0.0918838381767273, "learning_rate": 1.751608300583244e-05, "loss": 9.0721, "step": 83620 }, { "epoch": 0.41763839296861344, "grad_norm": 0.09041111171245575, "learning_rate": 1.7514581090890885e-05, "loss": 9.0816, "step": 83630 }, { "epoch": 0.4176883317935529, "grad_norm": 0.08900655061006546, "learning_rate": 1.7513079175949336e-05, "loss": 9.0725, "step": 83640 }, { "epoch": 0.41773827061849234, "grad_norm": 0.09458507597446442, "learning_rate": 1.7511577261007786e-05, "loss": 9.0837, "step": 83650 }, { "epoch": 0.4177882094434318, "grad_norm": 0.08990652859210968, "learning_rate": 1.7510075346066232e-05, "loss": 9.0666, "step": 83660 }, { "epoch": 0.41783814826837123, "grad_norm": 0.09383875876665115, "learning_rate": 1.7508573431124686e-05, "loss": 9.0684, "step": 83670 }, { "epoch": 0.4178880870933107, "grad_norm": 0.0938841700553894, "learning_rate": 1.7507071516183133e-05, "loss": 9.076, "step": 83680 }, { "epoch": 0.41793802591825013, "grad_norm": 0.0932869017124176, "learning_rate": 1.7505569601241583e-05, "loss": 9.0731, "step": 83690 }, { "epoch": 0.4179879647431896, "grad_norm": 0.09501129388809204, "learning_rate": 1.7504067686300033e-05, "loss": 9.0626, "step": 83700 }, { "epoch": 0.41803790356812903, "grad_norm": 0.09345944225788116, "learning_rate": 1.7502565771358483e-05, "loss": 9.0749, "step": 83710 }, { "epoch": 0.4180878423930685, "grad_norm": 0.09460506588220596, "learning_rate": 1.7501063856416934e-05, "loss": 9.0764, "step": 83720 }, { "epoch": 0.41813778121800793, "grad_norm": 0.09743547439575195, "learning_rate": 1.749956194147538e-05, "loss": 9.0757, "step": 83730 }, { "epoch": 0.4181877200429474, "grad_norm": 0.09816765040159225, "learning_rate": 1.749806002653383e-05, "loss": 9.0767, "step": 83740 }, { "epoch": 0.41823765886788683, "grad_norm": 0.09210207313299179, "learning_rate": 1.749655811159228e-05, "loss": 9.0721, "step": 83750 }, { "epoch": 0.4182875976928263, "grad_norm": 0.09752412140369415, "learning_rate": 1.749505619665073e-05, "loss": 9.0679, "step": 83760 }, { "epoch": 0.41833753651776573, "grad_norm": 0.09351370483636856, "learning_rate": 1.749355428170918e-05, "loss": 9.0862, "step": 83770 }, { "epoch": 0.4183874753427052, "grad_norm": 0.09450095891952515, "learning_rate": 1.7492052366767628e-05, "loss": 9.0659, "step": 83780 }, { "epoch": 0.41843741416764463, "grad_norm": 0.09616388380527496, "learning_rate": 1.7490550451826078e-05, "loss": 9.0898, "step": 83790 }, { "epoch": 0.4184873529925841, "grad_norm": 0.09121173620223999, "learning_rate": 1.7489048536884528e-05, "loss": 9.1012, "step": 83800 }, { "epoch": 0.4185372918175235, "grad_norm": 0.09462346136569977, "learning_rate": 1.748754662194298e-05, "loss": 9.0969, "step": 83810 }, { "epoch": 0.418587230642463, "grad_norm": 0.09324286133050919, "learning_rate": 1.748604470700143e-05, "loss": 9.0746, "step": 83820 }, { "epoch": 0.4186371694674024, "grad_norm": 0.09117257595062256, "learning_rate": 1.7484542792059875e-05, "loss": 9.0724, "step": 83830 }, { "epoch": 0.4186871082923419, "grad_norm": 0.09084919095039368, "learning_rate": 1.7483040877118326e-05, "loss": 9.0775, "step": 83840 }, { "epoch": 0.4187370471172813, "grad_norm": 0.09522408992052078, "learning_rate": 1.7481538962176776e-05, "loss": 9.0747, "step": 83850 }, { "epoch": 0.4187869859422208, "grad_norm": 0.09409532696008682, "learning_rate": 1.7480037047235226e-05, "loss": 9.0663, "step": 83860 }, { "epoch": 0.4188369247671602, "grad_norm": 0.08830953389406204, "learning_rate": 1.7478535132293676e-05, "loss": 9.0784, "step": 83870 }, { "epoch": 0.4188868635920997, "grad_norm": 0.0929829478263855, "learning_rate": 1.7477033217352123e-05, "loss": 9.0728, "step": 83880 }, { "epoch": 0.4189368024170391, "grad_norm": 0.09145011007785797, "learning_rate": 1.7475531302410573e-05, "loss": 9.0703, "step": 83890 }, { "epoch": 0.4189867412419786, "grad_norm": 0.09224165230989456, "learning_rate": 1.7474029387469023e-05, "loss": 9.077, "step": 83900 }, { "epoch": 0.419036680066918, "grad_norm": 0.09355958551168442, "learning_rate": 1.7472527472527473e-05, "loss": 9.0668, "step": 83910 }, { "epoch": 0.4190866188918575, "grad_norm": 0.09556030482053757, "learning_rate": 1.7471025557585924e-05, "loss": 9.0659, "step": 83920 }, { "epoch": 0.4191365577167969, "grad_norm": 0.09351398795843124, "learning_rate": 1.746952364264437e-05, "loss": 9.0652, "step": 83930 }, { "epoch": 0.4191864965417364, "grad_norm": 0.09275408834218979, "learning_rate": 1.746802172770282e-05, "loss": 9.0618, "step": 83940 }, { "epoch": 0.4192364353666758, "grad_norm": 0.0906294509768486, "learning_rate": 1.746651981276127e-05, "loss": 9.0744, "step": 83950 }, { "epoch": 0.4192863741916153, "grad_norm": 0.09481019526720047, "learning_rate": 1.746501789781972e-05, "loss": 9.0686, "step": 83960 }, { "epoch": 0.4193363130165547, "grad_norm": 0.0955205038189888, "learning_rate": 1.746351598287817e-05, "loss": 9.0717, "step": 83970 }, { "epoch": 0.4193862518414942, "grad_norm": 0.09485220909118652, "learning_rate": 1.7462014067936618e-05, "loss": 9.0709, "step": 83980 }, { "epoch": 0.4194361906664336, "grad_norm": 0.09109359234571457, "learning_rate": 1.7460512152995068e-05, "loss": 9.0645, "step": 83990 }, { "epoch": 0.4194861294913731, "grad_norm": 0.09005378186702728, "learning_rate": 1.7459010238053518e-05, "loss": 9.0704, "step": 84000 }, { "epoch": 0.4195360683163125, "grad_norm": 0.09958352148532867, "learning_rate": 1.745750832311197e-05, "loss": 9.0769, "step": 84010 }, { "epoch": 0.419586007141252, "grad_norm": 0.09399083256721497, "learning_rate": 1.745600640817042e-05, "loss": 9.0654, "step": 84020 }, { "epoch": 0.4196359459661914, "grad_norm": 0.09548033028841019, "learning_rate": 1.745450449322887e-05, "loss": 9.0775, "step": 84030 }, { "epoch": 0.4196858847911309, "grad_norm": 0.09409498423337936, "learning_rate": 1.7453002578287316e-05, "loss": 9.0756, "step": 84040 }, { "epoch": 0.4197358236160703, "grad_norm": 0.09729974716901779, "learning_rate": 1.7451500663345766e-05, "loss": 9.0749, "step": 84050 }, { "epoch": 0.41978576244100974, "grad_norm": 0.09663716703653336, "learning_rate": 1.7449998748404216e-05, "loss": 9.069, "step": 84060 }, { "epoch": 0.4198357012659492, "grad_norm": 0.09065845608711243, "learning_rate": 1.7448496833462666e-05, "loss": 9.066, "step": 84070 }, { "epoch": 0.41988564009088863, "grad_norm": 0.09165765345096588, "learning_rate": 1.7446994918521116e-05, "loss": 9.0866, "step": 84080 }, { "epoch": 0.4199355789158281, "grad_norm": 0.09514272212982178, "learning_rate": 1.7445493003579563e-05, "loss": 9.07, "step": 84090 }, { "epoch": 0.41998551774076753, "grad_norm": 0.090760737657547, "learning_rate": 1.7443991088638013e-05, "loss": 9.0838, "step": 84100 }, { "epoch": 0.420035456565707, "grad_norm": 0.09050191193819046, "learning_rate": 1.7442489173696463e-05, "loss": 9.0707, "step": 84110 }, { "epoch": 0.42008539539064643, "grad_norm": 0.09378772228956223, "learning_rate": 1.7440987258754914e-05, "loss": 9.0724, "step": 84120 }, { "epoch": 0.4201353342155859, "grad_norm": 0.09341657161712646, "learning_rate": 1.7439485343813364e-05, "loss": 9.067, "step": 84130 }, { "epoch": 0.42018527304052533, "grad_norm": 0.09716533869504929, "learning_rate": 1.743798342887181e-05, "loss": 9.0632, "step": 84140 }, { "epoch": 0.4202352118654648, "grad_norm": 0.09457621723413467, "learning_rate": 1.743648151393026e-05, "loss": 9.0649, "step": 84150 }, { "epoch": 0.42028515069040423, "grad_norm": 0.0924045741558075, "learning_rate": 1.743497959898871e-05, "loss": 9.0646, "step": 84160 }, { "epoch": 0.4203350895153437, "grad_norm": 0.09211113303899765, "learning_rate": 1.743347768404716e-05, "loss": 9.0715, "step": 84170 }, { "epoch": 0.42038502834028313, "grad_norm": 0.09424125403165817, "learning_rate": 1.743197576910561e-05, "loss": 9.069, "step": 84180 }, { "epoch": 0.4204349671652226, "grad_norm": 0.08973129838705063, "learning_rate": 1.7430473854164058e-05, "loss": 9.0637, "step": 84190 }, { "epoch": 0.42048490599016203, "grad_norm": 0.09753432869911194, "learning_rate": 1.7428971939222508e-05, "loss": 9.0612, "step": 84200 }, { "epoch": 0.4205348448151015, "grad_norm": 0.09066448360681534, "learning_rate": 1.742747002428096e-05, "loss": 9.0633, "step": 84210 }, { "epoch": 0.4205847836400409, "grad_norm": 0.09251467883586884, "learning_rate": 1.742596810933941e-05, "loss": 9.0689, "step": 84220 }, { "epoch": 0.4206347224649804, "grad_norm": 0.09440715610980988, "learning_rate": 1.742446619439786e-05, "loss": 9.0761, "step": 84230 }, { "epoch": 0.4206846612899198, "grad_norm": 0.09155986458063126, "learning_rate": 1.7422964279456306e-05, "loss": 9.0703, "step": 84240 }, { "epoch": 0.4207346001148593, "grad_norm": 0.0903191938996315, "learning_rate": 1.7421462364514756e-05, "loss": 9.0739, "step": 84250 }, { "epoch": 0.4207845389397987, "grad_norm": 0.09451457858085632, "learning_rate": 1.7419960449573206e-05, "loss": 9.0768, "step": 84260 }, { "epoch": 0.4208344777647382, "grad_norm": 0.09495178610086441, "learning_rate": 1.7418458534631656e-05, "loss": 9.0598, "step": 84270 }, { "epoch": 0.4208844165896776, "grad_norm": 0.09594985842704773, "learning_rate": 1.7416956619690106e-05, "loss": 9.0666, "step": 84280 }, { "epoch": 0.4209343554146171, "grad_norm": 0.09474395960569382, "learning_rate": 1.7415454704748553e-05, "loss": 9.0648, "step": 84290 }, { "epoch": 0.4209842942395565, "grad_norm": 0.10143403708934784, "learning_rate": 1.7413952789807003e-05, "loss": 9.0528, "step": 84300 }, { "epoch": 0.421034233064496, "grad_norm": 0.09076518565416336, "learning_rate": 1.7412450874865453e-05, "loss": 9.0731, "step": 84310 }, { "epoch": 0.4210841718894354, "grad_norm": 0.0921441987156868, "learning_rate": 1.7410948959923904e-05, "loss": 9.067, "step": 84320 }, { "epoch": 0.4211341107143749, "grad_norm": 0.09396324306726456, "learning_rate": 1.7409447044982354e-05, "loss": 9.0719, "step": 84330 }, { "epoch": 0.4211840495393143, "grad_norm": 0.0950896143913269, "learning_rate": 1.74079451300408e-05, "loss": 9.0815, "step": 84340 }, { "epoch": 0.4212339883642538, "grad_norm": 0.08926611393690109, "learning_rate": 1.7406443215099254e-05, "loss": 9.0629, "step": 84350 }, { "epoch": 0.4212839271891932, "grad_norm": 0.09594748169183731, "learning_rate": 1.74049413001577e-05, "loss": 9.0707, "step": 84360 }, { "epoch": 0.4213338660141327, "grad_norm": 0.09475325793027878, "learning_rate": 1.740343938521615e-05, "loss": 9.074, "step": 84370 }, { "epoch": 0.4213838048390721, "grad_norm": 0.09772231429815292, "learning_rate": 1.74019374702746e-05, "loss": 9.0669, "step": 84380 }, { "epoch": 0.4214337436640116, "grad_norm": 0.09714603424072266, "learning_rate": 1.7400435555333048e-05, "loss": 9.0703, "step": 84390 }, { "epoch": 0.421483682488951, "grad_norm": 0.09235162287950516, "learning_rate": 1.73989336403915e-05, "loss": 9.059, "step": 84400 }, { "epoch": 0.4215336213138905, "grad_norm": 0.0921136885881424, "learning_rate": 1.739743172544995e-05, "loss": 9.0542, "step": 84410 }, { "epoch": 0.4215835601388299, "grad_norm": 0.09008321166038513, "learning_rate": 1.73959298105084e-05, "loss": 9.066, "step": 84420 }, { "epoch": 0.4216334989637694, "grad_norm": 0.0914497822523117, "learning_rate": 1.739442789556685e-05, "loss": 9.0638, "step": 84430 }, { "epoch": 0.4216834377887088, "grad_norm": 0.09600444883108139, "learning_rate": 1.7392925980625296e-05, "loss": 9.0733, "step": 84440 }, { "epoch": 0.4217333766136483, "grad_norm": 0.09309738129377365, "learning_rate": 1.739142406568375e-05, "loss": 9.0743, "step": 84450 }, { "epoch": 0.4217833154385877, "grad_norm": 0.09112364053726196, "learning_rate": 1.7389922150742196e-05, "loss": 9.0722, "step": 84460 }, { "epoch": 0.4218332542635272, "grad_norm": 0.09220999479293823, "learning_rate": 1.7388420235800646e-05, "loss": 9.0659, "step": 84470 }, { "epoch": 0.4218831930884666, "grad_norm": 0.09681863337755203, "learning_rate": 1.7386918320859096e-05, "loss": 9.0701, "step": 84480 }, { "epoch": 0.4219331319134061, "grad_norm": 0.09077723324298859, "learning_rate": 1.7385416405917543e-05, "loss": 9.0616, "step": 84490 }, { "epoch": 0.4219830707383455, "grad_norm": 0.09060750901699066, "learning_rate": 1.7383914490975997e-05, "loss": 9.06, "step": 84500 }, { "epoch": 0.422033009563285, "grad_norm": 0.09467575699090958, "learning_rate": 1.7382412576034443e-05, "loss": 9.0742, "step": 84510 }, { "epoch": 0.4220829483882244, "grad_norm": 0.09085453301668167, "learning_rate": 1.7380910661092894e-05, "loss": 9.0705, "step": 84520 }, { "epoch": 0.4221328872131639, "grad_norm": 0.09209267795085907, "learning_rate": 1.7379408746151344e-05, "loss": 9.0544, "step": 84530 }, { "epoch": 0.4221828260381033, "grad_norm": 0.09571056067943573, "learning_rate": 1.737790683120979e-05, "loss": 9.0732, "step": 84540 }, { "epoch": 0.4222327648630428, "grad_norm": 0.09486297518014908, "learning_rate": 1.7376404916268244e-05, "loss": 9.0753, "step": 84550 }, { "epoch": 0.4222827036879822, "grad_norm": 0.09144411236047745, "learning_rate": 1.737490300132669e-05, "loss": 9.0669, "step": 84560 }, { "epoch": 0.4223326425129217, "grad_norm": 0.09834633022546768, "learning_rate": 1.737340108638514e-05, "loss": 9.072, "step": 84570 }, { "epoch": 0.4223825813378611, "grad_norm": 0.09790683537721634, "learning_rate": 1.737189917144359e-05, "loss": 9.0566, "step": 84580 }, { "epoch": 0.4224325201628006, "grad_norm": 0.09472320228815079, "learning_rate": 1.7370397256502038e-05, "loss": 9.0709, "step": 84590 }, { "epoch": 0.42248245898774, "grad_norm": 0.0928400382399559, "learning_rate": 1.736889534156049e-05, "loss": 9.063, "step": 84600 }, { "epoch": 0.4225323978126795, "grad_norm": 0.09452667087316513, "learning_rate": 1.736739342661894e-05, "loss": 9.0671, "step": 84610 }, { "epoch": 0.4225823366376189, "grad_norm": 0.09719450771808624, "learning_rate": 1.736589151167739e-05, "loss": 9.0658, "step": 84620 }, { "epoch": 0.4226322754625584, "grad_norm": 0.08982671052217484, "learning_rate": 1.736438959673584e-05, "loss": 9.0629, "step": 84630 }, { "epoch": 0.4226822142874978, "grad_norm": 0.08864292502403259, "learning_rate": 1.7362887681794286e-05, "loss": 9.0618, "step": 84640 }, { "epoch": 0.4227321531124373, "grad_norm": 0.09744229912757874, "learning_rate": 1.736138576685274e-05, "loss": 9.0557, "step": 84650 }, { "epoch": 0.4227820919373767, "grad_norm": 0.09893699735403061, "learning_rate": 1.7359883851911186e-05, "loss": 9.067, "step": 84660 }, { "epoch": 0.4228320307623162, "grad_norm": 0.09269163012504578, "learning_rate": 1.735838193696964e-05, "loss": 9.0676, "step": 84670 }, { "epoch": 0.4228819695872556, "grad_norm": 0.09532048553228378, "learning_rate": 1.7356880022028086e-05, "loss": 9.0715, "step": 84680 }, { "epoch": 0.4229319084121951, "grad_norm": 0.09102669358253479, "learning_rate": 1.7355378107086533e-05, "loss": 9.0674, "step": 84690 }, { "epoch": 0.4229818472371345, "grad_norm": 0.09197783470153809, "learning_rate": 1.7353876192144987e-05, "loss": 9.0677, "step": 84700 }, { "epoch": 0.423031786062074, "grad_norm": 0.09266051650047302, "learning_rate": 1.7352374277203433e-05, "loss": 9.0652, "step": 84710 }, { "epoch": 0.4230817248870134, "grad_norm": 0.09375884383916855, "learning_rate": 1.7350872362261887e-05, "loss": 9.0622, "step": 84720 }, { "epoch": 0.4231316637119529, "grad_norm": 0.09187036752700806, "learning_rate": 1.7349370447320334e-05, "loss": 9.0654, "step": 84730 }, { "epoch": 0.4231816025368923, "grad_norm": 0.09767387062311172, "learning_rate": 1.734786853237878e-05, "loss": 9.074, "step": 84740 }, { "epoch": 0.4232315413618318, "grad_norm": 0.09047545492649078, "learning_rate": 1.7346366617437234e-05, "loss": 9.0664, "step": 84750 }, { "epoch": 0.4232814801867712, "grad_norm": 0.09297401458024979, "learning_rate": 1.734486470249568e-05, "loss": 9.073, "step": 84760 }, { "epoch": 0.4233314190117107, "grad_norm": 0.0917564183473587, "learning_rate": 1.7343362787554135e-05, "loss": 9.064, "step": 84770 }, { "epoch": 0.4233813578366501, "grad_norm": 0.09373626857995987, "learning_rate": 1.734186087261258e-05, "loss": 9.0597, "step": 84780 }, { "epoch": 0.4234312966615896, "grad_norm": 0.09210002422332764, "learning_rate": 1.7340358957671028e-05, "loss": 9.0762, "step": 84790 }, { "epoch": 0.423481235486529, "grad_norm": 0.09276442974805832, "learning_rate": 1.733885704272948e-05, "loss": 9.0565, "step": 84800 }, { "epoch": 0.4235311743114685, "grad_norm": 0.09392396360635757, "learning_rate": 1.733735512778793e-05, "loss": 9.0691, "step": 84810 }, { "epoch": 0.4235811131364079, "grad_norm": 0.09003560990095139, "learning_rate": 1.7335853212846382e-05, "loss": 9.0619, "step": 84820 }, { "epoch": 0.42363105196134737, "grad_norm": 0.09067775309085846, "learning_rate": 1.733435129790483e-05, "loss": 9.0586, "step": 84830 }, { "epoch": 0.4236809907862868, "grad_norm": 0.0904303789138794, "learning_rate": 1.7332849382963276e-05, "loss": 9.054, "step": 84840 }, { "epoch": 0.42373092961122627, "grad_norm": 0.09555737674236298, "learning_rate": 1.733134746802173e-05, "loss": 9.0534, "step": 84850 }, { "epoch": 0.4237808684361657, "grad_norm": 0.100531667470932, "learning_rate": 1.7329845553080176e-05, "loss": 9.0776, "step": 84860 }, { "epoch": 0.42383080726110517, "grad_norm": 0.09057488292455673, "learning_rate": 1.732834363813863e-05, "loss": 9.0704, "step": 84870 }, { "epoch": 0.4238807460860446, "grad_norm": 0.09145992249250412, "learning_rate": 1.7326841723197076e-05, "loss": 9.0489, "step": 84880 }, { "epoch": 0.42393068491098407, "grad_norm": 0.0870828926563263, "learning_rate": 1.7325339808255523e-05, "loss": 9.0725, "step": 84890 }, { "epoch": 0.4239806237359235, "grad_norm": 0.09605187177658081, "learning_rate": 1.7323837893313977e-05, "loss": 9.0487, "step": 84900 }, { "epoch": 0.42403056256086297, "grad_norm": 0.09087910503149033, "learning_rate": 1.7322335978372423e-05, "loss": 9.0577, "step": 84910 }, { "epoch": 0.4240805013858024, "grad_norm": 0.09988180547952652, "learning_rate": 1.7320834063430877e-05, "loss": 9.0576, "step": 84920 }, { "epoch": 0.42413044021074187, "grad_norm": 0.09044559299945831, "learning_rate": 1.7319332148489324e-05, "loss": 9.0767, "step": 84930 }, { "epoch": 0.4241803790356813, "grad_norm": 0.09994645416736603, "learning_rate": 1.731783023354777e-05, "loss": 9.0492, "step": 84940 }, { "epoch": 0.42423031786062076, "grad_norm": 0.09559715539216995, "learning_rate": 1.7316328318606224e-05, "loss": 9.0664, "step": 84950 }, { "epoch": 0.4242802566855602, "grad_norm": 0.08689738065004349, "learning_rate": 1.731482640366467e-05, "loss": 9.0722, "step": 84960 }, { "epoch": 0.42433019551049966, "grad_norm": 0.10374455153942108, "learning_rate": 1.7313324488723125e-05, "loss": 9.0643, "step": 84970 }, { "epoch": 0.4243801343354391, "grad_norm": 0.0957735925912857, "learning_rate": 1.731182257378157e-05, "loss": 9.0582, "step": 84980 }, { "epoch": 0.42443007316037856, "grad_norm": 0.09154541045427322, "learning_rate": 1.731032065884002e-05, "loss": 9.06, "step": 84990 }, { "epoch": 0.424480011985318, "grad_norm": 0.09392927587032318, "learning_rate": 1.730881874389847e-05, "loss": 9.0737, "step": 85000 }, { "epoch": 0.42452995081025746, "grad_norm": 0.09651625156402588, "learning_rate": 1.730731682895692e-05, "loss": 9.0567, "step": 85010 }, { "epoch": 0.4245798896351969, "grad_norm": 0.09320859611034393, "learning_rate": 1.7305814914015372e-05, "loss": 9.0656, "step": 85020 }, { "epoch": 0.42462982846013636, "grad_norm": 0.09693901240825653, "learning_rate": 1.730431299907382e-05, "loss": 9.0709, "step": 85030 }, { "epoch": 0.4246797672850758, "grad_norm": 0.09250442683696747, "learning_rate": 1.730281108413227e-05, "loss": 9.059, "step": 85040 }, { "epoch": 0.4247297061100152, "grad_norm": 0.09281265735626221, "learning_rate": 1.730130916919072e-05, "loss": 9.0618, "step": 85050 }, { "epoch": 0.4247796449349547, "grad_norm": 0.09139450639486313, "learning_rate": 1.7299807254249166e-05, "loss": 9.0574, "step": 85060 }, { "epoch": 0.4248295837598941, "grad_norm": 0.09575843065977097, "learning_rate": 1.729830533930762e-05, "loss": 9.0575, "step": 85070 }, { "epoch": 0.4248795225848336, "grad_norm": 0.09058381617069244, "learning_rate": 1.7296803424366066e-05, "loss": 9.0458, "step": 85080 }, { "epoch": 0.424929461409773, "grad_norm": 0.09589716792106628, "learning_rate": 1.7295301509424516e-05, "loss": 9.0613, "step": 85090 }, { "epoch": 0.4249794002347125, "grad_norm": 0.09588911384344101, "learning_rate": 1.7293799594482967e-05, "loss": 9.0706, "step": 85100 }, { "epoch": 0.4250293390596519, "grad_norm": 0.08973708748817444, "learning_rate": 1.7292297679541413e-05, "loss": 9.0601, "step": 85110 }, { "epoch": 0.4250792778845914, "grad_norm": 0.09021148830652237, "learning_rate": 1.7290795764599867e-05, "loss": 9.0696, "step": 85120 }, { "epoch": 0.4251292167095308, "grad_norm": 0.08464304357767105, "learning_rate": 1.7289293849658314e-05, "loss": 9.0598, "step": 85130 }, { "epoch": 0.4251791555344703, "grad_norm": 0.09651916474103928, "learning_rate": 1.7287791934716764e-05, "loss": 9.0664, "step": 85140 }, { "epoch": 0.4252290943594097, "grad_norm": 0.09342118352651596, "learning_rate": 1.7286290019775214e-05, "loss": 9.0592, "step": 85150 }, { "epoch": 0.4252790331843492, "grad_norm": 0.09311968088150024, "learning_rate": 1.728478810483366e-05, "loss": 9.0665, "step": 85160 }, { "epoch": 0.4253289720092886, "grad_norm": 0.08970993012189865, "learning_rate": 1.7283286189892115e-05, "loss": 9.0662, "step": 85170 }, { "epoch": 0.4253789108342281, "grad_norm": 0.09633956849575043, "learning_rate": 1.728178427495056e-05, "loss": 9.0603, "step": 85180 }, { "epoch": 0.4254288496591675, "grad_norm": 0.09612761437892914, "learning_rate": 1.728028236000901e-05, "loss": 9.0691, "step": 85190 }, { "epoch": 0.425478788484107, "grad_norm": 0.0876702144742012, "learning_rate": 1.727878044506746e-05, "loss": 9.0624, "step": 85200 }, { "epoch": 0.4255287273090464, "grad_norm": 0.09160758554935455, "learning_rate": 1.727727853012591e-05, "loss": 9.0717, "step": 85210 }, { "epoch": 0.4255786661339859, "grad_norm": 0.09074576944112778, "learning_rate": 1.7275776615184362e-05, "loss": 9.0706, "step": 85220 }, { "epoch": 0.4256286049589253, "grad_norm": 0.08834607899188995, "learning_rate": 1.727427470024281e-05, "loss": 9.06, "step": 85230 }, { "epoch": 0.42567854378386477, "grad_norm": 0.09007090330123901, "learning_rate": 1.727277278530126e-05, "loss": 9.0507, "step": 85240 }, { "epoch": 0.4257284826088042, "grad_norm": 0.09260173887014389, "learning_rate": 1.727127087035971e-05, "loss": 9.0542, "step": 85250 }, { "epoch": 0.42577842143374367, "grad_norm": 0.0915934219956398, "learning_rate": 1.7269768955418156e-05, "loss": 9.0627, "step": 85260 }, { "epoch": 0.4258283602586831, "grad_norm": 0.09786804020404816, "learning_rate": 1.726826704047661e-05, "loss": 9.0529, "step": 85270 }, { "epoch": 0.42587829908362257, "grad_norm": 0.09305689483880997, "learning_rate": 1.7266765125535056e-05, "loss": 9.0447, "step": 85280 }, { "epoch": 0.425928237908562, "grad_norm": 0.09683950990438461, "learning_rate": 1.7265263210593507e-05, "loss": 9.0657, "step": 85290 }, { "epoch": 0.42597817673350147, "grad_norm": 0.08442121744155884, "learning_rate": 1.7263761295651957e-05, "loss": 9.065, "step": 85300 }, { "epoch": 0.4260281155584409, "grad_norm": 0.09468364715576172, "learning_rate": 1.7262259380710403e-05, "loss": 9.0679, "step": 85310 }, { "epoch": 0.42607805438338037, "grad_norm": 0.09155453741550446, "learning_rate": 1.7260757465768857e-05, "loss": 9.0554, "step": 85320 }, { "epoch": 0.4261279932083198, "grad_norm": 0.09124631434679031, "learning_rate": 1.7259255550827304e-05, "loss": 9.0581, "step": 85330 }, { "epoch": 0.42617793203325927, "grad_norm": 0.09624254703521729, "learning_rate": 1.7257753635885757e-05, "loss": 9.0521, "step": 85340 }, { "epoch": 0.4262278708581987, "grad_norm": 0.09301303327083588, "learning_rate": 1.7256251720944204e-05, "loss": 9.0474, "step": 85350 }, { "epoch": 0.42627780968313816, "grad_norm": 0.09513042867183685, "learning_rate": 1.7254749806002654e-05, "loss": 9.0544, "step": 85360 }, { "epoch": 0.4263277485080776, "grad_norm": 0.08930395543575287, "learning_rate": 1.7253247891061105e-05, "loss": 9.0651, "step": 85370 }, { "epoch": 0.42637768733301706, "grad_norm": 0.09650768339633942, "learning_rate": 1.725174597611955e-05, "loss": 9.057, "step": 85380 }, { "epoch": 0.4264276261579565, "grad_norm": 0.09203314781188965, "learning_rate": 1.7250244061178005e-05, "loss": 9.0673, "step": 85390 }, { "epoch": 0.42647756498289596, "grad_norm": 0.09252451360225677, "learning_rate": 1.7248742146236452e-05, "loss": 9.0654, "step": 85400 }, { "epoch": 0.4265275038078354, "grad_norm": 0.09380394965410233, "learning_rate": 1.7247240231294902e-05, "loss": 9.0483, "step": 85410 }, { "epoch": 0.42657744263277486, "grad_norm": 0.09272889792919159, "learning_rate": 1.7245738316353352e-05, "loss": 9.0737, "step": 85420 }, { "epoch": 0.4266273814577143, "grad_norm": 0.09201991558074951, "learning_rate": 1.72442364014118e-05, "loss": 9.0526, "step": 85430 }, { "epoch": 0.42667732028265376, "grad_norm": 0.09079743921756744, "learning_rate": 1.7242734486470252e-05, "loss": 9.062, "step": 85440 }, { "epoch": 0.4267272591075932, "grad_norm": 0.09716988354921341, "learning_rate": 1.72412325715287e-05, "loss": 9.0581, "step": 85450 }, { "epoch": 0.42677719793253266, "grad_norm": 0.10238094627857208, "learning_rate": 1.723973065658715e-05, "loss": 9.0545, "step": 85460 }, { "epoch": 0.4268271367574721, "grad_norm": 0.09213068336248398, "learning_rate": 1.72382287416456e-05, "loss": 9.0599, "step": 85470 }, { "epoch": 0.42687707558241156, "grad_norm": 0.0878293439745903, "learning_rate": 1.7236726826704046e-05, "loss": 9.058, "step": 85480 }, { "epoch": 0.426927014407351, "grad_norm": 0.09897506982088089, "learning_rate": 1.72352249117625e-05, "loss": 9.0604, "step": 85490 }, { "epoch": 0.42697695323229046, "grad_norm": 0.09527362138032913, "learning_rate": 1.7233722996820947e-05, "loss": 9.0577, "step": 85500 }, { "epoch": 0.4270268920572299, "grad_norm": 0.09203501790761948, "learning_rate": 1.7232221081879397e-05, "loss": 9.0629, "step": 85510 }, { "epoch": 0.42707683088216936, "grad_norm": 0.09153282642364502, "learning_rate": 1.7230719166937847e-05, "loss": 9.0658, "step": 85520 }, { "epoch": 0.4271267697071088, "grad_norm": 0.09082131832838058, "learning_rate": 1.7229217251996294e-05, "loss": 9.0558, "step": 85530 }, { "epoch": 0.42717670853204825, "grad_norm": 0.09474789351224899, "learning_rate": 1.7227715337054747e-05, "loss": 9.0649, "step": 85540 }, { "epoch": 0.4272266473569877, "grad_norm": 0.09650855511426926, "learning_rate": 1.7226213422113194e-05, "loss": 9.0518, "step": 85550 }, { "epoch": 0.42727658618192715, "grad_norm": 0.09041815251111984, "learning_rate": 1.7224711507171644e-05, "loss": 9.0682, "step": 85560 }, { "epoch": 0.4273265250068666, "grad_norm": 0.09136392921209335, "learning_rate": 1.7223209592230095e-05, "loss": 9.0575, "step": 85570 }, { "epoch": 0.42737646383180605, "grad_norm": 0.09177730977535248, "learning_rate": 1.722170767728854e-05, "loss": 9.0442, "step": 85580 }, { "epoch": 0.4274264026567455, "grad_norm": 0.09383508563041687, "learning_rate": 1.7220205762346995e-05, "loss": 9.0745, "step": 85590 }, { "epoch": 0.42747634148168495, "grad_norm": 0.08896640688180923, "learning_rate": 1.7218703847405442e-05, "loss": 9.0603, "step": 85600 }, { "epoch": 0.4275262803066244, "grad_norm": 0.0970030426979065, "learning_rate": 1.7217201932463892e-05, "loss": 9.0605, "step": 85610 }, { "epoch": 0.42757621913156385, "grad_norm": 0.09975878149271011, "learning_rate": 1.7215700017522342e-05, "loss": 9.0549, "step": 85620 }, { "epoch": 0.42762615795650327, "grad_norm": 0.09379398822784424, "learning_rate": 1.721419810258079e-05, "loss": 9.0637, "step": 85630 }, { "epoch": 0.42767609678144275, "grad_norm": 0.09030137956142426, "learning_rate": 1.7212696187639242e-05, "loss": 9.0584, "step": 85640 }, { "epoch": 0.42772603560638217, "grad_norm": 0.09938555955886841, "learning_rate": 1.721119427269769e-05, "loss": 9.0446, "step": 85650 }, { "epoch": 0.42777597443132165, "grad_norm": 0.09401064366102219, "learning_rate": 1.720969235775614e-05, "loss": 9.0472, "step": 85660 }, { "epoch": 0.42782591325626107, "grad_norm": 0.10197415202856064, "learning_rate": 1.720819044281459e-05, "loss": 9.0556, "step": 85670 }, { "epoch": 0.42787585208120055, "grad_norm": 0.09055991470813751, "learning_rate": 1.720668852787304e-05, "loss": 9.0479, "step": 85680 }, { "epoch": 0.42792579090613997, "grad_norm": 0.0905674397945404, "learning_rate": 1.720518661293149e-05, "loss": 9.0593, "step": 85690 }, { "epoch": 0.42797572973107945, "grad_norm": 0.09657610207796097, "learning_rate": 1.7203684697989937e-05, "loss": 9.0484, "step": 85700 }, { "epoch": 0.42802566855601887, "grad_norm": 0.09140429645776749, "learning_rate": 1.7202182783048387e-05, "loss": 9.0471, "step": 85710 }, { "epoch": 0.42807560738095835, "grad_norm": 0.09539390355348587, "learning_rate": 1.7200680868106837e-05, "loss": 9.0487, "step": 85720 }, { "epoch": 0.42812554620589777, "grad_norm": 0.09347163140773773, "learning_rate": 1.7199178953165287e-05, "loss": 9.0556, "step": 85730 }, { "epoch": 0.42817548503083724, "grad_norm": 0.08956640213727951, "learning_rate": 1.7197677038223737e-05, "loss": 9.0475, "step": 85740 }, { "epoch": 0.42822542385577667, "grad_norm": 0.09470704942941666, "learning_rate": 1.7196175123282184e-05, "loss": 9.0453, "step": 85750 }, { "epoch": 0.42827536268071614, "grad_norm": 0.0948714017868042, "learning_rate": 1.7194673208340634e-05, "loss": 9.0497, "step": 85760 }, { "epoch": 0.42832530150565556, "grad_norm": 0.09796865284442902, "learning_rate": 1.7193171293399085e-05, "loss": 9.0603, "step": 85770 }, { "epoch": 0.42837524033059504, "grad_norm": 0.09614430367946625, "learning_rate": 1.7191669378457535e-05, "loss": 9.0641, "step": 85780 }, { "epoch": 0.42842517915553446, "grad_norm": 0.09698038548231125, "learning_rate": 1.7190167463515985e-05, "loss": 9.0549, "step": 85790 }, { "epoch": 0.42847511798047394, "grad_norm": 0.08912073075771332, "learning_rate": 1.7188665548574432e-05, "loss": 9.0442, "step": 85800 }, { "epoch": 0.42852505680541336, "grad_norm": 0.0899166464805603, "learning_rate": 1.7187163633632882e-05, "loss": 9.04, "step": 85810 }, { "epoch": 0.42857499563035284, "grad_norm": 0.09650041162967682, "learning_rate": 1.7185661718691332e-05, "loss": 9.0408, "step": 85820 }, { "epoch": 0.42862493445529226, "grad_norm": 0.09182535111904144, "learning_rate": 1.7184159803749782e-05, "loss": 9.0617, "step": 85830 }, { "epoch": 0.42867487328023174, "grad_norm": 0.09780148416757584, "learning_rate": 1.7182657888808232e-05, "loss": 9.0625, "step": 85840 }, { "epoch": 0.42872481210517116, "grad_norm": 0.09503006935119629, "learning_rate": 1.718115597386668e-05, "loss": 9.0631, "step": 85850 }, { "epoch": 0.42877475093011064, "grad_norm": 0.09269234538078308, "learning_rate": 1.717965405892513e-05, "loss": 9.055, "step": 85860 }, { "epoch": 0.42882468975505006, "grad_norm": 0.09522657096385956, "learning_rate": 1.717815214398358e-05, "loss": 9.0593, "step": 85870 }, { "epoch": 0.42887462857998954, "grad_norm": 0.09069998562335968, "learning_rate": 1.717665022904203e-05, "loss": 9.0425, "step": 85880 }, { "epoch": 0.42892456740492896, "grad_norm": 0.09758871048688889, "learning_rate": 1.717514831410048e-05, "loss": 9.0392, "step": 85890 }, { "epoch": 0.42897450622986844, "grad_norm": 0.09257560968399048, "learning_rate": 1.7173646399158927e-05, "loss": 9.0638, "step": 85900 }, { "epoch": 0.42902444505480786, "grad_norm": 0.09189421683549881, "learning_rate": 1.7172144484217377e-05, "loss": 9.0469, "step": 85910 }, { "epoch": 0.42907438387974733, "grad_norm": 0.09272563457489014, "learning_rate": 1.7170642569275827e-05, "loss": 9.0536, "step": 85920 }, { "epoch": 0.42912432270468676, "grad_norm": 0.08748695999383926, "learning_rate": 1.7169140654334277e-05, "loss": 9.0641, "step": 85930 }, { "epoch": 0.42917426152962623, "grad_norm": 0.09138987958431244, "learning_rate": 1.7167638739392727e-05, "loss": 9.057, "step": 85940 }, { "epoch": 0.42922420035456565, "grad_norm": 0.09145860373973846, "learning_rate": 1.7166136824451174e-05, "loss": 9.0571, "step": 85950 }, { "epoch": 0.42927413917950513, "grad_norm": 0.09510332345962524, "learning_rate": 1.7164634909509624e-05, "loss": 9.0455, "step": 85960 }, { "epoch": 0.42932407800444455, "grad_norm": 0.09964827448129654, "learning_rate": 1.7163132994568075e-05, "loss": 9.0313, "step": 85970 }, { "epoch": 0.42937401682938403, "grad_norm": 0.09246429055929184, "learning_rate": 1.7161631079626525e-05, "loss": 9.0544, "step": 85980 }, { "epoch": 0.42942395565432345, "grad_norm": 0.10065965354442596, "learning_rate": 1.7160129164684975e-05, "loss": 9.0548, "step": 85990 }, { "epoch": 0.42947389447926293, "grad_norm": 0.09086819738149643, "learning_rate": 1.7158627249743425e-05, "loss": 9.0661, "step": 86000 }, { "epoch": 0.42952383330420235, "grad_norm": 0.09284672141075134, "learning_rate": 1.7157125334801872e-05, "loss": 9.0489, "step": 86010 }, { "epoch": 0.42957377212914183, "grad_norm": 0.09478854387998581, "learning_rate": 1.7155623419860322e-05, "loss": 9.0511, "step": 86020 }, { "epoch": 0.42962371095408125, "grad_norm": 0.0997512936592102, "learning_rate": 1.7154121504918772e-05, "loss": 9.0386, "step": 86030 }, { "epoch": 0.42967364977902067, "grad_norm": 0.0923648402094841, "learning_rate": 1.7152619589977222e-05, "loss": 9.0533, "step": 86040 }, { "epoch": 0.42972358860396015, "grad_norm": 0.09019633382558823, "learning_rate": 1.7151117675035673e-05, "loss": 9.0481, "step": 86050 }, { "epoch": 0.42977352742889957, "grad_norm": 0.09562560170888901, "learning_rate": 1.714961576009412e-05, "loss": 9.0553, "step": 86060 }, { "epoch": 0.42982346625383905, "grad_norm": 0.09037364274263382, "learning_rate": 1.714811384515257e-05, "loss": 9.0621, "step": 86070 }, { "epoch": 0.42987340507877847, "grad_norm": 0.09504220634698868, "learning_rate": 1.714661193021102e-05, "loss": 9.0369, "step": 86080 }, { "epoch": 0.42992334390371795, "grad_norm": 0.09927166253328323, "learning_rate": 1.714511001526947e-05, "loss": 9.0363, "step": 86090 }, { "epoch": 0.42997328272865737, "grad_norm": 0.09197903424501419, "learning_rate": 1.714360810032792e-05, "loss": 9.0647, "step": 86100 }, { "epoch": 0.43002322155359685, "grad_norm": 0.08889306336641312, "learning_rate": 1.7142106185386367e-05, "loss": 9.0551, "step": 86110 }, { "epoch": 0.43007316037853627, "grad_norm": 0.09508626163005829, "learning_rate": 1.7140604270444817e-05, "loss": 9.0492, "step": 86120 }, { "epoch": 0.43012309920347574, "grad_norm": 0.0925615131855011, "learning_rate": 1.7139102355503267e-05, "loss": 9.0463, "step": 86130 }, { "epoch": 0.43017303802841517, "grad_norm": 0.0957641676068306, "learning_rate": 1.7137600440561717e-05, "loss": 9.0549, "step": 86140 }, { "epoch": 0.43022297685335464, "grad_norm": 0.09168253093957901, "learning_rate": 1.7136098525620168e-05, "loss": 9.0504, "step": 86150 }, { "epoch": 0.43027291567829407, "grad_norm": 0.09538407623767853, "learning_rate": 1.7134596610678614e-05, "loss": 9.0464, "step": 86160 }, { "epoch": 0.43032285450323354, "grad_norm": 0.09280803054571152, "learning_rate": 1.7133094695737065e-05, "loss": 9.0527, "step": 86170 }, { "epoch": 0.43037279332817296, "grad_norm": 0.09550796449184418, "learning_rate": 1.7131592780795515e-05, "loss": 9.0521, "step": 86180 }, { "epoch": 0.43042273215311244, "grad_norm": 0.09481924027204514, "learning_rate": 1.7130090865853965e-05, "loss": 9.058, "step": 86190 }, { "epoch": 0.43047267097805186, "grad_norm": 0.0906360000371933, "learning_rate": 1.7128588950912415e-05, "loss": 9.0544, "step": 86200 }, { "epoch": 0.43052260980299134, "grad_norm": 0.1016702726483345, "learning_rate": 1.7127087035970862e-05, "loss": 9.0553, "step": 86210 }, { "epoch": 0.43057254862793076, "grad_norm": 0.0934482216835022, "learning_rate": 1.7125585121029312e-05, "loss": 9.0411, "step": 86220 }, { "epoch": 0.43062248745287024, "grad_norm": 0.09161151945590973, "learning_rate": 1.7124083206087762e-05, "loss": 9.054, "step": 86230 }, { "epoch": 0.43067242627780966, "grad_norm": 0.09385374188423157, "learning_rate": 1.7122581291146212e-05, "loss": 9.0394, "step": 86240 }, { "epoch": 0.43072236510274914, "grad_norm": 0.09769625961780548, "learning_rate": 1.7121079376204663e-05, "loss": 9.0444, "step": 86250 }, { "epoch": 0.43077230392768856, "grad_norm": 0.09559006243944168, "learning_rate": 1.711957746126311e-05, "loss": 9.0386, "step": 86260 }, { "epoch": 0.43082224275262804, "grad_norm": 0.0927807167172432, "learning_rate": 1.711807554632156e-05, "loss": 9.064, "step": 86270 }, { "epoch": 0.43087218157756746, "grad_norm": 0.09350989013910294, "learning_rate": 1.711657363138001e-05, "loss": 9.0505, "step": 86280 }, { "epoch": 0.43092212040250694, "grad_norm": 0.09480515867471695, "learning_rate": 1.711507171643846e-05, "loss": 9.0377, "step": 86290 }, { "epoch": 0.43097205922744636, "grad_norm": 0.0930512323975563, "learning_rate": 1.711356980149691e-05, "loss": 9.0554, "step": 86300 }, { "epoch": 0.43102199805238584, "grad_norm": 0.09537147730588913, "learning_rate": 1.7112067886555357e-05, "loss": 9.0552, "step": 86310 }, { "epoch": 0.43107193687732526, "grad_norm": 0.09353245049715042, "learning_rate": 1.711056597161381e-05, "loss": 9.043, "step": 86320 }, { "epoch": 0.43112187570226473, "grad_norm": 0.09246620535850525, "learning_rate": 1.7109064056672257e-05, "loss": 9.0528, "step": 86330 }, { "epoch": 0.43117181452720416, "grad_norm": 0.09530563652515411, "learning_rate": 1.7107562141730707e-05, "loss": 9.0463, "step": 86340 }, { "epoch": 0.43122175335214363, "grad_norm": 0.09027519822120667, "learning_rate": 1.7106060226789158e-05, "loss": 9.0485, "step": 86350 }, { "epoch": 0.43127169217708305, "grad_norm": 0.09005729854106903, "learning_rate": 1.7104558311847604e-05, "loss": 9.0492, "step": 86360 }, { "epoch": 0.43132163100202253, "grad_norm": 0.0943072959780693, "learning_rate": 1.7103056396906058e-05, "loss": 9.054, "step": 86370 }, { "epoch": 0.43137156982696195, "grad_norm": 0.09449702501296997, "learning_rate": 1.7101554481964505e-05, "loss": 9.0423, "step": 86380 }, { "epoch": 0.43142150865190143, "grad_norm": 0.09583057463169098, "learning_rate": 1.7100052567022955e-05, "loss": 9.0625, "step": 86390 }, { "epoch": 0.43147144747684085, "grad_norm": 0.09351655840873718, "learning_rate": 1.7098550652081405e-05, "loss": 9.0511, "step": 86400 }, { "epoch": 0.43152138630178033, "grad_norm": 0.0870162844657898, "learning_rate": 1.7097048737139852e-05, "loss": 9.0509, "step": 86410 }, { "epoch": 0.43157132512671975, "grad_norm": 0.09593592584133148, "learning_rate": 1.7095546822198306e-05, "loss": 9.0419, "step": 86420 }, { "epoch": 0.43162126395165923, "grad_norm": 0.0929575264453888, "learning_rate": 1.7094044907256752e-05, "loss": 9.0523, "step": 86430 }, { "epoch": 0.43167120277659865, "grad_norm": 0.09491048753261566, "learning_rate": 1.7092542992315202e-05, "loss": 9.0308, "step": 86440 }, { "epoch": 0.4317211416015381, "grad_norm": 0.09691637009382248, "learning_rate": 1.7091041077373653e-05, "loss": 9.0335, "step": 86450 }, { "epoch": 0.43177108042647755, "grad_norm": 0.09719613939523697, "learning_rate": 1.70895391624321e-05, "loss": 9.0532, "step": 86460 }, { "epoch": 0.431821019251417, "grad_norm": 0.08485887944698334, "learning_rate": 1.7088037247490553e-05, "loss": 9.045, "step": 86470 }, { "epoch": 0.43187095807635645, "grad_norm": 0.09217669814825058, "learning_rate": 1.7086535332549e-05, "loss": 9.0437, "step": 86480 }, { "epoch": 0.4319208969012959, "grad_norm": 0.09682594239711761, "learning_rate": 1.708503341760745e-05, "loss": 9.0475, "step": 86490 }, { "epoch": 0.43197083572623535, "grad_norm": 0.09146720916032791, "learning_rate": 1.70835315026659e-05, "loss": 9.0369, "step": 86500 }, { "epoch": 0.4320207745511748, "grad_norm": 0.09073561429977417, "learning_rate": 1.7082029587724347e-05, "loss": 9.0416, "step": 86510 }, { "epoch": 0.43207071337611425, "grad_norm": 0.09679694473743439, "learning_rate": 1.70805276727828e-05, "loss": 9.0619, "step": 86520 }, { "epoch": 0.4321206522010537, "grad_norm": 0.09839240461587906, "learning_rate": 1.7079025757841247e-05, "loss": 9.0497, "step": 86530 }, { "epoch": 0.43217059102599314, "grad_norm": 0.09931270033121109, "learning_rate": 1.7077523842899697e-05, "loss": 9.055, "step": 86540 }, { "epoch": 0.4322205298509326, "grad_norm": 0.09096875041723251, "learning_rate": 1.7076021927958148e-05, "loss": 9.0453, "step": 86550 }, { "epoch": 0.43227046867587204, "grad_norm": 0.08868992328643799, "learning_rate": 1.7074520013016594e-05, "loss": 9.0377, "step": 86560 }, { "epoch": 0.4323204075008115, "grad_norm": 0.09178798645734787, "learning_rate": 1.7073018098075048e-05, "loss": 9.0494, "step": 86570 }, { "epoch": 0.43237034632575094, "grad_norm": 0.09545016288757324, "learning_rate": 1.7071516183133495e-05, "loss": 9.0489, "step": 86580 }, { "epoch": 0.4324202851506904, "grad_norm": 0.0965125560760498, "learning_rate": 1.7070014268191945e-05, "loss": 9.0364, "step": 86590 }, { "epoch": 0.43247022397562984, "grad_norm": 0.09696787595748901, "learning_rate": 1.7068512353250395e-05, "loss": 9.0447, "step": 86600 }, { "epoch": 0.4325201628005693, "grad_norm": 0.09214460849761963, "learning_rate": 1.7067010438308842e-05, "loss": 9.0573, "step": 86610 }, { "epoch": 0.43257010162550874, "grad_norm": 0.09552972763776779, "learning_rate": 1.7065508523367296e-05, "loss": 9.0402, "step": 86620 }, { "epoch": 0.4326200404504482, "grad_norm": 0.09437466412782669, "learning_rate": 1.7064006608425742e-05, "loss": 9.0492, "step": 86630 }, { "epoch": 0.43266997927538764, "grad_norm": 0.09180578589439392, "learning_rate": 1.7062504693484196e-05, "loss": 9.0461, "step": 86640 }, { "epoch": 0.4327199181003271, "grad_norm": 0.0951187014579773, "learning_rate": 1.7061002778542643e-05, "loss": 9.0486, "step": 86650 }, { "epoch": 0.43276985692526654, "grad_norm": 0.0927475169301033, "learning_rate": 1.705950086360109e-05, "loss": 9.04, "step": 86660 }, { "epoch": 0.432819795750206, "grad_norm": 0.09508585184812546, "learning_rate": 1.7057998948659543e-05, "loss": 9.0499, "step": 86670 }, { "epoch": 0.43286973457514544, "grad_norm": 0.09572357684373856, "learning_rate": 1.705649703371799e-05, "loss": 9.0267, "step": 86680 }, { "epoch": 0.4329196734000849, "grad_norm": 0.09100820124149323, "learning_rate": 1.7054995118776443e-05, "loss": 9.0476, "step": 86690 }, { "epoch": 0.43296961222502434, "grad_norm": 0.09594081342220306, "learning_rate": 1.705349320383489e-05, "loss": 9.0425, "step": 86700 }, { "epoch": 0.4330195510499638, "grad_norm": 0.09707988053560257, "learning_rate": 1.7051991288893337e-05, "loss": 9.0399, "step": 86710 }, { "epoch": 0.43306948987490324, "grad_norm": 0.09649763256311417, "learning_rate": 1.705048937395179e-05, "loss": 9.041, "step": 86720 }, { "epoch": 0.4331194286998427, "grad_norm": 0.0913090854883194, "learning_rate": 1.7048987459010237e-05, "loss": 9.0297, "step": 86730 }, { "epoch": 0.43316936752478213, "grad_norm": 0.09227044880390167, "learning_rate": 1.704748554406869e-05, "loss": 9.0434, "step": 86740 }, { "epoch": 0.4332193063497216, "grad_norm": 0.09608667343854904, "learning_rate": 1.7045983629127138e-05, "loss": 9.0428, "step": 86750 }, { "epoch": 0.43326924517466103, "grad_norm": 0.09107893705368042, "learning_rate": 1.7044481714185584e-05, "loss": 9.0533, "step": 86760 }, { "epoch": 0.4333191839996005, "grad_norm": 0.09340152889490128, "learning_rate": 1.7042979799244038e-05, "loss": 9.0453, "step": 86770 }, { "epoch": 0.43336912282453993, "grad_norm": 0.0952560156583786, "learning_rate": 1.7041477884302485e-05, "loss": 9.0421, "step": 86780 }, { "epoch": 0.4334190616494794, "grad_norm": 0.09432324022054672, "learning_rate": 1.703997596936094e-05, "loss": 9.0402, "step": 86790 }, { "epoch": 0.43346900047441883, "grad_norm": 0.09584697335958481, "learning_rate": 1.7038474054419385e-05, "loss": 9.0374, "step": 86800 }, { "epoch": 0.4335189392993583, "grad_norm": 0.09084445238113403, "learning_rate": 1.7036972139477832e-05, "loss": 9.0376, "step": 86810 }, { "epoch": 0.43356887812429773, "grad_norm": 0.09363221377134323, "learning_rate": 1.7035470224536286e-05, "loss": 9.0522, "step": 86820 }, { "epoch": 0.4336188169492372, "grad_norm": 0.09301798045635223, "learning_rate": 1.7033968309594732e-05, "loss": 9.0359, "step": 86830 }, { "epoch": 0.43366875577417663, "grad_norm": 0.09216799587011337, "learning_rate": 1.7032466394653186e-05, "loss": 9.0435, "step": 86840 }, { "epoch": 0.4337186945991161, "grad_norm": 0.09435062110424042, "learning_rate": 1.7030964479711633e-05, "loss": 9.0433, "step": 86850 }, { "epoch": 0.4337686334240555, "grad_norm": 0.08969014137983322, "learning_rate": 1.702946256477008e-05, "loss": 9.0326, "step": 86860 }, { "epoch": 0.433818572248995, "grad_norm": 0.09770084917545319, "learning_rate": 1.7027960649828533e-05, "loss": 9.0366, "step": 86870 }, { "epoch": 0.4338685110739344, "grad_norm": 0.0956239253282547, "learning_rate": 1.702645873488698e-05, "loss": 9.0302, "step": 86880 }, { "epoch": 0.4339184498988739, "grad_norm": 0.09152868390083313, "learning_rate": 1.7024956819945433e-05, "loss": 9.0351, "step": 86890 }, { "epoch": 0.4339683887238133, "grad_norm": 0.09322716295719147, "learning_rate": 1.702345490500388e-05, "loss": 9.0341, "step": 86900 }, { "epoch": 0.4340183275487528, "grad_norm": 0.08923067152500153, "learning_rate": 1.7021952990062327e-05, "loss": 9.0475, "step": 86910 }, { "epoch": 0.4340682663736922, "grad_norm": 0.09851467609405518, "learning_rate": 1.702045107512078e-05, "loss": 9.0337, "step": 86920 }, { "epoch": 0.4341182051986317, "grad_norm": 0.09365841746330261, "learning_rate": 1.7018949160179227e-05, "loss": 9.0379, "step": 86930 }, { "epoch": 0.4341681440235711, "grad_norm": 0.09401962906122208, "learning_rate": 1.701744724523768e-05, "loss": 9.053, "step": 86940 }, { "epoch": 0.4342180828485106, "grad_norm": 0.09199943393468857, "learning_rate": 1.7015945330296128e-05, "loss": 9.0282, "step": 86950 }, { "epoch": 0.43426802167345, "grad_norm": 0.09017936885356903, "learning_rate": 1.7014443415354578e-05, "loss": 9.0395, "step": 86960 }, { "epoch": 0.4343179604983895, "grad_norm": 0.09726618975400925, "learning_rate": 1.7012941500413028e-05, "loss": 9.0404, "step": 86970 }, { "epoch": 0.4343678993233289, "grad_norm": 0.0918731540441513, "learning_rate": 1.7011439585471475e-05, "loss": 9.0201, "step": 86980 }, { "epoch": 0.4344178381482684, "grad_norm": 0.09042965620756149, "learning_rate": 1.700993767052993e-05, "loss": 9.035, "step": 86990 }, { "epoch": 0.4344677769732078, "grad_norm": 0.09324657171964645, "learning_rate": 1.7008435755588375e-05, "loss": 9.0361, "step": 87000 }, { "epoch": 0.4345177157981473, "grad_norm": 0.09195488691329956, "learning_rate": 1.7006933840646825e-05, "loss": 9.049, "step": 87010 }, { "epoch": 0.4345676546230867, "grad_norm": 0.08934830129146576, "learning_rate": 1.7005431925705276e-05, "loss": 9.0324, "step": 87020 }, { "epoch": 0.43461759344802614, "grad_norm": 0.0945764034986496, "learning_rate": 1.7003930010763722e-05, "loss": 9.0484, "step": 87030 }, { "epoch": 0.4346675322729656, "grad_norm": 0.09223344922065735, "learning_rate": 1.7002428095822176e-05, "loss": 9.0433, "step": 87040 }, { "epoch": 0.43471747109790504, "grad_norm": 0.09764394164085388, "learning_rate": 1.7000926180880623e-05, "loss": 9.0318, "step": 87050 }, { "epoch": 0.4347674099228445, "grad_norm": 0.08980736136436462, "learning_rate": 1.6999424265939073e-05, "loss": 9.0387, "step": 87060 }, { "epoch": 0.43481734874778394, "grad_norm": 0.0914120152592659, "learning_rate": 1.6997922350997523e-05, "loss": 9.052, "step": 87070 }, { "epoch": 0.4348672875727234, "grad_norm": 0.09246059507131577, "learning_rate": 1.699642043605597e-05, "loss": 9.0412, "step": 87080 }, { "epoch": 0.43491722639766284, "grad_norm": 0.09065555036067963, "learning_rate": 1.6994918521114423e-05, "loss": 9.035, "step": 87090 }, { "epoch": 0.4349671652226023, "grad_norm": 0.0936904326081276, "learning_rate": 1.699341660617287e-05, "loss": 9.0367, "step": 87100 }, { "epoch": 0.43501710404754174, "grad_norm": 0.1051855981349945, "learning_rate": 1.699191469123132e-05, "loss": 9.0431, "step": 87110 }, { "epoch": 0.4350670428724812, "grad_norm": 0.08944005519151688, "learning_rate": 1.699041277628977e-05, "loss": 9.0287, "step": 87120 }, { "epoch": 0.43511698169742064, "grad_norm": 0.09485218673944473, "learning_rate": 1.6988910861348217e-05, "loss": 9.0493, "step": 87130 }, { "epoch": 0.4351669205223601, "grad_norm": 0.09217767417430878, "learning_rate": 1.698740894640667e-05, "loss": 9.0362, "step": 87140 }, { "epoch": 0.43521685934729953, "grad_norm": 0.09442111849784851, "learning_rate": 1.6985907031465118e-05, "loss": 9.0246, "step": 87150 }, { "epoch": 0.435266798172239, "grad_norm": 0.10553400963544846, "learning_rate": 1.6984405116523568e-05, "loss": 9.0343, "step": 87160 }, { "epoch": 0.43531673699717843, "grad_norm": 0.08932023495435715, "learning_rate": 1.6982903201582018e-05, "loss": 9.0449, "step": 87170 }, { "epoch": 0.4353666758221179, "grad_norm": 0.09344980865716934, "learning_rate": 1.6981401286640465e-05, "loss": 9.0279, "step": 87180 }, { "epoch": 0.43541661464705733, "grad_norm": 0.09347915649414062, "learning_rate": 1.697989937169892e-05, "loss": 9.0341, "step": 87190 }, { "epoch": 0.4354665534719968, "grad_norm": 0.0941552221775055, "learning_rate": 1.6978397456757365e-05, "loss": 9.0331, "step": 87200 }, { "epoch": 0.43551649229693623, "grad_norm": 0.09416943788528442, "learning_rate": 1.6976895541815815e-05, "loss": 9.0384, "step": 87210 }, { "epoch": 0.4355664311218757, "grad_norm": 0.0943121612071991, "learning_rate": 1.6975393626874266e-05, "loss": 9.038, "step": 87220 }, { "epoch": 0.43561636994681513, "grad_norm": 0.09376168996095657, "learning_rate": 1.6973891711932712e-05, "loss": 9.0261, "step": 87230 }, { "epoch": 0.4356663087717546, "grad_norm": 0.09052184224128723, "learning_rate": 1.6972389796991166e-05, "loss": 9.0356, "step": 87240 }, { "epoch": 0.43571624759669403, "grad_norm": 0.0898606926202774, "learning_rate": 1.6970887882049613e-05, "loss": 9.0347, "step": 87250 }, { "epoch": 0.4357661864216335, "grad_norm": 0.09599337726831436, "learning_rate": 1.6969385967108063e-05, "loss": 9.0272, "step": 87260 }, { "epoch": 0.4358161252465729, "grad_norm": 0.093189537525177, "learning_rate": 1.6967884052166513e-05, "loss": 9.033, "step": 87270 }, { "epoch": 0.4358660640715124, "grad_norm": 0.09416911005973816, "learning_rate": 1.6966382137224963e-05, "loss": 9.0371, "step": 87280 }, { "epoch": 0.4359160028964518, "grad_norm": 0.09415122121572495, "learning_rate": 1.6964880222283413e-05, "loss": 9.0361, "step": 87290 }, { "epoch": 0.4359659417213913, "grad_norm": 0.09333626925945282, "learning_rate": 1.696337830734186e-05, "loss": 9.0317, "step": 87300 }, { "epoch": 0.4360158805463307, "grad_norm": 0.09361656755208969, "learning_rate": 1.696187639240031e-05, "loss": 9.0233, "step": 87310 }, { "epoch": 0.4360658193712702, "grad_norm": 0.0928577408194542, "learning_rate": 1.696037447745876e-05, "loss": 9.044, "step": 87320 }, { "epoch": 0.4361157581962096, "grad_norm": 0.09233950078487396, "learning_rate": 1.695887256251721e-05, "loss": 9.0395, "step": 87330 }, { "epoch": 0.4361656970211491, "grad_norm": 0.09364930540323257, "learning_rate": 1.695737064757566e-05, "loss": 9.0438, "step": 87340 }, { "epoch": 0.4362156358460885, "grad_norm": 0.09141746908426285, "learning_rate": 1.6955868732634108e-05, "loss": 9.0329, "step": 87350 }, { "epoch": 0.436265574671028, "grad_norm": 0.08941878378391266, "learning_rate": 1.6954366817692558e-05, "loss": 9.0257, "step": 87360 }, { "epoch": 0.4363155134959674, "grad_norm": 0.09561556577682495, "learning_rate": 1.6952864902751008e-05, "loss": 9.0315, "step": 87370 }, { "epoch": 0.4363654523209069, "grad_norm": 0.09164825826883316, "learning_rate": 1.6951362987809458e-05, "loss": 9.027, "step": 87380 }, { "epoch": 0.4364153911458463, "grad_norm": 0.0955611914396286, "learning_rate": 1.694986107286791e-05, "loss": 9.034, "step": 87390 }, { "epoch": 0.4364653299707858, "grad_norm": 0.09065394103527069, "learning_rate": 1.6948359157926355e-05, "loss": 9.0308, "step": 87400 }, { "epoch": 0.4365152687957252, "grad_norm": 0.09663858264684677, "learning_rate": 1.6946857242984805e-05, "loss": 9.0187, "step": 87410 }, { "epoch": 0.4365652076206647, "grad_norm": 0.0890546515583992, "learning_rate": 1.6945355328043256e-05, "loss": 9.0392, "step": 87420 }, { "epoch": 0.4366151464456041, "grad_norm": 0.09616228938102722, "learning_rate": 1.6943853413101706e-05, "loss": 9.0342, "step": 87430 }, { "epoch": 0.4366650852705436, "grad_norm": 0.10570753365755081, "learning_rate": 1.6942351498160156e-05, "loss": 9.0383, "step": 87440 }, { "epoch": 0.436715024095483, "grad_norm": 0.09239917248487473, "learning_rate": 1.6940849583218603e-05, "loss": 9.0355, "step": 87450 }, { "epoch": 0.4367649629204225, "grad_norm": 0.09370817989110947, "learning_rate": 1.6939347668277053e-05, "loss": 9.0242, "step": 87460 }, { "epoch": 0.4368149017453619, "grad_norm": 0.09490583091974258, "learning_rate": 1.6937845753335503e-05, "loss": 9.0441, "step": 87470 }, { "epoch": 0.4368648405703014, "grad_norm": 0.0928240418434143, "learning_rate": 1.6936343838393953e-05, "loss": 9.0362, "step": 87480 }, { "epoch": 0.4369147793952408, "grad_norm": 0.09075388312339783, "learning_rate": 1.6934841923452403e-05, "loss": 9.0322, "step": 87490 }, { "epoch": 0.4369647182201803, "grad_norm": 0.09307154268026352, "learning_rate": 1.693334000851085e-05, "loss": 9.0344, "step": 87500 }, { "epoch": 0.4370146570451197, "grad_norm": 0.0918651670217514, "learning_rate": 1.69318380935693e-05, "loss": 9.0393, "step": 87510 }, { "epoch": 0.4370645958700592, "grad_norm": 0.0964551717042923, "learning_rate": 1.693033617862775e-05, "loss": 9.0446, "step": 87520 }, { "epoch": 0.4371145346949986, "grad_norm": 0.09495722502470016, "learning_rate": 1.69288342636862e-05, "loss": 9.0284, "step": 87530 }, { "epoch": 0.4371644735199381, "grad_norm": 0.09483332186937332, "learning_rate": 1.692733234874465e-05, "loss": 9.0341, "step": 87540 }, { "epoch": 0.4372144123448775, "grad_norm": 0.08984597027301788, "learning_rate": 1.6925830433803098e-05, "loss": 9.0351, "step": 87550 }, { "epoch": 0.437264351169817, "grad_norm": 0.09564351290464401, "learning_rate": 1.6924328518861548e-05, "loss": 9.023, "step": 87560 }, { "epoch": 0.4373142899947564, "grad_norm": 0.09509795159101486, "learning_rate": 1.6922826603919998e-05, "loss": 9.0338, "step": 87570 }, { "epoch": 0.4373642288196959, "grad_norm": 0.09072297811508179, "learning_rate": 1.6921324688978448e-05, "loss": 9.0442, "step": 87580 }, { "epoch": 0.4374141676446353, "grad_norm": 0.09715980291366577, "learning_rate": 1.69198227740369e-05, "loss": 9.0252, "step": 87590 }, { "epoch": 0.4374641064695748, "grad_norm": 0.09227906912565231, "learning_rate": 1.691832085909535e-05, "loss": 9.0273, "step": 87600 }, { "epoch": 0.4375140452945142, "grad_norm": 0.10533630102872849, "learning_rate": 1.6916818944153795e-05, "loss": 9.0322, "step": 87610 }, { "epoch": 0.4375639841194537, "grad_norm": 0.09329358488321304, "learning_rate": 1.6915317029212246e-05, "loss": 9.0214, "step": 87620 }, { "epoch": 0.4376139229443931, "grad_norm": 0.09366413950920105, "learning_rate": 1.6913815114270696e-05, "loss": 9.0308, "step": 87630 }, { "epoch": 0.4376638617693326, "grad_norm": 0.10361622273921967, "learning_rate": 1.6912313199329146e-05, "loss": 9.0371, "step": 87640 }, { "epoch": 0.437713800594272, "grad_norm": 0.09569653123617172, "learning_rate": 1.6910811284387596e-05, "loss": 9.0347, "step": 87650 }, { "epoch": 0.4377637394192115, "grad_norm": 0.09628830850124359, "learning_rate": 1.6909309369446043e-05, "loss": 9.0361, "step": 87660 }, { "epoch": 0.4378136782441509, "grad_norm": 0.09476719051599503, "learning_rate": 1.6907807454504493e-05, "loss": 9.0287, "step": 87670 }, { "epoch": 0.4378636170690904, "grad_norm": 0.09486107528209686, "learning_rate": 1.6906305539562943e-05, "loss": 9.0361, "step": 87680 }, { "epoch": 0.4379135558940298, "grad_norm": 0.08894047141075134, "learning_rate": 1.6904803624621393e-05, "loss": 9.0196, "step": 87690 }, { "epoch": 0.4379634947189693, "grad_norm": 0.09540939331054688, "learning_rate": 1.6903301709679844e-05, "loss": 9.039, "step": 87700 }, { "epoch": 0.4380134335439087, "grad_norm": 0.09612215310335159, "learning_rate": 1.690179979473829e-05, "loss": 9.0244, "step": 87710 }, { "epoch": 0.4380633723688482, "grad_norm": 0.0884467363357544, "learning_rate": 1.690029787979674e-05, "loss": 9.0421, "step": 87720 }, { "epoch": 0.4381133111937876, "grad_norm": 0.09004423022270203, "learning_rate": 1.689879596485519e-05, "loss": 9.0254, "step": 87730 }, { "epoch": 0.4381632500187271, "grad_norm": 0.09423902630805969, "learning_rate": 1.689729404991364e-05, "loss": 9.0198, "step": 87740 }, { "epoch": 0.4382131888436665, "grad_norm": 0.09269099682569504, "learning_rate": 1.689579213497209e-05, "loss": 9.0444, "step": 87750 }, { "epoch": 0.438263127668606, "grad_norm": 0.0918196514248848, "learning_rate": 1.6894290220030538e-05, "loss": 9.024, "step": 87760 }, { "epoch": 0.4383130664935454, "grad_norm": 0.09599689394235611, "learning_rate": 1.6892788305088988e-05, "loss": 9.0233, "step": 87770 }, { "epoch": 0.4383630053184849, "grad_norm": 0.09426075220108032, "learning_rate": 1.6891286390147438e-05, "loss": 9.0151, "step": 87780 }, { "epoch": 0.4384129441434243, "grad_norm": 0.09202740341424942, "learning_rate": 1.688978447520589e-05, "loss": 9.0304, "step": 87790 }, { "epoch": 0.4384628829683638, "grad_norm": 0.09728818386793137, "learning_rate": 1.688828256026434e-05, "loss": 9.0325, "step": 87800 }, { "epoch": 0.4385128217933032, "grad_norm": 0.09795814007520676, "learning_rate": 1.6886780645322785e-05, "loss": 9.0093, "step": 87810 }, { "epoch": 0.4385627606182427, "grad_norm": 0.0872364342212677, "learning_rate": 1.6885278730381236e-05, "loss": 9.0299, "step": 87820 }, { "epoch": 0.4386126994431821, "grad_norm": 0.09706996381282806, "learning_rate": 1.6883776815439686e-05, "loss": 9.0341, "step": 87830 }, { "epoch": 0.4386626382681216, "grad_norm": 0.08879675716161728, "learning_rate": 1.6882274900498136e-05, "loss": 9.0358, "step": 87840 }, { "epoch": 0.438712577093061, "grad_norm": 0.09184462577104568, "learning_rate": 1.6880772985556586e-05, "loss": 9.0178, "step": 87850 }, { "epoch": 0.4387625159180005, "grad_norm": 0.09184456616640091, "learning_rate": 1.6879271070615033e-05, "loss": 9.0246, "step": 87860 }, { "epoch": 0.4388124547429399, "grad_norm": 0.08778972923755646, "learning_rate": 1.6877769155673483e-05, "loss": 9.0291, "step": 87870 }, { "epoch": 0.43886239356787937, "grad_norm": 0.09503248333930969, "learning_rate": 1.6876267240731933e-05, "loss": 9.0374, "step": 87880 }, { "epoch": 0.4389123323928188, "grad_norm": 0.08978200703859329, "learning_rate": 1.6874765325790383e-05, "loss": 9.0381, "step": 87890 }, { "epoch": 0.43896227121775827, "grad_norm": 0.098361074924469, "learning_rate": 1.6873263410848834e-05, "loss": 9.0196, "step": 87900 }, { "epoch": 0.4390122100426977, "grad_norm": 0.09547007083892822, "learning_rate": 1.687176149590728e-05, "loss": 9.0084, "step": 87910 }, { "epoch": 0.43906214886763717, "grad_norm": 0.09400343149900436, "learning_rate": 1.6870259580965734e-05, "loss": 9.0175, "step": 87920 }, { "epoch": 0.4391120876925766, "grad_norm": 0.09383000433444977, "learning_rate": 1.686875766602418e-05, "loss": 9.0114, "step": 87930 }, { "epoch": 0.43916202651751607, "grad_norm": 0.09463747590780258, "learning_rate": 1.686725575108263e-05, "loss": 9.0246, "step": 87940 }, { "epoch": 0.4392119653424555, "grad_norm": 0.09992758929729462, "learning_rate": 1.686575383614108e-05, "loss": 9.0191, "step": 87950 }, { "epoch": 0.43926190416739497, "grad_norm": 0.09506499767303467, "learning_rate": 1.6864251921199528e-05, "loss": 9.0212, "step": 87960 }, { "epoch": 0.4393118429923344, "grad_norm": 0.09309326857328415, "learning_rate": 1.686275000625798e-05, "loss": 9.0304, "step": 87970 }, { "epoch": 0.43936178181727387, "grad_norm": 0.09387887269258499, "learning_rate": 1.686124809131643e-05, "loss": 9.0263, "step": 87980 }, { "epoch": 0.4394117206422133, "grad_norm": 0.09608437120914459, "learning_rate": 1.685974617637488e-05, "loss": 9.0282, "step": 87990 }, { "epoch": 0.43946165946715277, "grad_norm": 0.10036759823560715, "learning_rate": 1.685824426143333e-05, "loss": 9.0298, "step": 88000 }, { "epoch": 0.4395115982920922, "grad_norm": 0.09929006546735764, "learning_rate": 1.6856742346491775e-05, "loss": 9.0269, "step": 88010 }, { "epoch": 0.4395615371170316, "grad_norm": 0.08878950029611588, "learning_rate": 1.685524043155023e-05, "loss": 9.0244, "step": 88020 }, { "epoch": 0.4396114759419711, "grad_norm": 0.09790655225515366, "learning_rate": 1.6853738516608676e-05, "loss": 9.0289, "step": 88030 }, { "epoch": 0.4396614147669105, "grad_norm": 0.09261323511600494, "learning_rate": 1.6852236601667126e-05, "loss": 9.032, "step": 88040 }, { "epoch": 0.43971135359185, "grad_norm": 0.09531623125076294, "learning_rate": 1.6850734686725576e-05, "loss": 9.0355, "step": 88050 }, { "epoch": 0.4397612924167894, "grad_norm": 0.09662208706140518, "learning_rate": 1.6849232771784023e-05, "loss": 9.0177, "step": 88060 }, { "epoch": 0.4398112312417289, "grad_norm": 0.09118068218231201, "learning_rate": 1.6847730856842477e-05, "loss": 9.017, "step": 88070 }, { "epoch": 0.4398611700666683, "grad_norm": 0.09556883573532104, "learning_rate": 1.6846228941900923e-05, "loss": 9.0367, "step": 88080 }, { "epoch": 0.4399111088916078, "grad_norm": 0.08983524143695831, "learning_rate": 1.6844727026959373e-05, "loss": 9.0285, "step": 88090 }, { "epoch": 0.4399610477165472, "grad_norm": 0.09559369832277298, "learning_rate": 1.6843225112017824e-05, "loss": 9.0309, "step": 88100 }, { "epoch": 0.4400109865414867, "grad_norm": 0.09884733706712723, "learning_rate": 1.684172319707627e-05, "loss": 9.0156, "step": 88110 }, { "epoch": 0.4400609253664261, "grad_norm": 0.08653125166893005, "learning_rate": 1.6840221282134724e-05, "loss": 9.0307, "step": 88120 }, { "epoch": 0.4401108641913656, "grad_norm": 0.09182146936655045, "learning_rate": 1.683871936719317e-05, "loss": 9.0374, "step": 88130 }, { "epoch": 0.440160803016305, "grad_norm": 0.091147780418396, "learning_rate": 1.683721745225162e-05, "loss": 9.0369, "step": 88140 }, { "epoch": 0.4402107418412445, "grad_norm": 0.09663031250238419, "learning_rate": 1.683571553731007e-05, "loss": 9.0242, "step": 88150 }, { "epoch": 0.4402606806661839, "grad_norm": 0.09547109156847, "learning_rate": 1.6834213622368518e-05, "loss": 9.0246, "step": 88160 }, { "epoch": 0.4403106194911234, "grad_norm": 0.09579718112945557, "learning_rate": 1.683271170742697e-05, "loss": 9.022, "step": 88170 }, { "epoch": 0.4403605583160628, "grad_norm": 0.09461940824985504, "learning_rate": 1.683120979248542e-05, "loss": 9.0285, "step": 88180 }, { "epoch": 0.4404104971410023, "grad_norm": 0.09216568619012833, "learning_rate": 1.682970787754387e-05, "loss": 9.0253, "step": 88190 }, { "epoch": 0.4404604359659417, "grad_norm": 0.08952445536851883, "learning_rate": 1.682820596260232e-05, "loss": 9.0147, "step": 88200 }, { "epoch": 0.4405103747908812, "grad_norm": 0.09351477026939392, "learning_rate": 1.6826704047660765e-05, "loss": 9.0297, "step": 88210 }, { "epoch": 0.4405603136158206, "grad_norm": 0.09434119611978531, "learning_rate": 1.682520213271922e-05, "loss": 9.0314, "step": 88220 }, { "epoch": 0.4406102524407601, "grad_norm": 0.09438735991716385, "learning_rate": 1.6823700217777666e-05, "loss": 9.0262, "step": 88230 }, { "epoch": 0.4406601912656995, "grad_norm": 0.08906865119934082, "learning_rate": 1.6822198302836116e-05, "loss": 9.0275, "step": 88240 }, { "epoch": 0.440710130090639, "grad_norm": 0.09299363940954208, "learning_rate": 1.6820696387894566e-05, "loss": 9.0376, "step": 88250 }, { "epoch": 0.4407600689155784, "grad_norm": 0.09099581837654114, "learning_rate": 1.6819194472953013e-05, "loss": 9.03, "step": 88260 }, { "epoch": 0.4408100077405179, "grad_norm": 0.08823831379413605, "learning_rate": 1.6817692558011467e-05, "loss": 9.0356, "step": 88270 }, { "epoch": 0.4408599465654573, "grad_norm": 0.0898958370089531, "learning_rate": 1.6816190643069913e-05, "loss": 9.0328, "step": 88280 }, { "epoch": 0.44090988539039677, "grad_norm": 0.09236274659633636, "learning_rate": 1.6814688728128367e-05, "loss": 9.0355, "step": 88290 }, { "epoch": 0.4409598242153362, "grad_norm": 0.08883727341890335, "learning_rate": 1.6813186813186814e-05, "loss": 9.0354, "step": 88300 }, { "epoch": 0.44100976304027567, "grad_norm": 0.09699812531471252, "learning_rate": 1.681168489824526e-05, "loss": 9.0269, "step": 88310 }, { "epoch": 0.4410597018652151, "grad_norm": 0.09533574432134628, "learning_rate": 1.6810182983303714e-05, "loss": 9.0313, "step": 88320 }, { "epoch": 0.44110964069015457, "grad_norm": 0.09363710880279541, "learning_rate": 1.680868106836216e-05, "loss": 9.0249, "step": 88330 }, { "epoch": 0.441159579515094, "grad_norm": 0.09056729823350906, "learning_rate": 1.6807179153420614e-05, "loss": 9.013, "step": 88340 }, { "epoch": 0.44120951834003347, "grad_norm": 0.08941638469696045, "learning_rate": 1.680567723847906e-05, "loss": 9.0277, "step": 88350 }, { "epoch": 0.4412594571649729, "grad_norm": 0.09786325693130493, "learning_rate": 1.6804175323537508e-05, "loss": 9.0257, "step": 88360 }, { "epoch": 0.44130939598991237, "grad_norm": 0.09090113639831543, "learning_rate": 1.680267340859596e-05, "loss": 9.0268, "step": 88370 }, { "epoch": 0.4413593348148518, "grad_norm": 0.09401243180036545, "learning_rate": 1.680117149365441e-05, "loss": 9.0274, "step": 88380 }, { "epoch": 0.44140927363979127, "grad_norm": 0.09182031452655792, "learning_rate": 1.6799669578712862e-05, "loss": 9.0217, "step": 88390 }, { "epoch": 0.4414592124647307, "grad_norm": 0.09471350908279419, "learning_rate": 1.679816766377131e-05, "loss": 9.0152, "step": 88400 }, { "epoch": 0.44150915128967017, "grad_norm": 0.0950457975268364, "learning_rate": 1.6796665748829755e-05, "loss": 9.0367, "step": 88410 }, { "epoch": 0.4415590901146096, "grad_norm": 0.09334580600261688, "learning_rate": 1.679516383388821e-05, "loss": 9.0324, "step": 88420 }, { "epoch": 0.44160902893954906, "grad_norm": 0.09345501661300659, "learning_rate": 1.6793661918946656e-05, "loss": 9.0421, "step": 88430 }, { "epoch": 0.4416589677644885, "grad_norm": 0.09563722461462021, "learning_rate": 1.679216000400511e-05, "loss": 9.0331, "step": 88440 }, { "epoch": 0.44170890658942796, "grad_norm": 0.09366366267204285, "learning_rate": 1.6790658089063556e-05, "loss": 9.022, "step": 88450 }, { "epoch": 0.4417588454143674, "grad_norm": 0.09602910280227661, "learning_rate": 1.6789156174122003e-05, "loss": 9.0281, "step": 88460 }, { "epoch": 0.44180878423930686, "grad_norm": 0.09258101880550385, "learning_rate": 1.6787654259180457e-05, "loss": 9.0255, "step": 88470 }, { "epoch": 0.4418587230642463, "grad_norm": 0.09126337617635727, "learning_rate": 1.6786152344238903e-05, "loss": 9.0267, "step": 88480 }, { "epoch": 0.44190866188918576, "grad_norm": 0.0905313566327095, "learning_rate": 1.6784650429297357e-05, "loss": 9.0256, "step": 88490 }, { "epoch": 0.4419586007141252, "grad_norm": 0.09191538393497467, "learning_rate": 1.6783148514355804e-05, "loss": 9.0323, "step": 88500 }, { "epoch": 0.44200853953906466, "grad_norm": 0.08896350860595703, "learning_rate": 1.678164659941425e-05, "loss": 9.0267, "step": 88510 }, { "epoch": 0.4420584783640041, "grad_norm": 0.0963086411356926, "learning_rate": 1.6780144684472704e-05, "loss": 9.0095, "step": 88520 }, { "epoch": 0.44210841718894356, "grad_norm": 0.09583403170108795, "learning_rate": 1.677864276953115e-05, "loss": 9.0238, "step": 88530 }, { "epoch": 0.442158356013883, "grad_norm": 0.09216772019863129, "learning_rate": 1.6777140854589604e-05, "loss": 9.0123, "step": 88540 }, { "epoch": 0.44220829483882246, "grad_norm": 0.09259237349033356, "learning_rate": 1.677563893964805e-05, "loss": 9.024, "step": 88550 }, { "epoch": 0.4422582336637619, "grad_norm": 0.09866563975811005, "learning_rate": 1.6774137024706498e-05, "loss": 9.0138, "step": 88560 }, { "epoch": 0.44230817248870136, "grad_norm": 0.09354985505342484, "learning_rate": 1.677263510976495e-05, "loss": 9.026, "step": 88570 }, { "epoch": 0.4423581113136408, "grad_norm": 0.10093557089567184, "learning_rate": 1.67711331948234e-05, "loss": 9.0229, "step": 88580 }, { "epoch": 0.44240805013858026, "grad_norm": 0.09272787719964981, "learning_rate": 1.6769631279881852e-05, "loss": 9.0249, "step": 88590 }, { "epoch": 0.4424579889635197, "grad_norm": 0.09445803612470627, "learning_rate": 1.67681293649403e-05, "loss": 9.0102, "step": 88600 }, { "epoch": 0.44250792778845915, "grad_norm": 0.09491786360740662, "learning_rate": 1.676662744999875e-05, "loss": 9.0247, "step": 88610 }, { "epoch": 0.4425578666133986, "grad_norm": 0.09124266356229782, "learning_rate": 1.67651255350572e-05, "loss": 9.0244, "step": 88620 }, { "epoch": 0.44260780543833805, "grad_norm": 0.09176667034626007, "learning_rate": 1.6763623620115646e-05, "loss": 9.0165, "step": 88630 }, { "epoch": 0.4426577442632775, "grad_norm": 0.10504433512687683, "learning_rate": 1.67621217051741e-05, "loss": 9.0253, "step": 88640 }, { "epoch": 0.44270768308821695, "grad_norm": 0.09985138475894928, "learning_rate": 1.6760619790232546e-05, "loss": 9.0125, "step": 88650 }, { "epoch": 0.4427576219131564, "grad_norm": 0.0883818119764328, "learning_rate": 1.6759117875290996e-05, "loss": 9.0167, "step": 88660 }, { "epoch": 0.44280756073809585, "grad_norm": 0.08730896562337875, "learning_rate": 1.6757615960349447e-05, "loss": 9.033, "step": 88670 }, { "epoch": 0.4428574995630353, "grad_norm": 0.08953774720430374, "learning_rate": 1.6756114045407893e-05, "loss": 9.0397, "step": 88680 }, { "epoch": 0.44290743838797475, "grad_norm": 0.09112643450498581, "learning_rate": 1.6754612130466347e-05, "loss": 9.0137, "step": 88690 }, { "epoch": 0.44295737721291417, "grad_norm": 0.09895043820142746, "learning_rate": 1.6753110215524794e-05, "loss": 9.0121, "step": 88700 }, { "epoch": 0.44300731603785365, "grad_norm": 0.09690089523792267, "learning_rate": 1.6751608300583244e-05, "loss": 9.0322, "step": 88710 }, { "epoch": 0.44305725486279307, "grad_norm": 0.09116695076227188, "learning_rate": 1.6750106385641694e-05, "loss": 9.0266, "step": 88720 }, { "epoch": 0.44310719368773255, "grad_norm": 0.09873844683170319, "learning_rate": 1.674860447070014e-05, "loss": 9.0144, "step": 88730 }, { "epoch": 0.44315713251267197, "grad_norm": 0.08854333311319351, "learning_rate": 1.6747102555758594e-05, "loss": 9.0264, "step": 88740 }, { "epoch": 0.44320707133761145, "grad_norm": 0.09332463145256042, "learning_rate": 1.674560064081704e-05, "loss": 9.0221, "step": 88750 }, { "epoch": 0.44325701016255087, "grad_norm": 0.0980052724480629, "learning_rate": 1.674409872587549e-05, "loss": 9.0311, "step": 88760 }, { "epoch": 0.44330694898749035, "grad_norm": 0.09149298816919327, "learning_rate": 1.674259681093394e-05, "loss": 9.0343, "step": 88770 }, { "epoch": 0.44335688781242977, "grad_norm": 0.0906062051653862, "learning_rate": 1.674109489599239e-05, "loss": 9.0131, "step": 88780 }, { "epoch": 0.44340682663736924, "grad_norm": 0.09233875572681427, "learning_rate": 1.6739592981050842e-05, "loss": 9.0148, "step": 88790 }, { "epoch": 0.44345676546230867, "grad_norm": 0.09254756569862366, "learning_rate": 1.673809106610929e-05, "loss": 9.0121, "step": 88800 }, { "epoch": 0.44350670428724814, "grad_norm": 0.0948517769575119, "learning_rate": 1.673658915116774e-05, "loss": 9.0205, "step": 88810 }, { "epoch": 0.44355664311218757, "grad_norm": 0.0976976752281189, "learning_rate": 1.673508723622619e-05, "loss": 9.0087, "step": 88820 }, { "epoch": 0.44360658193712704, "grad_norm": 0.0981476828455925, "learning_rate": 1.6733585321284636e-05, "loss": 9.0068, "step": 88830 }, { "epoch": 0.44365652076206646, "grad_norm": 0.09623267501592636, "learning_rate": 1.673208340634309e-05, "loss": 9.0195, "step": 88840 }, { "epoch": 0.44370645958700594, "grad_norm": 0.09452231973409653, "learning_rate": 1.6730581491401536e-05, "loss": 9.016, "step": 88850 }, { "epoch": 0.44375639841194536, "grad_norm": 0.09390220791101456, "learning_rate": 1.6729079576459986e-05, "loss": 9.0134, "step": 88860 }, { "epoch": 0.44380633723688484, "grad_norm": 0.09678220003843307, "learning_rate": 1.6727577661518437e-05, "loss": 9.012, "step": 88870 }, { "epoch": 0.44385627606182426, "grad_norm": 0.09419732540845871, "learning_rate": 1.6726075746576883e-05, "loss": 9.0304, "step": 88880 }, { "epoch": 0.44390621488676374, "grad_norm": 0.08998965471982956, "learning_rate": 1.6724573831635337e-05, "loss": 9.0247, "step": 88890 }, { "epoch": 0.44395615371170316, "grad_norm": 0.09407693147659302, "learning_rate": 1.6723071916693784e-05, "loss": 9.0233, "step": 88900 }, { "epoch": 0.44400609253664264, "grad_norm": 0.094719298183918, "learning_rate": 1.6721570001752234e-05, "loss": 9.0212, "step": 88910 }, { "epoch": 0.44405603136158206, "grad_norm": 0.09490375965833664, "learning_rate": 1.6720068086810684e-05, "loss": 9.0231, "step": 88920 }, { "epoch": 0.44410597018652154, "grad_norm": 0.09172754734754562, "learning_rate": 1.6718566171869134e-05, "loss": 9.0039, "step": 88930 }, { "epoch": 0.44415590901146096, "grad_norm": 0.09175007045269012, "learning_rate": 1.6717064256927584e-05, "loss": 9.0265, "step": 88940 }, { "epoch": 0.44420584783640044, "grad_norm": 0.0903623029589653, "learning_rate": 1.671556234198603e-05, "loss": 9.0105, "step": 88950 }, { "epoch": 0.44425578666133986, "grad_norm": 0.09337350726127625, "learning_rate": 1.671406042704448e-05, "loss": 9.0146, "step": 88960 }, { "epoch": 0.44430572548627933, "grad_norm": 0.0933094471693039, "learning_rate": 1.671255851210293e-05, "loss": 9.0125, "step": 88970 }, { "epoch": 0.44435566431121876, "grad_norm": 0.0940171629190445, "learning_rate": 1.6711056597161382e-05, "loss": 9.0088, "step": 88980 }, { "epoch": 0.44440560313615823, "grad_norm": 0.0906391590833664, "learning_rate": 1.6709554682219832e-05, "loss": 9.0209, "step": 88990 }, { "epoch": 0.44445554196109766, "grad_norm": 0.09381049126386642, "learning_rate": 1.670805276727828e-05, "loss": 9.0177, "step": 89000 }, { "epoch": 0.4445054807860371, "grad_norm": 0.09514244645833969, "learning_rate": 1.670655085233673e-05, "loss": 9.0153, "step": 89010 }, { "epoch": 0.44455541961097655, "grad_norm": 0.09447330236434937, "learning_rate": 1.670504893739518e-05, "loss": 9.0064, "step": 89020 }, { "epoch": 0.444605358435916, "grad_norm": 0.09046784043312073, "learning_rate": 1.670354702245363e-05, "loss": 9.011, "step": 89030 }, { "epoch": 0.44465529726085545, "grad_norm": 0.09899982064962387, "learning_rate": 1.670204510751208e-05, "loss": 9.0164, "step": 89040 }, { "epoch": 0.4447052360857949, "grad_norm": 0.0965995118021965, "learning_rate": 1.6700543192570526e-05, "loss": 9.0145, "step": 89050 }, { "epoch": 0.44475517491073435, "grad_norm": 0.09488395601511002, "learning_rate": 1.6699041277628976e-05, "loss": 9.0108, "step": 89060 }, { "epoch": 0.4448051137356738, "grad_norm": 0.09230642020702362, "learning_rate": 1.6697539362687427e-05, "loss": 9.0086, "step": 89070 }, { "epoch": 0.44485505256061325, "grad_norm": 0.10005513578653336, "learning_rate": 1.6696037447745877e-05, "loss": 9.0129, "step": 89080 }, { "epoch": 0.4449049913855527, "grad_norm": 0.09370043128728867, "learning_rate": 1.6694535532804327e-05, "loss": 9.0219, "step": 89090 }, { "epoch": 0.44495493021049215, "grad_norm": 0.09334039688110352, "learning_rate": 1.6693033617862774e-05, "loss": 9.0091, "step": 89100 }, { "epoch": 0.44500486903543157, "grad_norm": 0.09181877970695496, "learning_rate": 1.6691531702921224e-05, "loss": 9.0125, "step": 89110 }, { "epoch": 0.44505480786037105, "grad_norm": 0.09253057092428207, "learning_rate": 1.6690029787979674e-05, "loss": 9.0284, "step": 89120 }, { "epoch": 0.44510474668531047, "grad_norm": 0.08906669169664383, "learning_rate": 1.6688527873038124e-05, "loss": 9.0251, "step": 89130 }, { "epoch": 0.44515468551024995, "grad_norm": 0.09391651302576065, "learning_rate": 1.6687025958096574e-05, "loss": 9.0138, "step": 89140 }, { "epoch": 0.44520462433518937, "grad_norm": 0.09748826175928116, "learning_rate": 1.668552404315502e-05, "loss": 9.0223, "step": 89150 }, { "epoch": 0.44525456316012885, "grad_norm": 0.09539515525102615, "learning_rate": 1.668402212821347e-05, "loss": 8.9965, "step": 89160 }, { "epoch": 0.44530450198506827, "grad_norm": 0.0892728641629219, "learning_rate": 1.668252021327192e-05, "loss": 9.0155, "step": 89170 }, { "epoch": 0.44535444081000775, "grad_norm": 0.09498701989650726, "learning_rate": 1.6681018298330372e-05, "loss": 9.0113, "step": 89180 }, { "epoch": 0.44540437963494717, "grad_norm": 0.09873063117265701, "learning_rate": 1.6679516383388822e-05, "loss": 9.0206, "step": 89190 }, { "epoch": 0.44545431845988664, "grad_norm": 0.093873530626297, "learning_rate": 1.667801446844727e-05, "loss": 9.0141, "step": 89200 }, { "epoch": 0.44550425728482607, "grad_norm": 0.09323079884052277, "learning_rate": 1.667651255350572e-05, "loss": 9.0133, "step": 89210 }, { "epoch": 0.44555419610976554, "grad_norm": 0.09667900949716568, "learning_rate": 1.667501063856417e-05, "loss": 9.0206, "step": 89220 }, { "epoch": 0.44560413493470497, "grad_norm": 0.1016487404704094, "learning_rate": 1.667350872362262e-05, "loss": 9.036, "step": 89230 }, { "epoch": 0.44565407375964444, "grad_norm": 0.09627226740121841, "learning_rate": 1.667200680868107e-05, "loss": 9.0168, "step": 89240 }, { "epoch": 0.44570401258458386, "grad_norm": 0.09360161423683167, "learning_rate": 1.667050489373952e-05, "loss": 9.0101, "step": 89250 }, { "epoch": 0.44575395140952334, "grad_norm": 0.08747625350952148, "learning_rate": 1.6669002978797966e-05, "loss": 9.0054, "step": 89260 }, { "epoch": 0.44580389023446276, "grad_norm": 0.09256748855113983, "learning_rate": 1.6667501063856417e-05, "loss": 9.0228, "step": 89270 }, { "epoch": 0.44585382905940224, "grad_norm": 0.09370601922273636, "learning_rate": 1.6665999148914867e-05, "loss": 9.0211, "step": 89280 }, { "epoch": 0.44590376788434166, "grad_norm": 0.09242367744445801, "learning_rate": 1.6664497233973317e-05, "loss": 9.0083, "step": 89290 }, { "epoch": 0.44595370670928114, "grad_norm": 0.0926143079996109, "learning_rate": 1.6662995319031767e-05, "loss": 9.0202, "step": 89300 }, { "epoch": 0.44600364553422056, "grad_norm": 0.0988818109035492, "learning_rate": 1.6661493404090214e-05, "loss": 9.0048, "step": 89310 }, { "epoch": 0.44605358435916004, "grad_norm": 0.09795771539211273, "learning_rate": 1.6659991489148664e-05, "loss": 9.0216, "step": 89320 }, { "epoch": 0.44610352318409946, "grad_norm": 0.09289045631885529, "learning_rate": 1.6658489574207114e-05, "loss": 9.0187, "step": 89330 }, { "epoch": 0.44615346200903894, "grad_norm": 0.09757115691900253, "learning_rate": 1.6656987659265564e-05, "loss": 9.0029, "step": 89340 }, { "epoch": 0.44620340083397836, "grad_norm": 0.09858351945877075, "learning_rate": 1.6655485744324015e-05, "loss": 9.0203, "step": 89350 }, { "epoch": 0.44625333965891784, "grad_norm": 0.09712626785039902, "learning_rate": 1.665398382938246e-05, "loss": 9.0095, "step": 89360 }, { "epoch": 0.44630327848385726, "grad_norm": 0.09400411695241928, "learning_rate": 1.665248191444091e-05, "loss": 9.007, "step": 89370 }, { "epoch": 0.44635321730879673, "grad_norm": 0.0991569310426712, "learning_rate": 1.6650979999499362e-05, "loss": 9.0118, "step": 89380 }, { "epoch": 0.44640315613373616, "grad_norm": 0.09559716284275055, "learning_rate": 1.6649478084557812e-05, "loss": 8.9983, "step": 89390 }, { "epoch": 0.44645309495867563, "grad_norm": 0.09568917006254196, "learning_rate": 1.6647976169616262e-05, "loss": 9.0076, "step": 89400 }, { "epoch": 0.44650303378361506, "grad_norm": 0.09183740615844727, "learning_rate": 1.664647425467471e-05, "loss": 9.0106, "step": 89410 }, { "epoch": 0.44655297260855453, "grad_norm": 0.0893789678812027, "learning_rate": 1.664497233973316e-05, "loss": 9.0001, "step": 89420 }, { "epoch": 0.44660291143349395, "grad_norm": 0.09366993606090546, "learning_rate": 1.664347042479161e-05, "loss": 9.0137, "step": 89430 }, { "epoch": 0.44665285025843343, "grad_norm": 0.09987689554691315, "learning_rate": 1.664196850985006e-05, "loss": 9.0049, "step": 89440 }, { "epoch": 0.44670278908337285, "grad_norm": 0.09278554469347, "learning_rate": 1.664046659490851e-05, "loss": 9.0149, "step": 89450 }, { "epoch": 0.44675272790831233, "grad_norm": 0.08935824036598206, "learning_rate": 1.6638964679966956e-05, "loss": 9.0228, "step": 89460 }, { "epoch": 0.44680266673325175, "grad_norm": 0.08843576163053513, "learning_rate": 1.6637462765025407e-05, "loss": 9.0136, "step": 89470 }, { "epoch": 0.44685260555819123, "grad_norm": 0.09229853004217148, "learning_rate": 1.6635960850083857e-05, "loss": 9.0065, "step": 89480 }, { "epoch": 0.44690254438313065, "grad_norm": 0.09317019581794739, "learning_rate": 1.6634458935142307e-05, "loss": 9.0074, "step": 89490 }, { "epoch": 0.44695248320807013, "grad_norm": 0.09514672309160233, "learning_rate": 1.6632957020200757e-05, "loss": 9.0009, "step": 89500 }, { "epoch": 0.44700242203300955, "grad_norm": 0.09481526166200638, "learning_rate": 1.6631455105259207e-05, "loss": 9.0216, "step": 89510 }, { "epoch": 0.447052360857949, "grad_norm": 0.09262920171022415, "learning_rate": 1.6629953190317654e-05, "loss": 9.0146, "step": 89520 }, { "epoch": 0.44710229968288845, "grad_norm": 0.09552852809429169, "learning_rate": 1.6628451275376104e-05, "loss": 9.0086, "step": 89530 }, { "epoch": 0.4471522385078279, "grad_norm": 0.09410271048545837, "learning_rate": 1.6626949360434554e-05, "loss": 9.0166, "step": 89540 }, { "epoch": 0.44720217733276735, "grad_norm": 0.09333918243646622, "learning_rate": 1.6625447445493005e-05, "loss": 9.0171, "step": 89550 }, { "epoch": 0.4472521161577068, "grad_norm": 0.09849578142166138, "learning_rate": 1.6623945530551455e-05, "loss": 9.0056, "step": 89560 }, { "epoch": 0.44730205498264625, "grad_norm": 0.09501996636390686, "learning_rate": 1.6622443615609905e-05, "loss": 9.0066, "step": 89570 }, { "epoch": 0.4473519938075857, "grad_norm": 0.09807047247886658, "learning_rate": 1.6620941700668352e-05, "loss": 9.0202, "step": 89580 }, { "epoch": 0.44740193263252515, "grad_norm": 0.09879709780216217, "learning_rate": 1.6619439785726802e-05, "loss": 9.0203, "step": 89590 }, { "epoch": 0.4474518714574646, "grad_norm": 0.08965755254030228, "learning_rate": 1.6617937870785252e-05, "loss": 9.0089, "step": 89600 }, { "epoch": 0.44750181028240404, "grad_norm": 0.09854756295681, "learning_rate": 1.6616435955843702e-05, "loss": 9.0021, "step": 89610 }, { "epoch": 0.4475517491073435, "grad_norm": 0.09752686321735382, "learning_rate": 1.6614934040902153e-05, "loss": 9.0026, "step": 89620 }, { "epoch": 0.44760168793228294, "grad_norm": 0.09632866829633713, "learning_rate": 1.66134321259606e-05, "loss": 9.0084, "step": 89630 }, { "epoch": 0.4476516267572224, "grad_norm": 0.09156852960586548, "learning_rate": 1.661193021101905e-05, "loss": 9.0133, "step": 89640 }, { "epoch": 0.44770156558216184, "grad_norm": 0.08667552471160889, "learning_rate": 1.66104282960775e-05, "loss": 9.0044, "step": 89650 }, { "epoch": 0.4477515044071013, "grad_norm": 0.09284383803606033, "learning_rate": 1.660892638113595e-05, "loss": 9.0035, "step": 89660 }, { "epoch": 0.44780144323204074, "grad_norm": 0.09126190096139908, "learning_rate": 1.66074244661944e-05, "loss": 9.014, "step": 89670 }, { "epoch": 0.4478513820569802, "grad_norm": 0.10024043917655945, "learning_rate": 1.6605922551252847e-05, "loss": 8.9948, "step": 89680 }, { "epoch": 0.44790132088191964, "grad_norm": 0.09772263467311859, "learning_rate": 1.6604420636311297e-05, "loss": 9.0078, "step": 89690 }, { "epoch": 0.4479512597068591, "grad_norm": 0.09419326484203339, "learning_rate": 1.6602918721369747e-05, "loss": 9.0066, "step": 89700 }, { "epoch": 0.44800119853179854, "grad_norm": 0.08692155033349991, "learning_rate": 1.6601416806428197e-05, "loss": 9.0163, "step": 89710 }, { "epoch": 0.448051137356738, "grad_norm": 0.09567783027887344, "learning_rate": 1.6599914891486648e-05, "loss": 9.0016, "step": 89720 }, { "epoch": 0.44810107618167744, "grad_norm": 0.0902228057384491, "learning_rate": 1.6598412976545094e-05, "loss": 9.0098, "step": 89730 }, { "epoch": 0.4481510150066169, "grad_norm": 0.09784096479415894, "learning_rate": 1.6596911061603545e-05, "loss": 9.0028, "step": 89740 }, { "epoch": 0.44820095383155634, "grad_norm": 0.09646725654602051, "learning_rate": 1.6595409146661995e-05, "loss": 8.9891, "step": 89750 }, { "epoch": 0.4482508926564958, "grad_norm": 0.09333818405866623, "learning_rate": 1.6593907231720445e-05, "loss": 8.9968, "step": 89760 }, { "epoch": 0.44830083148143524, "grad_norm": 0.09000226110219955, "learning_rate": 1.6592405316778895e-05, "loss": 9.0099, "step": 89770 }, { "epoch": 0.4483507703063747, "grad_norm": 0.09583406150341034, "learning_rate": 1.6590903401837342e-05, "loss": 9.0108, "step": 89780 }, { "epoch": 0.44840070913131413, "grad_norm": 0.09547807276248932, "learning_rate": 1.6589401486895792e-05, "loss": 9.0058, "step": 89790 }, { "epoch": 0.4484506479562536, "grad_norm": 0.09241116046905518, "learning_rate": 1.6587899571954242e-05, "loss": 8.9989, "step": 89800 }, { "epoch": 0.44850058678119303, "grad_norm": 0.09252715855836868, "learning_rate": 1.6586397657012692e-05, "loss": 9.0156, "step": 89810 }, { "epoch": 0.4485505256061325, "grad_norm": 0.09530036151409149, "learning_rate": 1.6584895742071143e-05, "loss": 9.0116, "step": 89820 }, { "epoch": 0.44860046443107193, "grad_norm": 0.08951985090970993, "learning_rate": 1.658339382712959e-05, "loss": 9.0116, "step": 89830 }, { "epoch": 0.4486504032560114, "grad_norm": 0.09694526344537735, "learning_rate": 1.658189191218804e-05, "loss": 8.9973, "step": 89840 }, { "epoch": 0.44870034208095083, "grad_norm": 0.09165141731500626, "learning_rate": 1.658038999724649e-05, "loss": 9.0058, "step": 89850 }, { "epoch": 0.4487502809058903, "grad_norm": 0.09192651510238647, "learning_rate": 1.657888808230494e-05, "loss": 9.0044, "step": 89860 }, { "epoch": 0.44880021973082973, "grad_norm": 0.08684073388576508, "learning_rate": 1.657738616736339e-05, "loss": 9.0147, "step": 89870 }, { "epoch": 0.4488501585557692, "grad_norm": 0.09026368707418442, "learning_rate": 1.6575884252421837e-05, "loss": 8.9914, "step": 89880 }, { "epoch": 0.44890009738070863, "grad_norm": 0.09688662737607956, "learning_rate": 1.657438233748029e-05, "loss": 9.0052, "step": 89890 }, { "epoch": 0.4489500362056481, "grad_norm": 0.09581411629915237, "learning_rate": 1.6572880422538737e-05, "loss": 8.9905, "step": 89900 }, { "epoch": 0.44899997503058753, "grad_norm": 0.1003839373588562, "learning_rate": 1.6571378507597187e-05, "loss": 9.0148, "step": 89910 }, { "epoch": 0.449049913855527, "grad_norm": 0.09192468225955963, "learning_rate": 1.6569876592655638e-05, "loss": 9.0248, "step": 89920 }, { "epoch": 0.4490998526804664, "grad_norm": 0.09126332402229309, "learning_rate": 1.6568374677714084e-05, "loss": 9.0122, "step": 89930 }, { "epoch": 0.4491497915054059, "grad_norm": 0.0944957584142685, "learning_rate": 1.6566872762772538e-05, "loss": 9.0082, "step": 89940 }, { "epoch": 0.4491997303303453, "grad_norm": 0.0930357500910759, "learning_rate": 1.6565370847830985e-05, "loss": 8.9977, "step": 89950 }, { "epoch": 0.4492496691552848, "grad_norm": 0.09657001495361328, "learning_rate": 1.6563868932889435e-05, "loss": 9.006, "step": 89960 }, { "epoch": 0.4492996079802242, "grad_norm": 0.09886710345745087, "learning_rate": 1.6562367017947885e-05, "loss": 9.0056, "step": 89970 }, { "epoch": 0.4493495468051637, "grad_norm": 0.0890527293086052, "learning_rate": 1.6560865103006332e-05, "loss": 9.0023, "step": 89980 }, { "epoch": 0.4493994856301031, "grad_norm": 0.0896250382065773, "learning_rate": 1.6559363188064785e-05, "loss": 9.0064, "step": 89990 }, { "epoch": 0.44944942445504255, "grad_norm": 0.09379280358552933, "learning_rate": 1.6557861273123232e-05, "loss": 9.0103, "step": 90000 }, { "epoch": 0.449499363279982, "grad_norm": 0.09411850571632385, "learning_rate": 1.6556359358181682e-05, "loss": 9.0098, "step": 90010 }, { "epoch": 0.44954930210492144, "grad_norm": 0.08976029604673386, "learning_rate": 1.6554857443240133e-05, "loss": 9.0026, "step": 90020 }, { "epoch": 0.4495992409298609, "grad_norm": 0.0921340063214302, "learning_rate": 1.655335552829858e-05, "loss": 9.0126, "step": 90030 }, { "epoch": 0.44964917975480034, "grad_norm": 0.09887304157018661, "learning_rate": 1.6551853613357033e-05, "loss": 9.0057, "step": 90040 }, { "epoch": 0.4496991185797398, "grad_norm": 0.09693904966115952, "learning_rate": 1.655035169841548e-05, "loss": 8.9959, "step": 90050 }, { "epoch": 0.44974905740467924, "grad_norm": 0.09151757508516312, "learning_rate": 1.654884978347393e-05, "loss": 9.0101, "step": 90060 }, { "epoch": 0.4497989962296187, "grad_norm": 0.1048135980963707, "learning_rate": 1.654734786853238e-05, "loss": 8.9994, "step": 90070 }, { "epoch": 0.44984893505455814, "grad_norm": 0.09414626657962799, "learning_rate": 1.6545845953590827e-05, "loss": 9.0041, "step": 90080 }, { "epoch": 0.4498988738794976, "grad_norm": 0.0916576012969017, "learning_rate": 1.654434403864928e-05, "loss": 8.9947, "step": 90090 }, { "epoch": 0.44994881270443704, "grad_norm": 0.09090348333120346, "learning_rate": 1.6542842123707727e-05, "loss": 8.9906, "step": 90100 }, { "epoch": 0.4499987515293765, "grad_norm": 0.0916331559419632, "learning_rate": 1.6541340208766177e-05, "loss": 9.0048, "step": 90110 }, { "epoch": 0.45004869035431594, "grad_norm": 0.09484575688838959, "learning_rate": 1.6539838293824628e-05, "loss": 9.0064, "step": 90120 }, { "epoch": 0.4500986291792554, "grad_norm": 0.09016244858503342, "learning_rate": 1.6538336378883074e-05, "loss": 9.0041, "step": 90130 }, { "epoch": 0.45014856800419484, "grad_norm": 0.09121375530958176, "learning_rate": 1.6536834463941528e-05, "loss": 9.0068, "step": 90140 }, { "epoch": 0.4501985068291343, "grad_norm": 0.09483055770397186, "learning_rate": 1.6535332548999975e-05, "loss": 9.0051, "step": 90150 }, { "epoch": 0.45024844565407374, "grad_norm": 0.09125950932502747, "learning_rate": 1.6533830634058425e-05, "loss": 8.9956, "step": 90160 }, { "epoch": 0.4502983844790132, "grad_norm": 0.09234707057476044, "learning_rate": 1.6532328719116875e-05, "loss": 9.0004, "step": 90170 }, { "epoch": 0.45034832330395264, "grad_norm": 0.09413158893585205, "learning_rate": 1.6530826804175322e-05, "loss": 8.9987, "step": 90180 }, { "epoch": 0.4503982621288921, "grad_norm": 0.09710296243429184, "learning_rate": 1.6529324889233775e-05, "loss": 8.9944, "step": 90190 }, { "epoch": 0.45044820095383153, "grad_norm": 0.0949583351612091, "learning_rate": 1.6527822974292222e-05, "loss": 9.003, "step": 90200 }, { "epoch": 0.450498139778771, "grad_norm": 0.09892937541007996, "learning_rate": 1.6526321059350676e-05, "loss": 8.9983, "step": 90210 }, { "epoch": 0.45054807860371043, "grad_norm": 0.09644380211830139, "learning_rate": 1.6524819144409123e-05, "loss": 8.9919, "step": 90220 }, { "epoch": 0.4505980174286499, "grad_norm": 0.09606988728046417, "learning_rate": 1.652331722946757e-05, "loss": 9.0063, "step": 90230 }, { "epoch": 0.45064795625358933, "grad_norm": 0.09773648530244827, "learning_rate": 1.6521815314526023e-05, "loss": 8.9835, "step": 90240 }, { "epoch": 0.4506978950785288, "grad_norm": 0.09979324787855148, "learning_rate": 1.652031339958447e-05, "loss": 8.9963, "step": 90250 }, { "epoch": 0.45074783390346823, "grad_norm": 0.09351041167974472, "learning_rate": 1.6518811484642923e-05, "loss": 9.0103, "step": 90260 }, { "epoch": 0.4507977727284077, "grad_norm": 0.09703806042671204, "learning_rate": 1.651730956970137e-05, "loss": 8.9954, "step": 90270 }, { "epoch": 0.45084771155334713, "grad_norm": 0.09339191764593124, "learning_rate": 1.6515807654759817e-05, "loss": 9.0095, "step": 90280 }, { "epoch": 0.4508976503782866, "grad_norm": 0.09220651537179947, "learning_rate": 1.651430573981827e-05, "loss": 8.9959, "step": 90290 }, { "epoch": 0.45094758920322603, "grad_norm": 0.09635385125875473, "learning_rate": 1.6512803824876717e-05, "loss": 8.9845, "step": 90300 }, { "epoch": 0.4509975280281655, "grad_norm": 0.08985468745231628, "learning_rate": 1.651130190993517e-05, "loss": 9.0037, "step": 90310 }, { "epoch": 0.45104746685310493, "grad_norm": 0.0972968265414238, "learning_rate": 1.6509799994993618e-05, "loss": 8.9896, "step": 90320 }, { "epoch": 0.4510974056780444, "grad_norm": 0.09027385711669922, "learning_rate": 1.6508298080052064e-05, "loss": 9.0151, "step": 90330 }, { "epoch": 0.4511473445029838, "grad_norm": 0.09425677359104156, "learning_rate": 1.6506796165110518e-05, "loss": 9.0061, "step": 90340 }, { "epoch": 0.4511972833279233, "grad_norm": 0.09738679975271225, "learning_rate": 1.6505294250168965e-05, "loss": 8.9966, "step": 90350 }, { "epoch": 0.4512472221528627, "grad_norm": 0.08872897177934647, "learning_rate": 1.6503792335227418e-05, "loss": 9.0142, "step": 90360 }, { "epoch": 0.4512971609778022, "grad_norm": 0.09494879096746445, "learning_rate": 1.6502290420285865e-05, "loss": 9.0005, "step": 90370 }, { "epoch": 0.4513470998027416, "grad_norm": 0.09621385484933853, "learning_rate": 1.6500788505344312e-05, "loss": 9.0012, "step": 90380 }, { "epoch": 0.4513970386276811, "grad_norm": 0.09302900731563568, "learning_rate": 1.6499286590402765e-05, "loss": 9.0184, "step": 90390 }, { "epoch": 0.4514469774526205, "grad_norm": 0.09508734941482544, "learning_rate": 1.6497784675461212e-05, "loss": 9.007, "step": 90400 }, { "epoch": 0.45149691627756, "grad_norm": 0.09385523945093155, "learning_rate": 1.6496282760519666e-05, "loss": 9.006, "step": 90410 }, { "epoch": 0.4515468551024994, "grad_norm": 0.09022466838359833, "learning_rate": 1.6494780845578113e-05, "loss": 9.0222, "step": 90420 }, { "epoch": 0.4515967939274389, "grad_norm": 0.09833023697137833, "learning_rate": 1.649327893063656e-05, "loss": 8.985, "step": 90430 }, { "epoch": 0.4516467327523783, "grad_norm": 0.09564008563756943, "learning_rate": 1.6491777015695013e-05, "loss": 9.0073, "step": 90440 }, { "epoch": 0.4516966715773178, "grad_norm": 0.0922297015786171, "learning_rate": 1.649027510075346e-05, "loss": 8.9923, "step": 90450 }, { "epoch": 0.4517466104022572, "grad_norm": 0.09411637485027313, "learning_rate": 1.6488773185811913e-05, "loss": 9.0043, "step": 90460 }, { "epoch": 0.4517965492271967, "grad_norm": 0.09667911380529404, "learning_rate": 1.648727127087036e-05, "loss": 8.9967, "step": 90470 }, { "epoch": 0.4518464880521361, "grad_norm": 0.09421195834875107, "learning_rate": 1.6485769355928807e-05, "loss": 9.0058, "step": 90480 }, { "epoch": 0.4518964268770756, "grad_norm": 0.09238166362047195, "learning_rate": 1.648426744098726e-05, "loss": 9.0087, "step": 90490 }, { "epoch": 0.451946365702015, "grad_norm": 0.09417644143104553, "learning_rate": 1.6482765526045707e-05, "loss": 9.0012, "step": 90500 }, { "epoch": 0.4519963045269545, "grad_norm": 0.09228955209255219, "learning_rate": 1.648126361110416e-05, "loss": 9.0016, "step": 90510 }, { "epoch": 0.4520462433518939, "grad_norm": 0.09172794967889786, "learning_rate": 1.6479761696162608e-05, "loss": 8.9866, "step": 90520 }, { "epoch": 0.4520961821768334, "grad_norm": 0.10119758546352386, "learning_rate": 1.6478259781221058e-05, "loss": 9.0031, "step": 90530 }, { "epoch": 0.4521461210017728, "grad_norm": 0.10038284212350845, "learning_rate": 1.6476757866279508e-05, "loss": 9.0136, "step": 90540 }, { "epoch": 0.4521960598267123, "grad_norm": 0.09320379048585892, "learning_rate": 1.6475255951337955e-05, "loss": 8.9853, "step": 90550 }, { "epoch": 0.4522459986516517, "grad_norm": 0.08812331408262253, "learning_rate": 1.647375403639641e-05, "loss": 9.0052, "step": 90560 }, { "epoch": 0.4522959374765912, "grad_norm": 0.08946395665407181, "learning_rate": 1.6472252121454855e-05, "loss": 9.0011, "step": 90570 }, { "epoch": 0.4523458763015306, "grad_norm": 0.09697841852903366, "learning_rate": 1.6470750206513305e-05, "loss": 9.004, "step": 90580 }, { "epoch": 0.4523958151264701, "grad_norm": 0.09299259632825851, "learning_rate": 1.6469248291571755e-05, "loss": 8.9935, "step": 90590 }, { "epoch": 0.4524457539514095, "grad_norm": 0.09182324260473251, "learning_rate": 1.6467746376630202e-05, "loss": 9.0015, "step": 90600 }, { "epoch": 0.452495692776349, "grad_norm": 0.08882872015237808, "learning_rate": 1.6466244461688656e-05, "loss": 9.0002, "step": 90610 }, { "epoch": 0.4525456316012884, "grad_norm": 0.08870082348585129, "learning_rate": 1.6464742546747103e-05, "loss": 8.9936, "step": 90620 }, { "epoch": 0.4525955704262279, "grad_norm": 0.09134729206562042, "learning_rate": 1.6463240631805553e-05, "loss": 8.9908, "step": 90630 }, { "epoch": 0.4526455092511673, "grad_norm": 0.09212338924407959, "learning_rate": 1.6461738716864003e-05, "loss": 9.0031, "step": 90640 }, { "epoch": 0.4526954480761068, "grad_norm": 0.09525448828935623, "learning_rate": 1.646023680192245e-05, "loss": 8.998, "step": 90650 }, { "epoch": 0.4527453869010462, "grad_norm": 0.09147049486637115, "learning_rate": 1.6458734886980903e-05, "loss": 9.0005, "step": 90660 }, { "epoch": 0.4527953257259857, "grad_norm": 0.09832822531461716, "learning_rate": 1.645723297203935e-05, "loss": 8.9923, "step": 90670 }, { "epoch": 0.4528452645509251, "grad_norm": 0.09704606980085373, "learning_rate": 1.64557310570978e-05, "loss": 8.9927, "step": 90680 }, { "epoch": 0.4528952033758646, "grad_norm": 0.09421838819980621, "learning_rate": 1.645422914215625e-05, "loss": 9.0079, "step": 90690 }, { "epoch": 0.452945142200804, "grad_norm": 0.09166116267442703, "learning_rate": 1.6452727227214697e-05, "loss": 8.9912, "step": 90700 }, { "epoch": 0.4529950810257435, "grad_norm": 0.0929771363735199, "learning_rate": 1.645122531227315e-05, "loss": 8.9928, "step": 90710 }, { "epoch": 0.4530450198506829, "grad_norm": 0.09662994742393494, "learning_rate": 1.6449723397331598e-05, "loss": 8.997, "step": 90720 }, { "epoch": 0.4530949586756224, "grad_norm": 0.09966935217380524, "learning_rate": 1.6448221482390048e-05, "loss": 8.9901, "step": 90730 }, { "epoch": 0.4531448975005618, "grad_norm": 0.09319128841161728, "learning_rate": 1.6446719567448498e-05, "loss": 8.9974, "step": 90740 }, { "epoch": 0.4531948363255013, "grad_norm": 0.09141194820404053, "learning_rate": 1.6445217652506945e-05, "loss": 9.0124, "step": 90750 }, { "epoch": 0.4532447751504407, "grad_norm": 0.09347580373287201, "learning_rate": 1.64437157375654e-05, "loss": 8.996, "step": 90760 }, { "epoch": 0.4532947139753802, "grad_norm": 0.09879345446825027, "learning_rate": 1.6442213822623845e-05, "loss": 8.9926, "step": 90770 }, { "epoch": 0.4533446528003196, "grad_norm": 0.09470192342996597, "learning_rate": 1.6440711907682295e-05, "loss": 9.0014, "step": 90780 }, { "epoch": 0.4533945916252591, "grad_norm": 0.09187999367713928, "learning_rate": 1.6439209992740745e-05, "loss": 9.0035, "step": 90790 }, { "epoch": 0.4534445304501985, "grad_norm": 0.0892479196190834, "learning_rate": 1.6437708077799192e-05, "loss": 9.0248, "step": 90800 }, { "epoch": 0.453494469275138, "grad_norm": 0.09171134233474731, "learning_rate": 1.6436206162857646e-05, "loss": 8.9885, "step": 90810 }, { "epoch": 0.4535444081000774, "grad_norm": 0.09449323266744614, "learning_rate": 1.6434704247916093e-05, "loss": 9.0067, "step": 90820 }, { "epoch": 0.4535943469250169, "grad_norm": 0.09131663292646408, "learning_rate": 1.6433202332974543e-05, "loss": 8.9834, "step": 90830 }, { "epoch": 0.4536442857499563, "grad_norm": 0.0928645208477974, "learning_rate": 1.6431700418032993e-05, "loss": 8.9925, "step": 90840 }, { "epoch": 0.4536942245748958, "grad_norm": 0.09293372929096222, "learning_rate": 1.6430198503091443e-05, "loss": 8.998, "step": 90850 }, { "epoch": 0.4537441633998352, "grad_norm": 0.09626687318086624, "learning_rate": 1.6428696588149893e-05, "loss": 9.0024, "step": 90860 }, { "epoch": 0.4537941022247747, "grad_norm": 0.09633610397577286, "learning_rate": 1.642719467320834e-05, "loss": 8.9846, "step": 90870 }, { "epoch": 0.4538440410497141, "grad_norm": 0.09213805943727493, "learning_rate": 1.642569275826679e-05, "loss": 8.9934, "step": 90880 }, { "epoch": 0.4538939798746536, "grad_norm": 0.09418069571256638, "learning_rate": 1.642419084332524e-05, "loss": 8.987, "step": 90890 }, { "epoch": 0.453943918699593, "grad_norm": 0.09727146476507187, "learning_rate": 1.642268892838369e-05, "loss": 8.9973, "step": 90900 }, { "epoch": 0.4539938575245325, "grad_norm": 0.09132228046655655, "learning_rate": 1.642118701344214e-05, "loss": 9.0022, "step": 90910 }, { "epoch": 0.4540437963494719, "grad_norm": 0.09720085561275482, "learning_rate": 1.6419685098500588e-05, "loss": 9.0058, "step": 90920 }, { "epoch": 0.4540937351744114, "grad_norm": 0.08539389073848724, "learning_rate": 1.6418183183559038e-05, "loss": 9.0059, "step": 90930 }, { "epoch": 0.4541436739993508, "grad_norm": 0.09037039428949356, "learning_rate": 1.6416681268617488e-05, "loss": 8.9984, "step": 90940 }, { "epoch": 0.45419361282429027, "grad_norm": 0.09338293969631195, "learning_rate": 1.6415179353675938e-05, "loss": 8.998, "step": 90950 }, { "epoch": 0.4542435516492297, "grad_norm": 0.09810806810855865, "learning_rate": 1.641367743873439e-05, "loss": 8.9885, "step": 90960 }, { "epoch": 0.45429349047416917, "grad_norm": 0.09057030826807022, "learning_rate": 1.6412175523792835e-05, "loss": 8.9988, "step": 90970 }, { "epoch": 0.4543434292991086, "grad_norm": 0.09103839844465256, "learning_rate": 1.6410673608851285e-05, "loss": 8.9834, "step": 90980 }, { "epoch": 0.454393368124048, "grad_norm": 0.09123621881008148, "learning_rate": 1.6409171693909735e-05, "loss": 8.9904, "step": 90990 }, { "epoch": 0.4544433069489875, "grad_norm": 0.09989268332719803, "learning_rate": 1.6407669778968186e-05, "loss": 8.9973, "step": 91000 }, { "epoch": 0.4544932457739269, "grad_norm": 0.09387070685625076, "learning_rate": 1.6406167864026636e-05, "loss": 8.9821, "step": 91010 }, { "epoch": 0.4545431845988664, "grad_norm": 0.09730041772127151, "learning_rate": 1.6404665949085083e-05, "loss": 8.9905, "step": 91020 }, { "epoch": 0.4545931234238058, "grad_norm": 0.09198877215385437, "learning_rate": 1.6403164034143533e-05, "loss": 8.9827, "step": 91030 }, { "epoch": 0.4546430622487453, "grad_norm": 0.10066971182823181, "learning_rate": 1.6401662119201983e-05, "loss": 8.9825, "step": 91040 }, { "epoch": 0.4546930010736847, "grad_norm": 0.09631640464067459, "learning_rate": 1.6400160204260433e-05, "loss": 8.9979, "step": 91050 }, { "epoch": 0.4547429398986242, "grad_norm": 0.09197012335062027, "learning_rate": 1.6398658289318883e-05, "loss": 8.9957, "step": 91060 }, { "epoch": 0.4547928787235636, "grad_norm": 0.09876050055027008, "learning_rate": 1.639715637437733e-05, "loss": 8.9956, "step": 91070 }, { "epoch": 0.4548428175485031, "grad_norm": 0.09191004186868668, "learning_rate": 1.639565445943578e-05, "loss": 8.9973, "step": 91080 }, { "epoch": 0.4548927563734425, "grad_norm": 0.09481745958328247, "learning_rate": 1.639415254449423e-05, "loss": 9.0117, "step": 91090 }, { "epoch": 0.454942695198382, "grad_norm": 0.09424830228090286, "learning_rate": 1.639265062955268e-05, "loss": 9.0045, "step": 91100 }, { "epoch": 0.4549926340233214, "grad_norm": 0.09049578011035919, "learning_rate": 1.639114871461113e-05, "loss": 9.0088, "step": 91110 }, { "epoch": 0.4550425728482609, "grad_norm": 0.09442015737295151, "learning_rate": 1.6389646799669578e-05, "loss": 8.9877, "step": 91120 }, { "epoch": 0.4550925116732003, "grad_norm": 0.09153556078672409, "learning_rate": 1.6388144884728028e-05, "loss": 8.9957, "step": 91130 }, { "epoch": 0.4551424504981398, "grad_norm": 0.08938729763031006, "learning_rate": 1.6386642969786478e-05, "loss": 8.9926, "step": 91140 }, { "epoch": 0.4551923893230792, "grad_norm": 0.10056551545858383, "learning_rate": 1.6385141054844928e-05, "loss": 8.9848, "step": 91150 }, { "epoch": 0.4552423281480187, "grad_norm": 0.09079209715127945, "learning_rate": 1.638363913990338e-05, "loss": 8.9895, "step": 91160 }, { "epoch": 0.4552922669729581, "grad_norm": 0.08931328356266022, "learning_rate": 1.6382137224961825e-05, "loss": 8.9885, "step": 91170 }, { "epoch": 0.4553422057978976, "grad_norm": 0.09365954250097275, "learning_rate": 1.6380635310020275e-05, "loss": 8.9991, "step": 91180 }, { "epoch": 0.455392144622837, "grad_norm": 0.0947716236114502, "learning_rate": 1.6379133395078725e-05, "loss": 8.993, "step": 91190 }, { "epoch": 0.4554420834477765, "grad_norm": 0.09569630026817322, "learning_rate": 1.6377631480137176e-05, "loss": 8.9796, "step": 91200 }, { "epoch": 0.4554920222727159, "grad_norm": 0.09704722464084625, "learning_rate": 1.6376129565195626e-05, "loss": 8.9829, "step": 91210 }, { "epoch": 0.4555419610976554, "grad_norm": 0.0929335281252861, "learning_rate": 1.6374627650254076e-05, "loss": 9.0032, "step": 91220 }, { "epoch": 0.4555918999225948, "grad_norm": 0.09553791582584381, "learning_rate": 1.6373125735312523e-05, "loss": 8.9954, "step": 91230 }, { "epoch": 0.4556418387475343, "grad_norm": 0.09323886781930923, "learning_rate": 1.6371623820370973e-05, "loss": 9.0015, "step": 91240 }, { "epoch": 0.4556917775724737, "grad_norm": 0.09437048435211182, "learning_rate": 1.6370121905429423e-05, "loss": 9.0025, "step": 91250 }, { "epoch": 0.4557417163974132, "grad_norm": 0.09459199756383896, "learning_rate": 1.6368619990487873e-05, "loss": 8.9955, "step": 91260 }, { "epoch": 0.4557916552223526, "grad_norm": 0.09053412079811096, "learning_rate": 1.6367118075546324e-05, "loss": 8.9967, "step": 91270 }, { "epoch": 0.4558415940472921, "grad_norm": 0.0967337116599083, "learning_rate": 1.636561616060477e-05, "loss": 8.9871, "step": 91280 }, { "epoch": 0.4558915328722315, "grad_norm": 0.09642165154218674, "learning_rate": 1.636411424566322e-05, "loss": 9.0127, "step": 91290 }, { "epoch": 0.455941471697171, "grad_norm": 0.09764105081558228, "learning_rate": 1.636261233072167e-05, "loss": 9.0031, "step": 91300 }, { "epoch": 0.4559914105221104, "grad_norm": 0.08854164928197861, "learning_rate": 1.636111041578012e-05, "loss": 8.9874, "step": 91310 }, { "epoch": 0.4560413493470499, "grad_norm": 0.09614869207143784, "learning_rate": 1.635960850083857e-05, "loss": 8.977, "step": 91320 }, { "epoch": 0.4560912881719893, "grad_norm": 0.09570986032485962, "learning_rate": 1.6358106585897018e-05, "loss": 8.9986, "step": 91330 }, { "epoch": 0.4561412269969288, "grad_norm": 0.0936940461397171, "learning_rate": 1.6356604670955468e-05, "loss": 8.988, "step": 91340 }, { "epoch": 0.4561911658218682, "grad_norm": 0.08797448128461838, "learning_rate": 1.6355102756013918e-05, "loss": 8.998, "step": 91350 }, { "epoch": 0.45624110464680767, "grad_norm": 0.0959649458527565, "learning_rate": 1.635360084107237e-05, "loss": 8.9901, "step": 91360 }, { "epoch": 0.4562910434717471, "grad_norm": 0.08933617919683456, "learning_rate": 1.635209892613082e-05, "loss": 9.0037, "step": 91370 }, { "epoch": 0.45634098229668657, "grad_norm": 0.09029172360897064, "learning_rate": 1.6350597011189265e-05, "loss": 8.9963, "step": 91380 }, { "epoch": 0.456390921121626, "grad_norm": 0.0927506536245346, "learning_rate": 1.6349095096247716e-05, "loss": 8.9835, "step": 91390 }, { "epoch": 0.45644085994656547, "grad_norm": 0.08801010996103287, "learning_rate": 1.6347593181306166e-05, "loss": 8.9871, "step": 91400 }, { "epoch": 0.4564907987715049, "grad_norm": 0.09252002835273743, "learning_rate": 1.6346091266364616e-05, "loss": 9.0016, "step": 91410 }, { "epoch": 0.45654073759644437, "grad_norm": 0.09484586119651794, "learning_rate": 1.6344589351423066e-05, "loss": 8.9857, "step": 91420 }, { "epoch": 0.4565906764213838, "grad_norm": 0.10182981193065643, "learning_rate": 1.6343087436481513e-05, "loss": 8.9776, "step": 91430 }, { "epoch": 0.45664061524632327, "grad_norm": 0.0948307141661644, "learning_rate": 1.6341585521539963e-05, "loss": 8.9847, "step": 91440 }, { "epoch": 0.4566905540712627, "grad_norm": 0.0894957110285759, "learning_rate": 1.6340083606598413e-05, "loss": 8.9764, "step": 91450 }, { "epoch": 0.45674049289620217, "grad_norm": 0.09226840734481812, "learning_rate": 1.6338581691656863e-05, "loss": 8.9938, "step": 91460 }, { "epoch": 0.4567904317211416, "grad_norm": 0.09212039411067963, "learning_rate": 1.6337079776715314e-05, "loss": 8.9952, "step": 91470 }, { "epoch": 0.45684037054608106, "grad_norm": 0.09596432745456696, "learning_rate": 1.633557786177376e-05, "loss": 8.9927, "step": 91480 }, { "epoch": 0.4568903093710205, "grad_norm": 0.09584586322307587, "learning_rate": 1.633407594683221e-05, "loss": 8.9862, "step": 91490 }, { "epoch": 0.45694024819595996, "grad_norm": 0.09555765986442566, "learning_rate": 1.633257403189066e-05, "loss": 8.9894, "step": 91500 }, { "epoch": 0.4569901870208994, "grad_norm": 0.09596934914588928, "learning_rate": 1.633107211694911e-05, "loss": 8.9971, "step": 91510 }, { "epoch": 0.45704012584583886, "grad_norm": 0.09511646628379822, "learning_rate": 1.632957020200756e-05, "loss": 8.9951, "step": 91520 }, { "epoch": 0.4570900646707783, "grad_norm": 0.09373897314071655, "learning_rate": 1.6328068287066008e-05, "loss": 8.9797, "step": 91530 }, { "epoch": 0.45714000349571776, "grad_norm": 0.0908622294664383, "learning_rate": 1.632656637212446e-05, "loss": 9.0091, "step": 91540 }, { "epoch": 0.4571899423206572, "grad_norm": 0.09412607550621033, "learning_rate": 1.6325064457182908e-05, "loss": 8.9875, "step": 91550 }, { "epoch": 0.45723988114559666, "grad_norm": 0.09757382422685623, "learning_rate": 1.632356254224136e-05, "loss": 8.9888, "step": 91560 }, { "epoch": 0.4572898199705361, "grad_norm": 0.09289320558309555, "learning_rate": 1.632206062729981e-05, "loss": 8.9767, "step": 91570 }, { "epoch": 0.45733975879547556, "grad_norm": 0.08800926059484482, "learning_rate": 1.6320558712358255e-05, "loss": 8.9755, "step": 91580 }, { "epoch": 0.457389697620415, "grad_norm": 0.09186160564422607, "learning_rate": 1.631905679741671e-05, "loss": 8.992, "step": 91590 }, { "epoch": 0.45743963644535446, "grad_norm": 0.09121951460838318, "learning_rate": 1.6317554882475156e-05, "loss": 8.9873, "step": 91600 }, { "epoch": 0.4574895752702939, "grad_norm": 0.09226646274328232, "learning_rate": 1.6316052967533606e-05, "loss": 8.9873, "step": 91610 }, { "epoch": 0.45753951409523336, "grad_norm": 0.0960613414645195, "learning_rate": 1.6314551052592056e-05, "loss": 8.9843, "step": 91620 }, { "epoch": 0.4575894529201728, "grad_norm": 0.10511282831430435, "learning_rate": 1.6313049137650503e-05, "loss": 8.9846, "step": 91630 }, { "epoch": 0.45763939174511226, "grad_norm": 0.09277866035699844, "learning_rate": 1.6311547222708956e-05, "loss": 8.9834, "step": 91640 }, { "epoch": 0.4576893305700517, "grad_norm": 0.09160225838422775, "learning_rate": 1.6310045307767403e-05, "loss": 8.9996, "step": 91650 }, { "epoch": 0.45773926939499116, "grad_norm": 0.08975838869810104, "learning_rate": 1.6308543392825853e-05, "loss": 8.9845, "step": 91660 }, { "epoch": 0.4577892082199306, "grad_norm": 0.09508371353149414, "learning_rate": 1.6307041477884304e-05, "loss": 8.9995, "step": 91670 }, { "epoch": 0.45783914704487005, "grad_norm": 0.09741570800542831, "learning_rate": 1.630553956294275e-05, "loss": 8.9987, "step": 91680 }, { "epoch": 0.4578890858698095, "grad_norm": 0.09434978663921356, "learning_rate": 1.6304037648001204e-05, "loss": 8.9934, "step": 91690 }, { "epoch": 0.45793902469474895, "grad_norm": 0.0960199385881424, "learning_rate": 1.630253573305965e-05, "loss": 8.9918, "step": 91700 }, { "epoch": 0.4579889635196884, "grad_norm": 0.09522512555122375, "learning_rate": 1.63010338181181e-05, "loss": 8.9856, "step": 91710 }, { "epoch": 0.45803890234462785, "grad_norm": 0.09180162101984024, "learning_rate": 1.629953190317655e-05, "loss": 8.986, "step": 91720 }, { "epoch": 0.4580888411695673, "grad_norm": 0.09206482768058777, "learning_rate": 1.6298029988234998e-05, "loss": 8.9953, "step": 91730 }, { "epoch": 0.45813877999450675, "grad_norm": 0.09780178964138031, "learning_rate": 1.629652807329345e-05, "loss": 8.9888, "step": 91740 }, { "epoch": 0.4581887188194462, "grad_norm": 0.09504421800374985, "learning_rate": 1.6295026158351898e-05, "loss": 8.9846, "step": 91750 }, { "epoch": 0.45823865764438565, "grad_norm": 0.09662793576717377, "learning_rate": 1.629352424341035e-05, "loss": 8.9828, "step": 91760 }, { "epoch": 0.45828859646932507, "grad_norm": 0.09310472756624222, "learning_rate": 1.62920223284688e-05, "loss": 9.0069, "step": 91770 }, { "epoch": 0.45833853529426455, "grad_norm": 0.09177019447088242, "learning_rate": 1.6290520413527245e-05, "loss": 8.9781, "step": 91780 }, { "epoch": 0.45838847411920397, "grad_norm": 0.09365440160036087, "learning_rate": 1.62890184985857e-05, "loss": 8.9753, "step": 91790 }, { "epoch": 0.45843841294414345, "grad_norm": 0.09410764276981354, "learning_rate": 1.6287516583644146e-05, "loss": 8.977, "step": 91800 }, { "epoch": 0.45848835176908287, "grad_norm": 0.09689280390739441, "learning_rate": 1.6286014668702596e-05, "loss": 8.9838, "step": 91810 }, { "epoch": 0.45853829059402235, "grad_norm": 0.09834188967943192, "learning_rate": 1.6284512753761046e-05, "loss": 8.98, "step": 91820 }, { "epoch": 0.45858822941896177, "grad_norm": 0.10252006351947784, "learning_rate": 1.6283010838819493e-05, "loss": 8.9913, "step": 91830 }, { "epoch": 0.45863816824390125, "grad_norm": 0.09507527202367783, "learning_rate": 1.6281508923877946e-05, "loss": 8.9938, "step": 91840 }, { "epoch": 0.45868810706884067, "grad_norm": 0.09752275049686432, "learning_rate": 1.6280007008936393e-05, "loss": 8.9923, "step": 91850 }, { "epoch": 0.45873804589378014, "grad_norm": 0.10007300972938538, "learning_rate": 1.6278505093994847e-05, "loss": 9.0013, "step": 91860 }, { "epoch": 0.45878798471871957, "grad_norm": 0.09385237097740173, "learning_rate": 1.6277003179053294e-05, "loss": 8.975, "step": 91870 }, { "epoch": 0.45883792354365904, "grad_norm": 0.09489066153764725, "learning_rate": 1.627550126411174e-05, "loss": 8.9761, "step": 91880 }, { "epoch": 0.45888786236859846, "grad_norm": 0.0923139825463295, "learning_rate": 1.6273999349170194e-05, "loss": 8.9836, "step": 91890 }, { "epoch": 0.45893780119353794, "grad_norm": 0.09415728598833084, "learning_rate": 1.627249743422864e-05, "loss": 8.984, "step": 91900 }, { "epoch": 0.45898774001847736, "grad_norm": 0.08979801088571548, "learning_rate": 1.6270995519287094e-05, "loss": 8.9757, "step": 91910 }, { "epoch": 0.45903767884341684, "grad_norm": 0.09274829924106598, "learning_rate": 1.626949360434554e-05, "loss": 8.9781, "step": 91920 }, { "epoch": 0.45908761766835626, "grad_norm": 0.09117092192173004, "learning_rate": 1.6267991689403988e-05, "loss": 8.9811, "step": 91930 }, { "epoch": 0.45913755649329574, "grad_norm": 0.09696992486715317, "learning_rate": 1.626648977446244e-05, "loss": 8.9823, "step": 91940 }, { "epoch": 0.45918749531823516, "grad_norm": 0.09253089129924774, "learning_rate": 1.6264987859520888e-05, "loss": 8.9919, "step": 91950 }, { "epoch": 0.45923743414317464, "grad_norm": 0.09517562389373779, "learning_rate": 1.6263485944579342e-05, "loss": 9.0024, "step": 91960 }, { "epoch": 0.45928737296811406, "grad_norm": 0.09681175649166107, "learning_rate": 1.626198402963779e-05, "loss": 8.9724, "step": 91970 }, { "epoch": 0.4593373117930535, "grad_norm": 0.08983436971902847, "learning_rate": 1.6260482114696235e-05, "loss": 8.9792, "step": 91980 }, { "epoch": 0.45938725061799296, "grad_norm": 0.0926215648651123, "learning_rate": 1.625898019975469e-05, "loss": 8.9755, "step": 91990 }, { "epoch": 0.4594371894429324, "grad_norm": 0.09400998055934906, "learning_rate": 1.6257478284813136e-05, "loss": 8.9726, "step": 92000 }, { "epoch": 0.45948712826787186, "grad_norm": 0.09896674752235413, "learning_rate": 1.625597636987159e-05, "loss": 8.9919, "step": 92010 }, { "epoch": 0.4595370670928113, "grad_norm": 0.09126316010951996, "learning_rate": 1.6254474454930036e-05, "loss": 8.9908, "step": 92020 }, { "epoch": 0.45958700591775076, "grad_norm": 0.09881032258272171, "learning_rate": 1.6252972539988483e-05, "loss": 8.9719, "step": 92030 }, { "epoch": 0.4596369447426902, "grad_norm": 0.09345117956399918, "learning_rate": 1.6251470625046936e-05, "loss": 8.9801, "step": 92040 }, { "epoch": 0.45968688356762966, "grad_norm": 0.0911770686507225, "learning_rate": 1.6249968710105383e-05, "loss": 8.9831, "step": 92050 }, { "epoch": 0.4597368223925691, "grad_norm": 0.09339731186628342, "learning_rate": 1.6248466795163837e-05, "loss": 8.9748, "step": 92060 }, { "epoch": 0.45978676121750855, "grad_norm": 0.09199456870555878, "learning_rate": 1.6246964880222284e-05, "loss": 8.9885, "step": 92070 }, { "epoch": 0.459836700042448, "grad_norm": 0.09440702199935913, "learning_rate": 1.624546296528073e-05, "loss": 8.9864, "step": 92080 }, { "epoch": 0.45988663886738745, "grad_norm": 0.09405873715877533, "learning_rate": 1.6243961050339184e-05, "loss": 8.9815, "step": 92090 }, { "epoch": 0.4599365776923269, "grad_norm": 0.09130971878767014, "learning_rate": 1.624245913539763e-05, "loss": 8.9784, "step": 92100 }, { "epoch": 0.45998651651726635, "grad_norm": 0.09779450297355652, "learning_rate": 1.6240957220456084e-05, "loss": 8.9849, "step": 92110 }, { "epoch": 0.4600364553422058, "grad_norm": 0.09931951016187668, "learning_rate": 1.623945530551453e-05, "loss": 8.9855, "step": 92120 }, { "epoch": 0.46008639416714525, "grad_norm": 0.08796689659357071, "learning_rate": 1.6237953390572978e-05, "loss": 8.9822, "step": 92130 }, { "epoch": 0.4601363329920847, "grad_norm": 0.09218308329582214, "learning_rate": 1.623645147563143e-05, "loss": 8.9771, "step": 92140 }, { "epoch": 0.46018627181702415, "grad_norm": 0.09494258463382721, "learning_rate": 1.6234949560689878e-05, "loss": 8.9803, "step": 92150 }, { "epoch": 0.4602362106419636, "grad_norm": 0.09072136878967285, "learning_rate": 1.6233447645748332e-05, "loss": 8.9806, "step": 92160 }, { "epoch": 0.46028614946690305, "grad_norm": 0.09422437846660614, "learning_rate": 1.623194573080678e-05, "loss": 8.992, "step": 92170 }, { "epoch": 0.46033608829184247, "grad_norm": 0.09309976547956467, "learning_rate": 1.623044381586523e-05, "loss": 8.9822, "step": 92180 }, { "epoch": 0.46038602711678195, "grad_norm": 0.09436167776584625, "learning_rate": 1.622894190092368e-05, "loss": 8.9833, "step": 92190 }, { "epoch": 0.46043596594172137, "grad_norm": 0.09215915203094482, "learning_rate": 1.6227439985982126e-05, "loss": 8.9867, "step": 92200 }, { "epoch": 0.46048590476666085, "grad_norm": 0.0927964299917221, "learning_rate": 1.622593807104058e-05, "loss": 8.9965, "step": 92210 }, { "epoch": 0.46053584359160027, "grad_norm": 0.08990570157766342, "learning_rate": 1.6224436156099026e-05, "loss": 8.9703, "step": 92220 }, { "epoch": 0.46058578241653975, "grad_norm": 0.09287389367818832, "learning_rate": 1.6222934241157476e-05, "loss": 8.9852, "step": 92230 }, { "epoch": 0.46063572124147917, "grad_norm": 0.09232201427221298, "learning_rate": 1.6221432326215926e-05, "loss": 8.9978, "step": 92240 }, { "epoch": 0.46068566006641865, "grad_norm": 0.09181149303913116, "learning_rate": 1.6219930411274373e-05, "loss": 8.9753, "step": 92250 }, { "epoch": 0.46073559889135807, "grad_norm": 0.08810899406671524, "learning_rate": 1.6218428496332827e-05, "loss": 8.9912, "step": 92260 }, { "epoch": 0.46078553771629754, "grad_norm": 0.09229025989770889, "learning_rate": 1.6216926581391274e-05, "loss": 8.9822, "step": 92270 }, { "epoch": 0.46083547654123697, "grad_norm": 0.09612645953893661, "learning_rate": 1.6215424666449724e-05, "loss": 8.9762, "step": 92280 }, { "epoch": 0.46088541536617644, "grad_norm": 0.09475258737802505, "learning_rate": 1.6213922751508174e-05, "loss": 8.9794, "step": 92290 }, { "epoch": 0.46093535419111586, "grad_norm": 0.08649539202451706, "learning_rate": 1.621242083656662e-05, "loss": 8.9705, "step": 92300 }, { "epoch": 0.46098529301605534, "grad_norm": 0.09077353775501251, "learning_rate": 1.6210918921625074e-05, "loss": 8.9873, "step": 92310 }, { "epoch": 0.46103523184099476, "grad_norm": 0.09602386504411697, "learning_rate": 1.620941700668352e-05, "loss": 8.9859, "step": 92320 }, { "epoch": 0.46108517066593424, "grad_norm": 0.09134723991155624, "learning_rate": 1.620791509174197e-05, "loss": 8.9876, "step": 92330 }, { "epoch": 0.46113510949087366, "grad_norm": 0.09485665708780289, "learning_rate": 1.620641317680042e-05, "loss": 8.9749, "step": 92340 }, { "epoch": 0.46118504831581314, "grad_norm": 0.089450903236866, "learning_rate": 1.6204911261858868e-05, "loss": 8.9815, "step": 92350 }, { "epoch": 0.46123498714075256, "grad_norm": 0.09187912195920944, "learning_rate": 1.6203409346917322e-05, "loss": 8.9729, "step": 92360 }, { "epoch": 0.46128492596569204, "grad_norm": 0.09176033735275269, "learning_rate": 1.620190743197577e-05, "loss": 8.9813, "step": 92370 }, { "epoch": 0.46133486479063146, "grad_norm": 0.09702310711145401, "learning_rate": 1.620040551703422e-05, "loss": 8.9771, "step": 92380 }, { "epoch": 0.46138480361557094, "grad_norm": 0.09183552861213684, "learning_rate": 1.619890360209267e-05, "loss": 8.9827, "step": 92390 }, { "epoch": 0.46143474244051036, "grad_norm": 0.09094628691673279, "learning_rate": 1.6197401687151116e-05, "loss": 8.9792, "step": 92400 }, { "epoch": 0.46148468126544984, "grad_norm": 0.09234099090099335, "learning_rate": 1.619589977220957e-05, "loss": 8.9879, "step": 92410 }, { "epoch": 0.46153462009038926, "grad_norm": 0.0963030532002449, "learning_rate": 1.6194397857268016e-05, "loss": 8.9812, "step": 92420 }, { "epoch": 0.46158455891532874, "grad_norm": 0.09307033568620682, "learning_rate": 1.6192895942326466e-05, "loss": 8.9806, "step": 92430 }, { "epoch": 0.46163449774026816, "grad_norm": 0.08998160809278488, "learning_rate": 1.6191394027384916e-05, "loss": 8.9827, "step": 92440 }, { "epoch": 0.46168443656520763, "grad_norm": 0.09202240407466888, "learning_rate": 1.6189892112443363e-05, "loss": 8.9737, "step": 92450 }, { "epoch": 0.46173437539014706, "grad_norm": 0.08661884814500809, "learning_rate": 1.6188390197501817e-05, "loss": 8.9747, "step": 92460 }, { "epoch": 0.46178431421508653, "grad_norm": 0.08981632441282272, "learning_rate": 1.6186888282560264e-05, "loss": 8.973, "step": 92470 }, { "epoch": 0.46183425304002595, "grad_norm": 0.09431172162294388, "learning_rate": 1.6185386367618714e-05, "loss": 8.972, "step": 92480 }, { "epoch": 0.46188419186496543, "grad_norm": 0.09960649162530899, "learning_rate": 1.6183884452677164e-05, "loss": 8.9833, "step": 92490 }, { "epoch": 0.46193413068990485, "grad_norm": 0.09025320410728455, "learning_rate": 1.6182382537735614e-05, "loss": 8.9745, "step": 92500 }, { "epoch": 0.46198406951484433, "grad_norm": 0.09079807996749878, "learning_rate": 1.6180880622794064e-05, "loss": 8.9746, "step": 92510 }, { "epoch": 0.46203400833978375, "grad_norm": 0.09874525666236877, "learning_rate": 1.617937870785251e-05, "loss": 8.9767, "step": 92520 }, { "epoch": 0.46208394716472323, "grad_norm": 0.09728190302848816, "learning_rate": 1.617787679291096e-05, "loss": 8.9802, "step": 92530 }, { "epoch": 0.46213388598966265, "grad_norm": 0.09223682433366776, "learning_rate": 1.617637487796941e-05, "loss": 8.9872, "step": 92540 }, { "epoch": 0.46218382481460213, "grad_norm": 0.09065338224172592, "learning_rate": 1.617487296302786e-05, "loss": 8.9868, "step": 92550 }, { "epoch": 0.46223376363954155, "grad_norm": 0.09436984360218048, "learning_rate": 1.6173371048086312e-05, "loss": 8.989, "step": 92560 }, { "epoch": 0.46228370246448103, "grad_norm": 0.09117233753204346, "learning_rate": 1.617186913314476e-05, "loss": 8.9857, "step": 92570 }, { "epoch": 0.46233364128942045, "grad_norm": 0.09342578798532486, "learning_rate": 1.617036721820321e-05, "loss": 8.9786, "step": 92580 }, { "epoch": 0.4623835801143599, "grad_norm": 0.09534727782011032, "learning_rate": 1.616886530326166e-05, "loss": 8.9899, "step": 92590 }, { "epoch": 0.46243351893929935, "grad_norm": 0.09320477396249771, "learning_rate": 1.616736338832011e-05, "loss": 8.9657, "step": 92600 }, { "epoch": 0.4624834577642388, "grad_norm": 0.09745677560567856, "learning_rate": 1.616586147337856e-05, "loss": 8.9708, "step": 92610 }, { "epoch": 0.46253339658917825, "grad_norm": 0.09360513091087341, "learning_rate": 1.6164359558437006e-05, "loss": 8.9846, "step": 92620 }, { "epoch": 0.4625833354141177, "grad_norm": 0.09421921521425247, "learning_rate": 1.6162857643495456e-05, "loss": 8.9829, "step": 92630 }, { "epoch": 0.46263327423905715, "grad_norm": 0.10035274922847748, "learning_rate": 1.6161355728553906e-05, "loss": 8.9633, "step": 92640 }, { "epoch": 0.4626832130639966, "grad_norm": 0.09385058283805847, "learning_rate": 1.6159853813612357e-05, "loss": 8.9702, "step": 92650 }, { "epoch": 0.46273315188893605, "grad_norm": 0.0909324511885643, "learning_rate": 1.6158351898670807e-05, "loss": 8.9831, "step": 92660 }, { "epoch": 0.4627830907138755, "grad_norm": 0.08683860301971436, "learning_rate": 1.6156849983729254e-05, "loss": 8.9797, "step": 92670 }, { "epoch": 0.46283302953881494, "grad_norm": 0.09413138777017593, "learning_rate": 1.6155348068787704e-05, "loss": 8.9752, "step": 92680 }, { "epoch": 0.4628829683637544, "grad_norm": 0.09523726254701614, "learning_rate": 1.6153846153846154e-05, "loss": 8.9746, "step": 92690 }, { "epoch": 0.46293290718869384, "grad_norm": 0.09190825372934341, "learning_rate": 1.6152344238904604e-05, "loss": 8.9821, "step": 92700 }, { "epoch": 0.4629828460136333, "grad_norm": 0.09379702806472778, "learning_rate": 1.6150842323963054e-05, "loss": 8.9772, "step": 92710 }, { "epoch": 0.46303278483857274, "grad_norm": 0.09580748528242111, "learning_rate": 1.61493404090215e-05, "loss": 8.9785, "step": 92720 }, { "epoch": 0.4630827236635122, "grad_norm": 0.10296031832695007, "learning_rate": 1.614783849407995e-05, "loss": 8.9668, "step": 92730 }, { "epoch": 0.46313266248845164, "grad_norm": 0.09141591936349869, "learning_rate": 1.61463365791384e-05, "loss": 8.9757, "step": 92740 }, { "epoch": 0.4631826013133911, "grad_norm": 0.09824127703905106, "learning_rate": 1.614483466419685e-05, "loss": 8.9831, "step": 92750 }, { "epoch": 0.46323254013833054, "grad_norm": 0.0958201214671135, "learning_rate": 1.6143332749255302e-05, "loss": 8.9645, "step": 92760 }, { "epoch": 0.46328247896327, "grad_norm": 0.08969037979841232, "learning_rate": 1.614183083431375e-05, "loss": 8.9695, "step": 92770 }, { "epoch": 0.46333241778820944, "grad_norm": 0.0948222279548645, "learning_rate": 1.61403289193722e-05, "loss": 8.9856, "step": 92780 }, { "epoch": 0.4633823566131489, "grad_norm": 0.08855247497558594, "learning_rate": 1.613882700443065e-05, "loss": 8.9811, "step": 92790 }, { "epoch": 0.46343229543808834, "grad_norm": 0.08753466606140137, "learning_rate": 1.61373250894891e-05, "loss": 8.9728, "step": 92800 }, { "epoch": 0.4634822342630278, "grad_norm": 0.09330339729785919, "learning_rate": 1.613582317454755e-05, "loss": 8.9759, "step": 92810 }, { "epoch": 0.46353217308796724, "grad_norm": 0.0953167974948883, "learning_rate": 1.6134321259606e-05, "loss": 8.9906, "step": 92820 }, { "epoch": 0.4635821119129067, "grad_norm": 0.09375966340303421, "learning_rate": 1.6132819344664446e-05, "loss": 8.9694, "step": 92830 }, { "epoch": 0.46363205073784614, "grad_norm": 0.08928825706243515, "learning_rate": 1.6131317429722897e-05, "loss": 8.9641, "step": 92840 }, { "epoch": 0.4636819895627856, "grad_norm": 0.09437457472085953, "learning_rate": 1.6129815514781347e-05, "loss": 8.9668, "step": 92850 }, { "epoch": 0.46373192838772503, "grad_norm": 0.0952828899025917, "learning_rate": 1.6128313599839797e-05, "loss": 8.9885, "step": 92860 }, { "epoch": 0.4637818672126645, "grad_norm": 0.08954987674951553, "learning_rate": 1.6126811684898247e-05, "loss": 8.9779, "step": 92870 }, { "epoch": 0.46383180603760393, "grad_norm": 0.095025435090065, "learning_rate": 1.6125309769956694e-05, "loss": 8.978, "step": 92880 }, { "epoch": 0.4638817448625434, "grad_norm": 0.09305167198181152, "learning_rate": 1.6123807855015144e-05, "loss": 8.975, "step": 92890 }, { "epoch": 0.46393168368748283, "grad_norm": 0.09365760535001755, "learning_rate": 1.6122305940073594e-05, "loss": 8.9796, "step": 92900 }, { "epoch": 0.4639816225124223, "grad_norm": 0.0907236710190773, "learning_rate": 1.6120804025132044e-05, "loss": 8.9814, "step": 92910 }, { "epoch": 0.46403156133736173, "grad_norm": 0.09334593266248703, "learning_rate": 1.6119302110190495e-05, "loss": 8.9935, "step": 92920 }, { "epoch": 0.4640815001623012, "grad_norm": 0.0942806750535965, "learning_rate": 1.611780019524894e-05, "loss": 8.968, "step": 92930 }, { "epoch": 0.46413143898724063, "grad_norm": 0.09642312675714493, "learning_rate": 1.611629828030739e-05, "loss": 8.9773, "step": 92940 }, { "epoch": 0.4641813778121801, "grad_norm": 0.10480822622776031, "learning_rate": 1.611479636536584e-05, "loss": 8.9643, "step": 92950 }, { "epoch": 0.46423131663711953, "grad_norm": 0.09461807459592819, "learning_rate": 1.6113294450424292e-05, "loss": 8.9774, "step": 92960 }, { "epoch": 0.46428125546205895, "grad_norm": 0.09156806021928787, "learning_rate": 1.6111792535482742e-05, "loss": 8.983, "step": 92970 }, { "epoch": 0.4643311942869984, "grad_norm": 0.09091312438249588, "learning_rate": 1.611029062054119e-05, "loss": 8.9742, "step": 92980 }, { "epoch": 0.46438113311193785, "grad_norm": 0.09511202573776245, "learning_rate": 1.610878870559964e-05, "loss": 8.9784, "step": 92990 }, { "epoch": 0.4644310719368773, "grad_norm": 0.09062772989273071, "learning_rate": 1.610728679065809e-05, "loss": 8.9831, "step": 93000 }, { "epoch": 0.46448101076181675, "grad_norm": 0.09874343872070312, "learning_rate": 1.610578487571654e-05, "loss": 8.9679, "step": 93010 }, { "epoch": 0.4645309495867562, "grad_norm": 0.09453938156366348, "learning_rate": 1.610428296077499e-05, "loss": 8.9649, "step": 93020 }, { "epoch": 0.46458088841169565, "grad_norm": 0.09480418264865875, "learning_rate": 1.6102781045833436e-05, "loss": 8.9785, "step": 93030 }, { "epoch": 0.4646308272366351, "grad_norm": 0.08807534724473953, "learning_rate": 1.6101279130891887e-05, "loss": 8.9728, "step": 93040 }, { "epoch": 0.46468076606157455, "grad_norm": 0.09540516883134842, "learning_rate": 1.6099777215950337e-05, "loss": 8.9793, "step": 93050 }, { "epoch": 0.464730704886514, "grad_norm": 0.09324967861175537, "learning_rate": 1.6098275301008787e-05, "loss": 8.9792, "step": 93060 }, { "epoch": 0.46478064371145345, "grad_norm": 0.08924508839845657, "learning_rate": 1.6096773386067237e-05, "loss": 8.9898, "step": 93070 }, { "epoch": 0.4648305825363929, "grad_norm": 0.09416824579238892, "learning_rate": 1.6095271471125684e-05, "loss": 8.9758, "step": 93080 }, { "epoch": 0.46488052136133234, "grad_norm": 0.09986349195241928, "learning_rate": 1.6093769556184134e-05, "loss": 8.9774, "step": 93090 }, { "epoch": 0.4649304601862718, "grad_norm": 0.10064547508955002, "learning_rate": 1.6092267641242584e-05, "loss": 8.9643, "step": 93100 }, { "epoch": 0.46498039901121124, "grad_norm": 0.09657155722379684, "learning_rate": 1.6090765726301034e-05, "loss": 8.992, "step": 93110 }, { "epoch": 0.4650303378361507, "grad_norm": 0.09835278242826462, "learning_rate": 1.6089263811359485e-05, "loss": 8.9773, "step": 93120 }, { "epoch": 0.46508027666109014, "grad_norm": 0.09444111585617065, "learning_rate": 1.608776189641793e-05, "loss": 8.9678, "step": 93130 }, { "epoch": 0.4651302154860296, "grad_norm": 0.09481803327798843, "learning_rate": 1.6086259981476385e-05, "loss": 8.9745, "step": 93140 }, { "epoch": 0.46518015431096904, "grad_norm": 0.09177707135677338, "learning_rate": 1.6084758066534832e-05, "loss": 8.973, "step": 93150 }, { "epoch": 0.4652300931359085, "grad_norm": 0.09027852863073349, "learning_rate": 1.6083256151593282e-05, "loss": 8.9625, "step": 93160 }, { "epoch": 0.46528003196084794, "grad_norm": 0.0904780849814415, "learning_rate": 1.6081754236651732e-05, "loss": 8.9686, "step": 93170 }, { "epoch": 0.4653299707857874, "grad_norm": 0.09435466676950455, "learning_rate": 1.608025232171018e-05, "loss": 8.9654, "step": 93180 }, { "epoch": 0.46537990961072684, "grad_norm": 0.09567007422447205, "learning_rate": 1.6078750406768632e-05, "loss": 8.9825, "step": 93190 }, { "epoch": 0.4654298484356663, "grad_norm": 0.09451539069414139, "learning_rate": 1.607724849182708e-05, "loss": 8.9788, "step": 93200 }, { "epoch": 0.46547978726060574, "grad_norm": 0.09207061678171158, "learning_rate": 1.607574657688553e-05, "loss": 8.972, "step": 93210 }, { "epoch": 0.4655297260855452, "grad_norm": 0.09581438452005386, "learning_rate": 1.607424466194398e-05, "loss": 8.9847, "step": 93220 }, { "epoch": 0.46557966491048464, "grad_norm": 0.09088779985904694, "learning_rate": 1.6072742747002426e-05, "loss": 8.9714, "step": 93230 }, { "epoch": 0.4656296037354241, "grad_norm": 0.09318163245916367, "learning_rate": 1.607124083206088e-05, "loss": 8.9726, "step": 93240 }, { "epoch": 0.46567954256036354, "grad_norm": 0.09258940815925598, "learning_rate": 1.6069738917119327e-05, "loss": 8.9878, "step": 93250 }, { "epoch": 0.465729481385303, "grad_norm": 0.09311637282371521, "learning_rate": 1.6068237002177777e-05, "loss": 8.9817, "step": 93260 }, { "epoch": 0.46577942021024243, "grad_norm": 0.09517588466405869, "learning_rate": 1.6066735087236227e-05, "loss": 8.9635, "step": 93270 }, { "epoch": 0.4658293590351819, "grad_norm": 0.08702480047941208, "learning_rate": 1.6065233172294674e-05, "loss": 8.9762, "step": 93280 }, { "epoch": 0.46587929786012133, "grad_norm": 0.0963396281003952, "learning_rate": 1.6063731257353127e-05, "loss": 8.967, "step": 93290 }, { "epoch": 0.4659292366850608, "grad_norm": 0.09384248405694962, "learning_rate": 1.6062229342411574e-05, "loss": 8.9723, "step": 93300 }, { "epoch": 0.46597917551000023, "grad_norm": 0.1044907197356224, "learning_rate": 1.6060727427470024e-05, "loss": 8.9606, "step": 93310 }, { "epoch": 0.4660291143349397, "grad_norm": 0.09456614404916763, "learning_rate": 1.6059225512528475e-05, "loss": 8.9805, "step": 93320 }, { "epoch": 0.46607905315987913, "grad_norm": 0.09772161394357681, "learning_rate": 1.605772359758692e-05, "loss": 8.967, "step": 93330 }, { "epoch": 0.4661289919848186, "grad_norm": 0.08841816335916519, "learning_rate": 1.6056221682645375e-05, "loss": 8.9762, "step": 93340 }, { "epoch": 0.46617893080975803, "grad_norm": 0.0914144292473793, "learning_rate": 1.6054719767703822e-05, "loss": 8.978, "step": 93350 }, { "epoch": 0.4662288696346975, "grad_norm": 0.09596528857946396, "learning_rate": 1.6053217852762272e-05, "loss": 8.956, "step": 93360 }, { "epoch": 0.46627880845963693, "grad_norm": 0.09019865840673447, "learning_rate": 1.6051715937820722e-05, "loss": 8.965, "step": 93370 }, { "epoch": 0.4663287472845764, "grad_norm": 0.09683724492788315, "learning_rate": 1.605021402287917e-05, "loss": 8.9587, "step": 93380 }, { "epoch": 0.4663786861095158, "grad_norm": 0.08984372764825821, "learning_rate": 1.6048712107937622e-05, "loss": 8.9803, "step": 93390 }, { "epoch": 0.4664286249344553, "grad_norm": 0.09242388606071472, "learning_rate": 1.604721019299607e-05, "loss": 8.991, "step": 93400 }, { "epoch": 0.4664785637593947, "grad_norm": 0.0912506952881813, "learning_rate": 1.604570827805452e-05, "loss": 8.9865, "step": 93410 }, { "epoch": 0.4665285025843342, "grad_norm": 0.09702800214290619, "learning_rate": 1.604420636311297e-05, "loss": 8.9536, "step": 93420 }, { "epoch": 0.4665784414092736, "grad_norm": 0.09083381295204163, "learning_rate": 1.6042704448171416e-05, "loss": 8.9659, "step": 93430 }, { "epoch": 0.4666283802342131, "grad_norm": 0.09859057515859604, "learning_rate": 1.604120253322987e-05, "loss": 8.9757, "step": 93440 }, { "epoch": 0.4666783190591525, "grad_norm": 0.09232448786497116, "learning_rate": 1.6039700618288317e-05, "loss": 8.9693, "step": 93450 }, { "epoch": 0.466728257884092, "grad_norm": 0.09486878663301468, "learning_rate": 1.603819870334677e-05, "loss": 8.9813, "step": 93460 }, { "epoch": 0.4667781967090314, "grad_norm": 0.09302292764186859, "learning_rate": 1.6036696788405217e-05, "loss": 8.9782, "step": 93470 }, { "epoch": 0.4668281355339709, "grad_norm": 0.09293092787265778, "learning_rate": 1.6035194873463664e-05, "loss": 8.9629, "step": 93480 }, { "epoch": 0.4668780743589103, "grad_norm": 0.09174246340990067, "learning_rate": 1.6033692958522117e-05, "loss": 8.9694, "step": 93490 }, { "epoch": 0.4669280131838498, "grad_norm": 0.09290467202663422, "learning_rate": 1.6032191043580564e-05, "loss": 8.9646, "step": 93500 }, { "epoch": 0.4669779520087892, "grad_norm": 0.09408622980117798, "learning_rate": 1.6030689128639018e-05, "loss": 8.9694, "step": 93510 }, { "epoch": 0.4670278908337287, "grad_norm": 0.09096479415893555, "learning_rate": 1.6029187213697465e-05, "loss": 8.966, "step": 93520 }, { "epoch": 0.4670778296586681, "grad_norm": 0.09430520981550217, "learning_rate": 1.602768529875591e-05, "loss": 8.9614, "step": 93530 }, { "epoch": 0.4671277684836076, "grad_norm": 0.09501869231462479, "learning_rate": 1.6026183383814365e-05, "loss": 8.954, "step": 93540 }, { "epoch": 0.467177707308547, "grad_norm": 0.09236142039299011, "learning_rate": 1.6024681468872812e-05, "loss": 8.9749, "step": 93550 }, { "epoch": 0.4672276461334865, "grad_norm": 0.09646128863096237, "learning_rate": 1.6023179553931265e-05, "loss": 8.985, "step": 93560 }, { "epoch": 0.4672775849584259, "grad_norm": 0.08994904160499573, "learning_rate": 1.6021677638989712e-05, "loss": 8.9734, "step": 93570 }, { "epoch": 0.4673275237833654, "grad_norm": 0.0974574014544487, "learning_rate": 1.602017572404816e-05, "loss": 8.9715, "step": 93580 }, { "epoch": 0.4673774626083048, "grad_norm": 0.09206629544496536, "learning_rate": 1.6018673809106612e-05, "loss": 8.9758, "step": 93590 }, { "epoch": 0.4674274014332443, "grad_norm": 0.08921888470649719, "learning_rate": 1.601717189416506e-05, "loss": 8.9747, "step": 93600 }, { "epoch": 0.4674773402581837, "grad_norm": 0.09754341095685959, "learning_rate": 1.6015669979223513e-05, "loss": 8.9635, "step": 93610 }, { "epoch": 0.4675272790831232, "grad_norm": 0.09427353739738464, "learning_rate": 1.601416806428196e-05, "loss": 8.9758, "step": 93620 }, { "epoch": 0.4675772179080626, "grad_norm": 0.09223822504281998, "learning_rate": 1.6012666149340406e-05, "loss": 8.9795, "step": 93630 }, { "epoch": 0.4676271567330021, "grad_norm": 0.09685010462999344, "learning_rate": 1.601116423439886e-05, "loss": 8.9705, "step": 93640 }, { "epoch": 0.4676770955579415, "grad_norm": 0.09444907307624817, "learning_rate": 1.6009662319457307e-05, "loss": 8.9722, "step": 93650 }, { "epoch": 0.467727034382881, "grad_norm": 0.09394410252571106, "learning_rate": 1.600816040451576e-05, "loss": 8.9806, "step": 93660 }, { "epoch": 0.4677769732078204, "grad_norm": 0.09267649054527283, "learning_rate": 1.6006658489574207e-05, "loss": 8.9722, "step": 93670 }, { "epoch": 0.4678269120327599, "grad_norm": 0.08718396723270416, "learning_rate": 1.6005156574632654e-05, "loss": 8.9693, "step": 93680 }, { "epoch": 0.4678768508576993, "grad_norm": 0.08936282247304916, "learning_rate": 1.6003654659691107e-05, "loss": 8.9726, "step": 93690 }, { "epoch": 0.4679267896826388, "grad_norm": 0.09415874630212784, "learning_rate": 1.6002152744749554e-05, "loss": 8.9622, "step": 93700 }, { "epoch": 0.4679767285075782, "grad_norm": 0.09426844865083694, "learning_rate": 1.6000650829808008e-05, "loss": 8.9738, "step": 93710 }, { "epoch": 0.4680266673325177, "grad_norm": 0.09191013872623444, "learning_rate": 1.5999148914866455e-05, "loss": 8.9705, "step": 93720 }, { "epoch": 0.4680766061574571, "grad_norm": 0.09426682442426682, "learning_rate": 1.5997646999924905e-05, "loss": 8.9628, "step": 93730 }, { "epoch": 0.4681265449823966, "grad_norm": 0.08494877815246582, "learning_rate": 1.5996145084983355e-05, "loss": 8.9671, "step": 93740 }, { "epoch": 0.468176483807336, "grad_norm": 0.09612907469272614, "learning_rate": 1.5994643170041802e-05, "loss": 8.9614, "step": 93750 }, { "epoch": 0.4682264226322755, "grad_norm": 0.09567809104919434, "learning_rate": 1.5993141255100255e-05, "loss": 8.9663, "step": 93760 }, { "epoch": 0.4682763614572149, "grad_norm": 0.09354501962661743, "learning_rate": 1.5991639340158702e-05, "loss": 8.9696, "step": 93770 }, { "epoch": 0.4683263002821544, "grad_norm": 0.09527242928743362, "learning_rate": 1.5990137425217152e-05, "loss": 8.976, "step": 93780 }, { "epoch": 0.4683762391070938, "grad_norm": 0.09733609855175018, "learning_rate": 1.5988635510275602e-05, "loss": 8.9744, "step": 93790 }, { "epoch": 0.4684261779320333, "grad_norm": 0.10153631865978241, "learning_rate": 1.598713359533405e-05, "loss": 8.9545, "step": 93800 }, { "epoch": 0.4684761167569727, "grad_norm": 0.09621601551771164, "learning_rate": 1.5985631680392503e-05, "loss": 8.992, "step": 93810 }, { "epoch": 0.4685260555819122, "grad_norm": 0.09544295817613602, "learning_rate": 1.598412976545095e-05, "loss": 8.9597, "step": 93820 }, { "epoch": 0.4685759944068516, "grad_norm": 0.09087269753217697, "learning_rate": 1.59826278505094e-05, "loss": 8.9515, "step": 93830 }, { "epoch": 0.4686259332317911, "grad_norm": 0.09846234321594238, "learning_rate": 1.598112593556785e-05, "loss": 8.9714, "step": 93840 }, { "epoch": 0.4686758720567305, "grad_norm": 0.09184664487838745, "learning_rate": 1.5979624020626297e-05, "loss": 8.9678, "step": 93850 }, { "epoch": 0.46872581088167, "grad_norm": 0.0921328142285347, "learning_rate": 1.597812210568475e-05, "loss": 8.9638, "step": 93860 }, { "epoch": 0.4687757497066094, "grad_norm": 0.09225039184093475, "learning_rate": 1.5976620190743197e-05, "loss": 8.9748, "step": 93870 }, { "epoch": 0.4688256885315489, "grad_norm": 0.09426619857549667, "learning_rate": 1.5975118275801647e-05, "loss": 8.957, "step": 93880 }, { "epoch": 0.4688756273564883, "grad_norm": 0.08906444162130356, "learning_rate": 1.5973616360860097e-05, "loss": 8.9666, "step": 93890 }, { "epoch": 0.4689255661814278, "grad_norm": 0.09076976776123047, "learning_rate": 1.5972114445918544e-05, "loss": 8.9806, "step": 93900 }, { "epoch": 0.4689755050063672, "grad_norm": 0.09307584911584854, "learning_rate": 1.5970612530976998e-05, "loss": 8.9683, "step": 93910 }, { "epoch": 0.4690254438313067, "grad_norm": 0.09109794348478317, "learning_rate": 1.5969110616035445e-05, "loss": 8.9724, "step": 93920 }, { "epoch": 0.4690753826562461, "grad_norm": 0.09649874269962311, "learning_rate": 1.5967608701093895e-05, "loss": 8.9707, "step": 93930 }, { "epoch": 0.4691253214811856, "grad_norm": 0.09002482146024704, "learning_rate": 1.5966106786152345e-05, "loss": 8.9689, "step": 93940 }, { "epoch": 0.469175260306125, "grad_norm": 0.0938982143998146, "learning_rate": 1.5964604871210792e-05, "loss": 8.9652, "step": 93950 }, { "epoch": 0.4692251991310644, "grad_norm": 0.0954759269952774, "learning_rate": 1.5963102956269245e-05, "loss": 8.9681, "step": 93960 }, { "epoch": 0.4692751379560039, "grad_norm": 0.09431935846805573, "learning_rate": 1.5961601041327692e-05, "loss": 8.9643, "step": 93970 }, { "epoch": 0.4693250767809433, "grad_norm": 0.10007733106613159, "learning_rate": 1.5960099126386142e-05, "loss": 8.9655, "step": 93980 }, { "epoch": 0.4693750156058828, "grad_norm": 0.09566391259431839, "learning_rate": 1.5958597211444592e-05, "loss": 8.9486, "step": 93990 }, { "epoch": 0.4694249544308222, "grad_norm": 0.08696243166923523, "learning_rate": 1.595709529650304e-05, "loss": 8.9544, "step": 94000 }, { "epoch": 0.4694748932557617, "grad_norm": 0.0935448408126831, "learning_rate": 1.5955593381561493e-05, "loss": 8.9584, "step": 94010 }, { "epoch": 0.4695248320807011, "grad_norm": 0.09453287720680237, "learning_rate": 1.595409146661994e-05, "loss": 8.9592, "step": 94020 }, { "epoch": 0.4695747709056406, "grad_norm": 0.0924343541264534, "learning_rate": 1.5952589551678393e-05, "loss": 8.9641, "step": 94030 }, { "epoch": 0.46962470973058, "grad_norm": 0.09673213213682175, "learning_rate": 1.595108763673684e-05, "loss": 8.9499, "step": 94040 }, { "epoch": 0.4696746485555195, "grad_norm": 0.08831555396318436, "learning_rate": 1.5949585721795287e-05, "loss": 8.9571, "step": 94050 }, { "epoch": 0.4697245873804589, "grad_norm": 0.09080936759710312, "learning_rate": 1.594808380685374e-05, "loss": 8.9697, "step": 94060 }, { "epoch": 0.4697745262053984, "grad_norm": 0.09151973575353622, "learning_rate": 1.5946581891912187e-05, "loss": 8.9607, "step": 94070 }, { "epoch": 0.4698244650303378, "grad_norm": 0.09453890472650528, "learning_rate": 1.594507997697064e-05, "loss": 8.9754, "step": 94080 }, { "epoch": 0.4698744038552773, "grad_norm": 0.09043656289577484, "learning_rate": 1.5943578062029087e-05, "loss": 8.9796, "step": 94090 }, { "epoch": 0.4699243426802167, "grad_norm": 0.09902900457382202, "learning_rate": 1.5942076147087534e-05, "loss": 8.9608, "step": 94100 }, { "epoch": 0.4699742815051562, "grad_norm": 0.09724241495132446, "learning_rate": 1.5940574232145988e-05, "loss": 8.9604, "step": 94110 }, { "epoch": 0.4700242203300956, "grad_norm": 0.09389027208089828, "learning_rate": 1.5939072317204435e-05, "loss": 8.9627, "step": 94120 }, { "epoch": 0.4700741591550351, "grad_norm": 0.08884572237730026, "learning_rate": 1.5937570402262888e-05, "loss": 8.9623, "step": 94130 }, { "epoch": 0.4701240979799745, "grad_norm": 0.09251714497804642, "learning_rate": 1.5936068487321335e-05, "loss": 8.9787, "step": 94140 }, { "epoch": 0.470174036804914, "grad_norm": 0.09012773633003235, "learning_rate": 1.5934566572379785e-05, "loss": 8.9716, "step": 94150 }, { "epoch": 0.4702239756298534, "grad_norm": 0.09131991118192673, "learning_rate": 1.5933064657438235e-05, "loss": 8.9629, "step": 94160 }, { "epoch": 0.4702739144547929, "grad_norm": 0.09043431282043457, "learning_rate": 1.5931562742496682e-05, "loss": 8.9654, "step": 94170 }, { "epoch": 0.4703238532797323, "grad_norm": 0.09283377230167389, "learning_rate": 1.5930060827555136e-05, "loss": 8.9507, "step": 94180 }, { "epoch": 0.4703737921046718, "grad_norm": 0.09180030971765518, "learning_rate": 1.5928558912613582e-05, "loss": 8.9708, "step": 94190 }, { "epoch": 0.4704237309296112, "grad_norm": 0.08900587260723114, "learning_rate": 1.5927056997672033e-05, "loss": 8.9548, "step": 94200 }, { "epoch": 0.4704736697545507, "grad_norm": 0.10260471701622009, "learning_rate": 1.5925555082730483e-05, "loss": 8.9641, "step": 94210 }, { "epoch": 0.4705236085794901, "grad_norm": 0.09451324492692947, "learning_rate": 1.592405316778893e-05, "loss": 8.9697, "step": 94220 }, { "epoch": 0.4705735474044296, "grad_norm": 0.1005653440952301, "learning_rate": 1.5922551252847383e-05, "loss": 8.9598, "step": 94230 }, { "epoch": 0.470623486229369, "grad_norm": 0.09918899834156036, "learning_rate": 1.592104933790583e-05, "loss": 8.9575, "step": 94240 }, { "epoch": 0.4706734250543085, "grad_norm": 0.09747923910617828, "learning_rate": 1.591954742296428e-05, "loss": 8.9644, "step": 94250 }, { "epoch": 0.4707233638792479, "grad_norm": 0.09829793125391006, "learning_rate": 1.591804550802273e-05, "loss": 8.9704, "step": 94260 }, { "epoch": 0.4707733027041874, "grad_norm": 0.10006602108478546, "learning_rate": 1.5916543593081177e-05, "loss": 8.9625, "step": 94270 }, { "epoch": 0.4708232415291268, "grad_norm": 0.09519977867603302, "learning_rate": 1.591504167813963e-05, "loss": 8.9572, "step": 94280 }, { "epoch": 0.4708731803540663, "grad_norm": 0.09414579719305038, "learning_rate": 1.5913539763198077e-05, "loss": 8.9618, "step": 94290 }, { "epoch": 0.4709231191790057, "grad_norm": 0.09214229881763458, "learning_rate": 1.5912037848256528e-05, "loss": 8.968, "step": 94300 }, { "epoch": 0.4709730580039452, "grad_norm": 0.09208973497152328, "learning_rate": 1.5910535933314978e-05, "loss": 8.9711, "step": 94310 }, { "epoch": 0.4710229968288846, "grad_norm": 0.09273689985275269, "learning_rate": 1.5909034018373425e-05, "loss": 8.9477, "step": 94320 }, { "epoch": 0.4710729356538241, "grad_norm": 0.09345366805791855, "learning_rate": 1.5907532103431878e-05, "loss": 8.9459, "step": 94330 }, { "epoch": 0.4711228744787635, "grad_norm": 0.09309998899698257, "learning_rate": 1.5906030188490325e-05, "loss": 8.9669, "step": 94340 }, { "epoch": 0.471172813303703, "grad_norm": 0.0877714604139328, "learning_rate": 1.5904528273548775e-05, "loss": 8.967, "step": 94350 }, { "epoch": 0.4712227521286424, "grad_norm": 0.09679785370826721, "learning_rate": 1.5903026358607225e-05, "loss": 8.9599, "step": 94360 }, { "epoch": 0.4712726909535819, "grad_norm": 0.10014106333255768, "learning_rate": 1.5901524443665672e-05, "loss": 8.9567, "step": 94370 }, { "epoch": 0.4713226297785213, "grad_norm": 0.09142360091209412, "learning_rate": 1.5900022528724126e-05, "loss": 8.9641, "step": 94380 }, { "epoch": 0.4713725686034608, "grad_norm": 0.09642203897237778, "learning_rate": 1.5898520613782573e-05, "loss": 8.9582, "step": 94390 }, { "epoch": 0.4714225074284002, "grad_norm": 0.0904778316617012, "learning_rate": 1.5897018698841023e-05, "loss": 8.9714, "step": 94400 }, { "epoch": 0.47147244625333967, "grad_norm": 0.09670837968587875, "learning_rate": 1.5895516783899473e-05, "loss": 8.9466, "step": 94410 }, { "epoch": 0.4715223850782791, "grad_norm": 0.09427211433649063, "learning_rate": 1.589401486895792e-05, "loss": 8.9493, "step": 94420 }, { "epoch": 0.47157232390321857, "grad_norm": 0.09509798139333725, "learning_rate": 1.5892512954016373e-05, "loss": 8.9536, "step": 94430 }, { "epoch": 0.471622262728158, "grad_norm": 0.09237992763519287, "learning_rate": 1.589101103907482e-05, "loss": 8.9698, "step": 94440 }, { "epoch": 0.47167220155309747, "grad_norm": 0.09039132297039032, "learning_rate": 1.588950912413327e-05, "loss": 8.96, "step": 94450 }, { "epoch": 0.4717221403780369, "grad_norm": 0.09362545609474182, "learning_rate": 1.588800720919172e-05, "loss": 8.9552, "step": 94460 }, { "epoch": 0.47177207920297637, "grad_norm": 0.09487809240818024, "learning_rate": 1.588650529425017e-05, "loss": 8.9529, "step": 94470 }, { "epoch": 0.4718220180279158, "grad_norm": 0.09452398866415024, "learning_rate": 1.588500337930862e-05, "loss": 8.9601, "step": 94480 }, { "epoch": 0.47187195685285527, "grad_norm": 0.08807900547981262, "learning_rate": 1.5883501464367068e-05, "loss": 8.9602, "step": 94490 }, { "epoch": 0.4719218956777947, "grad_norm": 0.09054072201251984, "learning_rate": 1.5881999549425518e-05, "loss": 8.9705, "step": 94500 }, { "epoch": 0.47197183450273417, "grad_norm": 0.0914943590760231, "learning_rate": 1.5880497634483968e-05, "loss": 8.9667, "step": 94510 }, { "epoch": 0.4720217733276736, "grad_norm": 0.09846024960279465, "learning_rate": 1.5878995719542418e-05, "loss": 8.964, "step": 94520 }, { "epoch": 0.47207171215261307, "grad_norm": 0.08973458409309387, "learning_rate": 1.5877493804600868e-05, "loss": 8.9691, "step": 94530 }, { "epoch": 0.4721216509775525, "grad_norm": 0.09380858391523361, "learning_rate": 1.5875991889659315e-05, "loss": 8.9595, "step": 94540 }, { "epoch": 0.47217158980249196, "grad_norm": 0.09617038071155548, "learning_rate": 1.5874489974717765e-05, "loss": 8.9691, "step": 94550 }, { "epoch": 0.4722215286274314, "grad_norm": 0.09885700792074203, "learning_rate": 1.5872988059776215e-05, "loss": 8.9462, "step": 94560 }, { "epoch": 0.47227146745237086, "grad_norm": 0.08958373963832855, "learning_rate": 1.5871486144834666e-05, "loss": 8.9666, "step": 94570 }, { "epoch": 0.4723214062773103, "grad_norm": 0.09378182142972946, "learning_rate": 1.5869984229893116e-05, "loss": 8.9554, "step": 94580 }, { "epoch": 0.47237134510224976, "grad_norm": 0.09632568061351776, "learning_rate": 1.5868482314951563e-05, "loss": 8.9563, "step": 94590 }, { "epoch": 0.4724212839271892, "grad_norm": 0.09778321534395218, "learning_rate": 1.5866980400010013e-05, "loss": 8.9532, "step": 94600 }, { "epoch": 0.47247122275212866, "grad_norm": 0.08909459412097931, "learning_rate": 1.5865478485068463e-05, "loss": 8.9759, "step": 94610 }, { "epoch": 0.4725211615770681, "grad_norm": 0.09531407058238983, "learning_rate": 1.5863976570126913e-05, "loss": 8.9577, "step": 94620 }, { "epoch": 0.47257110040200756, "grad_norm": 0.09269222617149353, "learning_rate": 1.5862474655185363e-05, "loss": 8.959, "step": 94630 }, { "epoch": 0.472621039226947, "grad_norm": 0.09362973272800446, "learning_rate": 1.586097274024381e-05, "loss": 8.953, "step": 94640 }, { "epoch": 0.47267097805188646, "grad_norm": 0.09257864207029343, "learning_rate": 1.585947082530226e-05, "loss": 8.9646, "step": 94650 }, { "epoch": 0.4727209168768259, "grad_norm": 0.10312341898679733, "learning_rate": 1.585796891036071e-05, "loss": 8.9662, "step": 94660 }, { "epoch": 0.47277085570176536, "grad_norm": 0.09162463247776031, "learning_rate": 1.585646699541916e-05, "loss": 8.9528, "step": 94670 }, { "epoch": 0.4728207945267048, "grad_norm": 0.09410829097032547, "learning_rate": 1.585496508047761e-05, "loss": 8.9487, "step": 94680 }, { "epoch": 0.47287073335164426, "grad_norm": 0.09216289222240448, "learning_rate": 1.5853463165536058e-05, "loss": 8.9574, "step": 94690 }, { "epoch": 0.4729206721765837, "grad_norm": 0.09374202787876129, "learning_rate": 1.5851961250594508e-05, "loss": 8.9629, "step": 94700 }, { "epoch": 0.47297061100152316, "grad_norm": 0.09870696812868118, "learning_rate": 1.5850459335652958e-05, "loss": 8.9696, "step": 94710 }, { "epoch": 0.4730205498264626, "grad_norm": 0.0899372324347496, "learning_rate": 1.5848957420711408e-05, "loss": 8.9589, "step": 94720 }, { "epoch": 0.47307048865140205, "grad_norm": 0.0908857062458992, "learning_rate": 1.5847455505769858e-05, "loss": 8.9605, "step": 94730 }, { "epoch": 0.4731204274763415, "grad_norm": 0.08634936064481735, "learning_rate": 1.5845953590828305e-05, "loss": 8.9659, "step": 94740 }, { "epoch": 0.47317036630128095, "grad_norm": 0.09862346947193146, "learning_rate": 1.5844451675886755e-05, "loss": 8.9518, "step": 94750 }, { "epoch": 0.4732203051262204, "grad_norm": 0.09859306365251541, "learning_rate": 1.5842949760945205e-05, "loss": 8.9509, "step": 94760 }, { "epoch": 0.47327024395115985, "grad_norm": 0.09249970316886902, "learning_rate": 1.5841447846003656e-05, "loss": 8.9593, "step": 94770 }, { "epoch": 0.4733201827760993, "grad_norm": 0.09957744181156158, "learning_rate": 1.5839945931062106e-05, "loss": 8.9597, "step": 94780 }, { "epoch": 0.47337012160103875, "grad_norm": 0.0913504958152771, "learning_rate": 1.5838444016120556e-05, "loss": 8.968, "step": 94790 }, { "epoch": 0.4734200604259782, "grad_norm": 0.09559644013643265, "learning_rate": 1.5836942101179003e-05, "loss": 8.9594, "step": 94800 }, { "epoch": 0.47346999925091765, "grad_norm": 0.09303098171949387, "learning_rate": 1.5835440186237453e-05, "loss": 8.9535, "step": 94810 }, { "epoch": 0.47351993807585707, "grad_norm": 0.09096693992614746, "learning_rate": 1.5833938271295903e-05, "loss": 8.9503, "step": 94820 }, { "epoch": 0.47356987690079655, "grad_norm": 0.09463432431221008, "learning_rate": 1.5832436356354353e-05, "loss": 8.9561, "step": 94830 }, { "epoch": 0.47361981572573597, "grad_norm": 0.09306497126817703, "learning_rate": 1.5830934441412803e-05, "loss": 8.9495, "step": 94840 }, { "epoch": 0.47366975455067545, "grad_norm": 0.09330885857343674, "learning_rate": 1.582943252647125e-05, "loss": 8.9607, "step": 94850 }, { "epoch": 0.47371969337561487, "grad_norm": 0.0966872051358223, "learning_rate": 1.58279306115297e-05, "loss": 8.9557, "step": 94860 }, { "epoch": 0.47376963220055435, "grad_norm": 0.08871348202228546, "learning_rate": 1.582642869658815e-05, "loss": 8.9677, "step": 94870 }, { "epoch": 0.47381957102549377, "grad_norm": 0.090989850461483, "learning_rate": 1.58249267816466e-05, "loss": 8.9661, "step": 94880 }, { "epoch": 0.47386950985043325, "grad_norm": 0.0886576771736145, "learning_rate": 1.582342486670505e-05, "loss": 8.9559, "step": 94890 }, { "epoch": 0.47391944867537267, "grad_norm": 0.09537599235773087, "learning_rate": 1.5821922951763498e-05, "loss": 8.9496, "step": 94900 }, { "epoch": 0.47396938750031214, "grad_norm": 0.08645906299352646, "learning_rate": 1.5820421036821948e-05, "loss": 8.9599, "step": 94910 }, { "epoch": 0.47401932632525157, "grad_norm": 0.0953529104590416, "learning_rate": 1.5818919121880398e-05, "loss": 8.9579, "step": 94920 }, { "epoch": 0.47406926515019104, "grad_norm": 0.10060861706733704, "learning_rate": 1.5817417206938848e-05, "loss": 8.9599, "step": 94930 }, { "epoch": 0.47411920397513047, "grad_norm": 0.09143946319818497, "learning_rate": 1.58159152919973e-05, "loss": 8.9482, "step": 94940 }, { "epoch": 0.4741691428000699, "grad_norm": 0.09554131329059601, "learning_rate": 1.5814413377055745e-05, "loss": 8.9516, "step": 94950 }, { "epoch": 0.47421908162500936, "grad_norm": 0.09513738751411438, "learning_rate": 1.5812911462114195e-05, "loss": 8.9709, "step": 94960 }, { "epoch": 0.4742690204499488, "grad_norm": 0.09006723016500473, "learning_rate": 1.5811409547172646e-05, "loss": 8.9641, "step": 94970 }, { "epoch": 0.47431895927488826, "grad_norm": 0.08737257868051529, "learning_rate": 1.5809907632231096e-05, "loss": 8.9553, "step": 94980 }, { "epoch": 0.4743688980998277, "grad_norm": 0.09388820081949234, "learning_rate": 1.5808405717289546e-05, "loss": 8.9634, "step": 94990 }, { "epoch": 0.47441883692476716, "grad_norm": 0.0908634141087532, "learning_rate": 1.5806903802347993e-05, "loss": 8.954, "step": 95000 }, { "epoch": 0.4744687757497066, "grad_norm": 0.09147128462791443, "learning_rate": 1.5805401887406443e-05, "loss": 8.9601, "step": 95010 }, { "epoch": 0.47451871457464606, "grad_norm": 0.09336107224225998, "learning_rate": 1.5803899972464893e-05, "loss": 8.9496, "step": 95020 }, { "epoch": 0.4745686533995855, "grad_norm": 0.09616447985172272, "learning_rate": 1.5802398057523343e-05, "loss": 8.9433, "step": 95030 }, { "epoch": 0.47461859222452496, "grad_norm": 0.09238088130950928, "learning_rate": 1.5800896142581793e-05, "loss": 8.9607, "step": 95040 }, { "epoch": 0.4746685310494644, "grad_norm": 0.09115064889192581, "learning_rate": 1.579939422764024e-05, "loss": 8.9525, "step": 95050 }, { "epoch": 0.47471846987440386, "grad_norm": 0.09081438928842545, "learning_rate": 1.579789231269869e-05, "loss": 8.9477, "step": 95060 }, { "epoch": 0.4747684086993433, "grad_norm": 0.09441505372524261, "learning_rate": 1.579639039775714e-05, "loss": 8.9469, "step": 95070 }, { "epoch": 0.47481834752428276, "grad_norm": 0.0935746282339096, "learning_rate": 1.579488848281559e-05, "loss": 8.978, "step": 95080 }, { "epoch": 0.4748682863492222, "grad_norm": 0.09515486657619476, "learning_rate": 1.579338656787404e-05, "loss": 8.9698, "step": 95090 }, { "epoch": 0.47491822517416166, "grad_norm": 0.10014467686414719, "learning_rate": 1.5791884652932488e-05, "loss": 8.9462, "step": 95100 }, { "epoch": 0.4749681639991011, "grad_norm": 0.09181597828865051, "learning_rate": 1.579038273799094e-05, "loss": 8.9549, "step": 95110 }, { "epoch": 0.47501810282404056, "grad_norm": 0.09579970687627792, "learning_rate": 1.5788880823049388e-05, "loss": 8.9546, "step": 95120 }, { "epoch": 0.47506804164898, "grad_norm": 0.0962633490562439, "learning_rate": 1.5787378908107838e-05, "loss": 8.9442, "step": 95130 }, { "epoch": 0.47511798047391945, "grad_norm": 0.09618624299764633, "learning_rate": 1.578587699316629e-05, "loss": 8.9584, "step": 95140 }, { "epoch": 0.4751679192988589, "grad_norm": 0.08641847223043442, "learning_rate": 1.5784375078224735e-05, "loss": 8.9636, "step": 95150 }, { "epoch": 0.47521785812379835, "grad_norm": 0.0992530807852745, "learning_rate": 1.578287316328319e-05, "loss": 8.9572, "step": 95160 }, { "epoch": 0.4752677969487378, "grad_norm": 0.09517235308885574, "learning_rate": 1.5781371248341636e-05, "loss": 8.9476, "step": 95170 }, { "epoch": 0.47531773577367725, "grad_norm": 0.09363456070423126, "learning_rate": 1.5779869333400086e-05, "loss": 8.9558, "step": 95180 }, { "epoch": 0.4753676745986167, "grad_norm": 0.0897691547870636, "learning_rate": 1.5778367418458536e-05, "loss": 8.9506, "step": 95190 }, { "epoch": 0.47541761342355615, "grad_norm": 0.09347530454397202, "learning_rate": 1.5776865503516983e-05, "loss": 8.9519, "step": 95200 }, { "epoch": 0.4754675522484956, "grad_norm": 0.09927235543727875, "learning_rate": 1.5775363588575436e-05, "loss": 8.9348, "step": 95210 }, { "epoch": 0.47551749107343505, "grad_norm": 0.09206566959619522, "learning_rate": 1.5773861673633883e-05, "loss": 8.9514, "step": 95220 }, { "epoch": 0.47556742989837447, "grad_norm": 0.09086683392524719, "learning_rate": 1.5772359758692333e-05, "loss": 8.9393, "step": 95230 }, { "epoch": 0.47561736872331395, "grad_norm": 0.0883910059928894, "learning_rate": 1.5770857843750783e-05, "loss": 8.9475, "step": 95240 }, { "epoch": 0.47566730754825337, "grad_norm": 0.09318321943283081, "learning_rate": 1.576935592880923e-05, "loss": 8.9376, "step": 95250 }, { "epoch": 0.47571724637319285, "grad_norm": 0.09458793699741364, "learning_rate": 1.5767854013867684e-05, "loss": 8.9374, "step": 95260 }, { "epoch": 0.47576718519813227, "grad_norm": 0.09189481288194656, "learning_rate": 1.576635209892613e-05, "loss": 8.946, "step": 95270 }, { "epoch": 0.47581712402307175, "grad_norm": 0.09554219245910645, "learning_rate": 1.576485018398458e-05, "loss": 8.9526, "step": 95280 }, { "epoch": 0.47586706284801117, "grad_norm": 0.09450355917215347, "learning_rate": 1.576334826904303e-05, "loss": 8.957, "step": 95290 }, { "epoch": 0.47591700167295065, "grad_norm": 0.08791901171207428, "learning_rate": 1.5761846354101478e-05, "loss": 8.9516, "step": 95300 }, { "epoch": 0.47596694049789007, "grad_norm": 0.08911871910095215, "learning_rate": 1.576034443915993e-05, "loss": 8.9511, "step": 95310 }, { "epoch": 0.47601687932282954, "grad_norm": 0.09467475861310959, "learning_rate": 1.5758842524218378e-05, "loss": 8.9439, "step": 95320 }, { "epoch": 0.47606681814776897, "grad_norm": 0.09240403026342392, "learning_rate": 1.5757340609276828e-05, "loss": 8.9443, "step": 95330 }, { "epoch": 0.47611675697270844, "grad_norm": 0.09284543991088867, "learning_rate": 1.575583869433528e-05, "loss": 8.9489, "step": 95340 }, { "epoch": 0.47616669579764787, "grad_norm": 0.0898863673210144, "learning_rate": 1.5754336779393725e-05, "loss": 8.9601, "step": 95350 }, { "epoch": 0.47621663462258734, "grad_norm": 0.09170831739902496, "learning_rate": 1.575283486445218e-05, "loss": 8.9568, "step": 95360 }, { "epoch": 0.47626657344752676, "grad_norm": 0.0939890593290329, "learning_rate": 1.5751332949510626e-05, "loss": 8.9392, "step": 95370 }, { "epoch": 0.47631651227246624, "grad_norm": 0.08675019443035126, "learning_rate": 1.5749831034569076e-05, "loss": 8.9485, "step": 95380 }, { "epoch": 0.47636645109740566, "grad_norm": 0.08740539848804474, "learning_rate": 1.5748329119627526e-05, "loss": 8.9461, "step": 95390 }, { "epoch": 0.47641638992234514, "grad_norm": 0.08891706168651581, "learning_rate": 1.5746827204685973e-05, "loss": 8.9638, "step": 95400 }, { "epoch": 0.47646632874728456, "grad_norm": 0.09209989011287689, "learning_rate": 1.5745325289744426e-05, "loss": 8.9543, "step": 95410 }, { "epoch": 0.47651626757222404, "grad_norm": 0.09303548187017441, "learning_rate": 1.5743823374802873e-05, "loss": 8.9472, "step": 95420 }, { "epoch": 0.47656620639716346, "grad_norm": 0.09217501431703568, "learning_rate": 1.5742321459861327e-05, "loss": 8.9547, "step": 95430 }, { "epoch": 0.47661614522210294, "grad_norm": 0.08850811421871185, "learning_rate": 1.5740819544919773e-05, "loss": 8.9523, "step": 95440 }, { "epoch": 0.47666608404704236, "grad_norm": 0.09346584975719452, "learning_rate": 1.573931762997822e-05, "loss": 8.9549, "step": 95450 }, { "epoch": 0.47671602287198184, "grad_norm": 0.09290006756782532, "learning_rate": 1.5737815715036674e-05, "loss": 8.9536, "step": 95460 }, { "epoch": 0.47676596169692126, "grad_norm": 0.09277894347906113, "learning_rate": 1.573631380009512e-05, "loss": 8.9525, "step": 95470 }, { "epoch": 0.47681590052186074, "grad_norm": 0.09250402450561523, "learning_rate": 1.5734811885153574e-05, "loss": 8.9305, "step": 95480 }, { "epoch": 0.47686583934680016, "grad_norm": 0.08638934046030045, "learning_rate": 1.573330997021202e-05, "loss": 8.949, "step": 95490 }, { "epoch": 0.47691577817173963, "grad_norm": 0.10017383098602295, "learning_rate": 1.5731808055270468e-05, "loss": 8.9447, "step": 95500 }, { "epoch": 0.47696571699667906, "grad_norm": 0.09218341112136841, "learning_rate": 1.573030614032892e-05, "loss": 8.9477, "step": 95510 }, { "epoch": 0.47701565582161853, "grad_norm": 0.09426803141832352, "learning_rate": 1.5728804225387368e-05, "loss": 8.9439, "step": 95520 }, { "epoch": 0.47706559464655796, "grad_norm": 0.08649339526891708, "learning_rate": 1.572730231044582e-05, "loss": 8.9515, "step": 95530 }, { "epoch": 0.47711553347149743, "grad_norm": 0.09477028250694275, "learning_rate": 1.572580039550427e-05, "loss": 8.958, "step": 95540 }, { "epoch": 0.47716547229643685, "grad_norm": 0.09427261352539062, "learning_rate": 1.5724298480562715e-05, "loss": 8.9478, "step": 95550 }, { "epoch": 0.47721541112137633, "grad_norm": 0.09864936023950577, "learning_rate": 1.572279656562117e-05, "loss": 8.9301, "step": 95560 }, { "epoch": 0.47726534994631575, "grad_norm": 0.09512222558259964, "learning_rate": 1.5721294650679616e-05, "loss": 8.9521, "step": 95570 }, { "epoch": 0.47731528877125523, "grad_norm": 0.09052599221467972, "learning_rate": 1.571979273573807e-05, "loss": 8.949, "step": 95580 }, { "epoch": 0.47736522759619465, "grad_norm": 0.09813720732927322, "learning_rate": 1.5718290820796516e-05, "loss": 8.9293, "step": 95590 }, { "epoch": 0.47741516642113413, "grad_norm": 0.09309673309326172, "learning_rate": 1.5716788905854963e-05, "loss": 8.9349, "step": 95600 }, { "epoch": 0.47746510524607355, "grad_norm": 0.08683782070875168, "learning_rate": 1.5715286990913416e-05, "loss": 8.9493, "step": 95610 }, { "epoch": 0.47751504407101303, "grad_norm": 0.09030024707317352, "learning_rate": 1.5713785075971863e-05, "loss": 8.9459, "step": 95620 }, { "epoch": 0.47756498289595245, "grad_norm": 0.09140963852405548, "learning_rate": 1.5712283161030317e-05, "loss": 8.9456, "step": 95630 }, { "epoch": 0.4776149217208919, "grad_norm": 0.09844744205474854, "learning_rate": 1.5710781246088763e-05, "loss": 8.9562, "step": 95640 }, { "epoch": 0.47766486054583135, "grad_norm": 0.09246145933866501, "learning_rate": 1.570927933114721e-05, "loss": 8.9507, "step": 95650 }, { "epoch": 0.4777147993707708, "grad_norm": 0.09520356357097626, "learning_rate": 1.5707777416205664e-05, "loss": 8.9419, "step": 95660 }, { "epoch": 0.47776473819571025, "grad_norm": 0.09316331148147583, "learning_rate": 1.570627550126411e-05, "loss": 8.9404, "step": 95670 }, { "epoch": 0.4778146770206497, "grad_norm": 0.09663616865873337, "learning_rate": 1.5704773586322564e-05, "loss": 8.9278, "step": 95680 }, { "epoch": 0.47786461584558915, "grad_norm": 0.09337840229272842, "learning_rate": 1.570327167138101e-05, "loss": 8.9441, "step": 95690 }, { "epoch": 0.4779145546705286, "grad_norm": 0.0952988788485527, "learning_rate": 1.5701769756439458e-05, "loss": 8.9642, "step": 95700 }, { "epoch": 0.47796449349546805, "grad_norm": 0.09496844559907913, "learning_rate": 1.570026784149791e-05, "loss": 8.9474, "step": 95710 }, { "epoch": 0.4780144323204075, "grad_norm": 0.09619852900505066, "learning_rate": 1.5698765926556358e-05, "loss": 8.9422, "step": 95720 }, { "epoch": 0.47806437114534694, "grad_norm": 0.10100951045751572, "learning_rate": 1.5697264011614812e-05, "loss": 8.9483, "step": 95730 }, { "epoch": 0.4781143099702864, "grad_norm": 0.08543970435857773, "learning_rate": 1.569576209667326e-05, "loss": 8.9605, "step": 95740 }, { "epoch": 0.47816424879522584, "grad_norm": 0.09065885096788406, "learning_rate": 1.569426018173171e-05, "loss": 8.9482, "step": 95750 }, { "epoch": 0.4782141876201653, "grad_norm": 0.09514321386814117, "learning_rate": 1.569275826679016e-05, "loss": 8.9535, "step": 95760 }, { "epoch": 0.47826412644510474, "grad_norm": 0.08945092558860779, "learning_rate": 1.5691256351848606e-05, "loss": 8.9462, "step": 95770 }, { "epoch": 0.4783140652700442, "grad_norm": 0.09275572746992111, "learning_rate": 1.568975443690706e-05, "loss": 8.9564, "step": 95780 }, { "epoch": 0.47836400409498364, "grad_norm": 0.09393645077943802, "learning_rate": 1.5688252521965506e-05, "loss": 8.9396, "step": 95790 }, { "epoch": 0.4784139429199231, "grad_norm": 0.08892881125211716, "learning_rate": 1.5686750607023956e-05, "loss": 8.9488, "step": 95800 }, { "epoch": 0.47846388174486254, "grad_norm": 0.09092603623867035, "learning_rate": 1.5685248692082406e-05, "loss": 8.9466, "step": 95810 }, { "epoch": 0.478513820569802, "grad_norm": 0.09448976814746857, "learning_rate": 1.5683746777140853e-05, "loss": 8.9469, "step": 95820 }, { "epoch": 0.47856375939474144, "grad_norm": 0.0917850136756897, "learning_rate": 1.5682244862199307e-05, "loss": 8.9422, "step": 95830 }, { "epoch": 0.4786136982196809, "grad_norm": 0.09401079267263412, "learning_rate": 1.5680742947257754e-05, "loss": 8.9421, "step": 95840 }, { "epoch": 0.47866363704462034, "grad_norm": 0.09111663699150085, "learning_rate": 1.5679241032316204e-05, "loss": 8.9468, "step": 95850 }, { "epoch": 0.4787135758695598, "grad_norm": 0.09453728049993515, "learning_rate": 1.5677739117374654e-05, "loss": 8.962, "step": 95860 }, { "epoch": 0.47876351469449924, "grad_norm": 0.09403866529464722, "learning_rate": 1.56762372024331e-05, "loss": 8.9499, "step": 95870 }, { "epoch": 0.4788134535194387, "grad_norm": 0.09163626283407211, "learning_rate": 1.5674735287491554e-05, "loss": 8.9528, "step": 95880 }, { "epoch": 0.47886339234437814, "grad_norm": 0.09393519908189774, "learning_rate": 1.567323337255e-05, "loss": 8.9393, "step": 95890 }, { "epoch": 0.4789133311693176, "grad_norm": 0.09879069030284882, "learning_rate": 1.567173145760845e-05, "loss": 8.9523, "step": 95900 }, { "epoch": 0.47896326999425703, "grad_norm": 0.09257882833480835, "learning_rate": 1.56702295426669e-05, "loss": 8.9472, "step": 95910 }, { "epoch": 0.47901320881919646, "grad_norm": 0.09598603844642639, "learning_rate": 1.5668727627725348e-05, "loss": 8.9522, "step": 95920 }, { "epoch": 0.47906314764413593, "grad_norm": 0.09990739077329636, "learning_rate": 1.5667225712783802e-05, "loss": 8.9547, "step": 95930 }, { "epoch": 0.47911308646907536, "grad_norm": 0.09603408724069595, "learning_rate": 1.566572379784225e-05, "loss": 8.9491, "step": 95940 }, { "epoch": 0.47916302529401483, "grad_norm": 0.09766265004873276, "learning_rate": 1.56642218829007e-05, "loss": 8.9592, "step": 95950 }, { "epoch": 0.47921296411895425, "grad_norm": 0.08897819370031357, "learning_rate": 1.566271996795915e-05, "loss": 8.9378, "step": 95960 }, { "epoch": 0.47926290294389373, "grad_norm": 0.09036344289779663, "learning_rate": 1.5661218053017596e-05, "loss": 8.9585, "step": 95970 }, { "epoch": 0.47931284176883315, "grad_norm": 0.0929788202047348, "learning_rate": 1.565971613807605e-05, "loss": 8.941, "step": 95980 }, { "epoch": 0.47936278059377263, "grad_norm": 0.08704034984111786, "learning_rate": 1.5658214223134496e-05, "loss": 8.9409, "step": 95990 }, { "epoch": 0.47941271941871205, "grad_norm": 0.08739351481199265, "learning_rate": 1.5656712308192946e-05, "loss": 8.9567, "step": 96000 }, { "epoch": 0.47946265824365153, "grad_norm": 0.0888032615184784, "learning_rate": 1.5655210393251396e-05, "loss": 8.95, "step": 96010 }, { "epoch": 0.47951259706859095, "grad_norm": 0.09704812616109848, "learning_rate": 1.5653708478309843e-05, "loss": 8.9473, "step": 96020 }, { "epoch": 0.47956253589353043, "grad_norm": 0.0939079076051712, "learning_rate": 1.5652206563368297e-05, "loss": 8.939, "step": 96030 }, { "epoch": 0.47961247471846985, "grad_norm": 0.09814861416816711, "learning_rate": 1.5650704648426744e-05, "loss": 8.9571, "step": 96040 }, { "epoch": 0.4796624135434093, "grad_norm": 0.09225606173276901, "learning_rate": 1.5649202733485194e-05, "loss": 8.9364, "step": 96050 }, { "epoch": 0.47971235236834875, "grad_norm": 0.08847086131572723, "learning_rate": 1.5647700818543644e-05, "loss": 8.9359, "step": 96060 }, { "epoch": 0.4797622911932882, "grad_norm": 0.09193941950798035, "learning_rate": 1.5646198903602094e-05, "loss": 8.9464, "step": 96070 }, { "epoch": 0.47981223001822765, "grad_norm": 0.09449310600757599, "learning_rate": 1.5644696988660544e-05, "loss": 8.9496, "step": 96080 }, { "epoch": 0.4798621688431671, "grad_norm": 0.08825480937957764, "learning_rate": 1.564319507371899e-05, "loss": 8.9294, "step": 96090 }, { "epoch": 0.47991210766810655, "grad_norm": 0.10034892708063126, "learning_rate": 1.564169315877744e-05, "loss": 8.9352, "step": 96100 }, { "epoch": 0.479962046493046, "grad_norm": 0.08834687620401382, "learning_rate": 1.564019124383589e-05, "loss": 8.9406, "step": 96110 }, { "epoch": 0.48001198531798545, "grad_norm": 0.09365418553352356, "learning_rate": 1.563868932889434e-05, "loss": 8.942, "step": 96120 }, { "epoch": 0.4800619241429249, "grad_norm": 0.09072919934988022, "learning_rate": 1.5637187413952792e-05, "loss": 8.9519, "step": 96130 }, { "epoch": 0.48011186296786434, "grad_norm": 0.09022905677556992, "learning_rate": 1.563568549901124e-05, "loss": 8.9463, "step": 96140 }, { "epoch": 0.4801618017928038, "grad_norm": 0.09697787463665009, "learning_rate": 1.563418358406969e-05, "loss": 8.9448, "step": 96150 }, { "epoch": 0.48021174061774324, "grad_norm": 0.09243085980415344, "learning_rate": 1.563268166912814e-05, "loss": 8.9423, "step": 96160 }, { "epoch": 0.4802616794426827, "grad_norm": 0.08860752731561661, "learning_rate": 1.563117975418659e-05, "loss": 8.9517, "step": 96170 }, { "epoch": 0.48031161826762214, "grad_norm": 0.09040497988462448, "learning_rate": 1.562967783924504e-05, "loss": 8.9399, "step": 96180 }, { "epoch": 0.4803615570925616, "grad_norm": 0.09311524033546448, "learning_rate": 1.5628175924303486e-05, "loss": 8.929, "step": 96190 }, { "epoch": 0.48041149591750104, "grad_norm": 0.08853588253259659, "learning_rate": 1.5626674009361936e-05, "loss": 8.9327, "step": 96200 }, { "epoch": 0.4804614347424405, "grad_norm": 0.09366882592439651, "learning_rate": 1.5625172094420386e-05, "loss": 8.9437, "step": 96210 }, { "epoch": 0.48051137356737994, "grad_norm": 0.09654311090707779, "learning_rate": 1.5623670179478837e-05, "loss": 8.9322, "step": 96220 }, { "epoch": 0.4805613123923194, "grad_norm": 0.09593677520751953, "learning_rate": 1.5622168264537287e-05, "loss": 8.9382, "step": 96230 }, { "epoch": 0.48061125121725884, "grad_norm": 0.09233121573925018, "learning_rate": 1.5620666349595734e-05, "loss": 8.9288, "step": 96240 }, { "epoch": 0.4806611900421983, "grad_norm": 0.0946589857339859, "learning_rate": 1.5619164434654184e-05, "loss": 8.9519, "step": 96250 }, { "epoch": 0.48071112886713774, "grad_norm": 0.09263359010219574, "learning_rate": 1.5617662519712634e-05, "loss": 8.9297, "step": 96260 }, { "epoch": 0.4807610676920772, "grad_norm": 0.09187857806682587, "learning_rate": 1.5616160604771084e-05, "loss": 8.9482, "step": 96270 }, { "epoch": 0.48081100651701664, "grad_norm": 0.09805098176002502, "learning_rate": 1.5614658689829534e-05, "loss": 8.9409, "step": 96280 }, { "epoch": 0.4808609453419561, "grad_norm": 0.09072684496641159, "learning_rate": 1.561315677488798e-05, "loss": 8.9378, "step": 96290 }, { "epoch": 0.48091088416689554, "grad_norm": 0.0932784229516983, "learning_rate": 1.561165485994643e-05, "loss": 8.9335, "step": 96300 }, { "epoch": 0.480960822991835, "grad_norm": 0.09350146353244781, "learning_rate": 1.561015294500488e-05, "loss": 8.9404, "step": 96310 }, { "epoch": 0.48101076181677443, "grad_norm": 0.09393427520990372, "learning_rate": 1.560865103006333e-05, "loss": 8.937, "step": 96320 }, { "epoch": 0.4810607006417139, "grad_norm": 0.09077678620815277, "learning_rate": 1.5607149115121782e-05, "loss": 8.9297, "step": 96330 }, { "epoch": 0.48111063946665333, "grad_norm": 0.08859416097402573, "learning_rate": 1.560564720018023e-05, "loss": 8.9511, "step": 96340 }, { "epoch": 0.4811605782915928, "grad_norm": 0.09834136813879013, "learning_rate": 1.560414528523868e-05, "loss": 8.9462, "step": 96350 }, { "epoch": 0.48121051711653223, "grad_norm": 0.09708964824676514, "learning_rate": 1.560264337029713e-05, "loss": 8.9358, "step": 96360 }, { "epoch": 0.4812604559414717, "grad_norm": 0.09646966308355331, "learning_rate": 1.560114145535558e-05, "loss": 8.9394, "step": 96370 }, { "epoch": 0.48131039476641113, "grad_norm": 0.09074628353118896, "learning_rate": 1.559963954041403e-05, "loss": 8.9441, "step": 96380 }, { "epoch": 0.4813603335913506, "grad_norm": 0.09401838481426239, "learning_rate": 1.559813762547248e-05, "loss": 8.9365, "step": 96390 }, { "epoch": 0.48141027241629003, "grad_norm": 0.09368253499269485, "learning_rate": 1.5596635710530926e-05, "loss": 8.9477, "step": 96400 }, { "epoch": 0.4814602112412295, "grad_norm": 0.09199754893779755, "learning_rate": 1.5595133795589376e-05, "loss": 8.9274, "step": 96410 }, { "epoch": 0.48151015006616893, "grad_norm": 0.09040980041027069, "learning_rate": 1.5593631880647827e-05, "loss": 8.9531, "step": 96420 }, { "epoch": 0.4815600888911084, "grad_norm": 0.09219523519277573, "learning_rate": 1.5592129965706277e-05, "loss": 8.9384, "step": 96430 }, { "epoch": 0.48161002771604783, "grad_norm": 0.09335026144981384, "learning_rate": 1.5590628050764727e-05, "loss": 8.9469, "step": 96440 }, { "epoch": 0.4816599665409873, "grad_norm": 0.09123118221759796, "learning_rate": 1.5589126135823174e-05, "loss": 8.9421, "step": 96450 }, { "epoch": 0.4817099053659267, "grad_norm": 0.09461282938718796, "learning_rate": 1.5587624220881624e-05, "loss": 8.9259, "step": 96460 }, { "epoch": 0.4817598441908662, "grad_norm": 0.09215594083070755, "learning_rate": 1.5586122305940074e-05, "loss": 8.9334, "step": 96470 }, { "epoch": 0.4818097830158056, "grad_norm": 0.09032759815454483, "learning_rate": 1.5584620390998524e-05, "loss": 8.946, "step": 96480 }, { "epoch": 0.4818597218407451, "grad_norm": 0.09267091751098633, "learning_rate": 1.5583118476056974e-05, "loss": 8.9375, "step": 96490 }, { "epoch": 0.4819096606656845, "grad_norm": 0.09548508375883102, "learning_rate": 1.558161656111542e-05, "loss": 8.9397, "step": 96500 }, { "epoch": 0.481959599490624, "grad_norm": 0.09211142361164093, "learning_rate": 1.558011464617387e-05, "loss": 8.943, "step": 96510 }, { "epoch": 0.4820095383155634, "grad_norm": 0.09218604117631912, "learning_rate": 1.557861273123232e-05, "loss": 8.9385, "step": 96520 }, { "epoch": 0.4820594771405029, "grad_norm": 0.09151312708854675, "learning_rate": 1.5577110816290772e-05, "loss": 8.9425, "step": 96530 }, { "epoch": 0.4821094159654423, "grad_norm": 0.09401395171880722, "learning_rate": 1.5575608901349222e-05, "loss": 8.9411, "step": 96540 }, { "epoch": 0.4821593547903818, "grad_norm": 0.09752167761325836, "learning_rate": 1.557410698640767e-05, "loss": 8.954, "step": 96550 }, { "epoch": 0.4822092936153212, "grad_norm": 0.0942562073469162, "learning_rate": 1.557260507146612e-05, "loss": 8.9508, "step": 96560 }, { "epoch": 0.4822592324402607, "grad_norm": 0.09495299309492111, "learning_rate": 1.557110315652457e-05, "loss": 8.9436, "step": 96570 }, { "epoch": 0.4823091712652001, "grad_norm": 0.08862724155187607, "learning_rate": 1.556960124158302e-05, "loss": 8.9357, "step": 96580 }, { "epoch": 0.4823591100901396, "grad_norm": 0.09534230828285217, "learning_rate": 1.556809932664147e-05, "loss": 8.938, "step": 96590 }, { "epoch": 0.482409048915079, "grad_norm": 0.08949341624975204, "learning_rate": 1.5566597411699916e-05, "loss": 8.9301, "step": 96600 }, { "epoch": 0.4824589877400185, "grad_norm": 0.08901667594909668, "learning_rate": 1.5565095496758366e-05, "loss": 8.9381, "step": 96610 }, { "epoch": 0.4825089265649579, "grad_norm": 0.09051734954118729, "learning_rate": 1.5563593581816817e-05, "loss": 8.9413, "step": 96620 }, { "epoch": 0.4825588653898974, "grad_norm": 0.09591720253229141, "learning_rate": 1.5562091666875267e-05, "loss": 8.9389, "step": 96630 }, { "epoch": 0.4826088042148368, "grad_norm": 0.09589020907878876, "learning_rate": 1.5560589751933717e-05, "loss": 8.9373, "step": 96640 }, { "epoch": 0.4826587430397763, "grad_norm": 0.0880039632320404, "learning_rate": 1.5559087836992164e-05, "loss": 8.9322, "step": 96650 }, { "epoch": 0.4827086818647157, "grad_norm": 0.09445958584547043, "learning_rate": 1.5557585922050614e-05, "loss": 8.9557, "step": 96660 }, { "epoch": 0.4827586206896552, "grad_norm": 0.09995859116315842, "learning_rate": 1.5556084007109064e-05, "loss": 8.9236, "step": 96670 }, { "epoch": 0.4828085595145946, "grad_norm": 0.09632241725921631, "learning_rate": 1.5554582092167514e-05, "loss": 8.9337, "step": 96680 }, { "epoch": 0.4828584983395341, "grad_norm": 0.09016299247741699, "learning_rate": 1.5553080177225964e-05, "loss": 8.9381, "step": 96690 }, { "epoch": 0.4829084371644735, "grad_norm": 0.08898003399372101, "learning_rate": 1.555157826228441e-05, "loss": 8.9435, "step": 96700 }, { "epoch": 0.482958375989413, "grad_norm": 0.09666922688484192, "learning_rate": 1.5550076347342865e-05, "loss": 8.9445, "step": 96710 }, { "epoch": 0.4830083148143524, "grad_norm": 0.09168431162834167, "learning_rate": 1.554857443240131e-05, "loss": 8.9312, "step": 96720 }, { "epoch": 0.4830582536392919, "grad_norm": 0.09085845202207565, "learning_rate": 1.5547072517459762e-05, "loss": 8.9415, "step": 96730 }, { "epoch": 0.4831081924642313, "grad_norm": 0.09779160469770432, "learning_rate": 1.5545570602518212e-05, "loss": 8.9521, "step": 96740 }, { "epoch": 0.4831581312891708, "grad_norm": 0.09065205603837967, "learning_rate": 1.554406868757666e-05, "loss": 8.9444, "step": 96750 }, { "epoch": 0.4832080701141102, "grad_norm": 0.0941033810377121, "learning_rate": 1.5542566772635112e-05, "loss": 8.9362, "step": 96760 }, { "epoch": 0.4832580089390497, "grad_norm": 0.09128523617982864, "learning_rate": 1.554106485769356e-05, "loss": 8.9498, "step": 96770 }, { "epoch": 0.4833079477639891, "grad_norm": 0.09456328302621841, "learning_rate": 1.553956294275201e-05, "loss": 8.9385, "step": 96780 }, { "epoch": 0.4833578865889286, "grad_norm": 0.090219646692276, "learning_rate": 1.553806102781046e-05, "loss": 8.9453, "step": 96790 }, { "epoch": 0.483407825413868, "grad_norm": 0.09211535006761551, "learning_rate": 1.5536559112868906e-05, "loss": 8.957, "step": 96800 }, { "epoch": 0.4834577642388075, "grad_norm": 0.09447281807661057, "learning_rate": 1.553505719792736e-05, "loss": 8.9422, "step": 96810 }, { "epoch": 0.4835077030637469, "grad_norm": 0.09818176925182343, "learning_rate": 1.5533555282985807e-05, "loss": 8.9414, "step": 96820 }, { "epoch": 0.4835576418886864, "grad_norm": 0.0923561379313469, "learning_rate": 1.5532053368044257e-05, "loss": 8.9497, "step": 96830 }, { "epoch": 0.4836075807136258, "grad_norm": 0.09403201192617416, "learning_rate": 1.5530551453102707e-05, "loss": 8.9408, "step": 96840 }, { "epoch": 0.4836575195385653, "grad_norm": 0.08915037661790848, "learning_rate": 1.5529049538161154e-05, "loss": 8.9423, "step": 96850 }, { "epoch": 0.4837074583635047, "grad_norm": 0.09625165164470673, "learning_rate": 1.5527547623219607e-05, "loss": 8.948, "step": 96860 }, { "epoch": 0.4837573971884442, "grad_norm": 0.0959184393286705, "learning_rate": 1.5526045708278054e-05, "loss": 8.9383, "step": 96870 }, { "epoch": 0.4838073360133836, "grad_norm": 0.09560030698776245, "learning_rate": 1.5524543793336504e-05, "loss": 8.9419, "step": 96880 }, { "epoch": 0.4838572748383231, "grad_norm": 0.09560041129589081, "learning_rate": 1.5523041878394954e-05, "loss": 8.9391, "step": 96890 }, { "epoch": 0.4839072136632625, "grad_norm": 0.09270980209112167, "learning_rate": 1.55215399634534e-05, "loss": 8.9466, "step": 96900 }, { "epoch": 0.4839571524882019, "grad_norm": 0.08851654082536697, "learning_rate": 1.5520038048511855e-05, "loss": 8.9342, "step": 96910 }, { "epoch": 0.4840070913131414, "grad_norm": 0.08883985877037048, "learning_rate": 1.55185361335703e-05, "loss": 8.9362, "step": 96920 }, { "epoch": 0.4840570301380808, "grad_norm": 0.09435227513313293, "learning_rate": 1.5517034218628752e-05, "loss": 8.9312, "step": 96930 }, { "epoch": 0.4841069689630203, "grad_norm": 0.09650822728872299, "learning_rate": 1.5515532303687202e-05, "loss": 8.9235, "step": 96940 }, { "epoch": 0.4841569077879597, "grad_norm": 0.09342892467975616, "learning_rate": 1.551403038874565e-05, "loss": 8.9339, "step": 96950 }, { "epoch": 0.4842068466128992, "grad_norm": 0.0937606543302536, "learning_rate": 1.5512528473804102e-05, "loss": 8.9385, "step": 96960 }, { "epoch": 0.4842567854378386, "grad_norm": 0.09186763316392899, "learning_rate": 1.551102655886255e-05, "loss": 8.9308, "step": 96970 }, { "epoch": 0.4843067242627781, "grad_norm": 0.09301009774208069, "learning_rate": 1.5509524643921e-05, "loss": 8.9326, "step": 96980 }, { "epoch": 0.4843566630877175, "grad_norm": 0.09419457614421844, "learning_rate": 1.550802272897945e-05, "loss": 8.9292, "step": 96990 }, { "epoch": 0.484406601912657, "grad_norm": 0.09167234599590302, "learning_rate": 1.5506520814037896e-05, "loss": 8.9431, "step": 97000 }, { "epoch": 0.4844565407375964, "grad_norm": 0.0926911011338234, "learning_rate": 1.550501889909635e-05, "loss": 8.9269, "step": 97010 }, { "epoch": 0.4845064795625359, "grad_norm": 0.09434627741575241, "learning_rate": 1.5503516984154797e-05, "loss": 8.9336, "step": 97020 }, { "epoch": 0.4845564183874753, "grad_norm": 0.09322822839021683, "learning_rate": 1.5502015069213247e-05, "loss": 8.9313, "step": 97030 }, { "epoch": 0.4846063572124148, "grad_norm": 0.09197692573070526, "learning_rate": 1.5500513154271697e-05, "loss": 8.9304, "step": 97040 }, { "epoch": 0.4846562960373542, "grad_norm": 0.09229908883571625, "learning_rate": 1.5499011239330144e-05, "loss": 8.9265, "step": 97050 }, { "epoch": 0.4847062348622937, "grad_norm": 0.09997537732124329, "learning_rate": 1.5497509324388597e-05, "loss": 8.9292, "step": 97060 }, { "epoch": 0.4847561736872331, "grad_norm": 0.09335023909807205, "learning_rate": 1.5496007409447044e-05, "loss": 8.9234, "step": 97070 }, { "epoch": 0.4848061125121726, "grad_norm": 0.09383931756019592, "learning_rate": 1.5494505494505498e-05, "loss": 8.9355, "step": 97080 }, { "epoch": 0.484856051337112, "grad_norm": 0.09556584805250168, "learning_rate": 1.5493003579563944e-05, "loss": 8.9404, "step": 97090 }, { "epoch": 0.4849059901620515, "grad_norm": 0.09331072121858597, "learning_rate": 1.549150166462239e-05, "loss": 8.935, "step": 97100 }, { "epoch": 0.4849559289869909, "grad_norm": 0.09537593275308609, "learning_rate": 1.5489999749680845e-05, "loss": 8.9442, "step": 97110 }, { "epoch": 0.4850058678119304, "grad_norm": 0.0932040587067604, "learning_rate": 1.548849783473929e-05, "loss": 8.9356, "step": 97120 }, { "epoch": 0.4850558066368698, "grad_norm": 0.09548848122358322, "learning_rate": 1.5486995919797745e-05, "loss": 8.9351, "step": 97130 }, { "epoch": 0.4851057454618093, "grad_norm": 0.088288314640522, "learning_rate": 1.5485494004856192e-05, "loss": 8.9277, "step": 97140 }, { "epoch": 0.4851556842867487, "grad_norm": 0.0967058390378952, "learning_rate": 1.548399208991464e-05, "loss": 8.9245, "step": 97150 }, { "epoch": 0.4852056231116882, "grad_norm": 0.0918177142739296, "learning_rate": 1.5482490174973092e-05, "loss": 8.9246, "step": 97160 }, { "epoch": 0.4852555619366276, "grad_norm": 0.09275839477777481, "learning_rate": 1.548098826003154e-05, "loss": 8.9332, "step": 97170 }, { "epoch": 0.4853055007615671, "grad_norm": 0.09571007639169693, "learning_rate": 1.5479486345089993e-05, "loss": 8.9214, "step": 97180 }, { "epoch": 0.4853554395865065, "grad_norm": 0.09551675617694855, "learning_rate": 1.547798443014844e-05, "loss": 8.9341, "step": 97190 }, { "epoch": 0.485405378411446, "grad_norm": 0.09668447822332382, "learning_rate": 1.5476482515206886e-05, "loss": 8.9389, "step": 97200 }, { "epoch": 0.4854553172363854, "grad_norm": 0.09511365741491318, "learning_rate": 1.547498060026534e-05, "loss": 8.9348, "step": 97210 }, { "epoch": 0.4855052560613249, "grad_norm": 0.09372630715370178, "learning_rate": 1.5473478685323787e-05, "loss": 8.9325, "step": 97220 }, { "epoch": 0.4855551948862643, "grad_norm": 0.09546605497598648, "learning_rate": 1.547197677038224e-05, "loss": 8.9311, "step": 97230 }, { "epoch": 0.4856051337112038, "grad_norm": 0.09527111798524857, "learning_rate": 1.5470474855440687e-05, "loss": 8.929, "step": 97240 }, { "epoch": 0.4856550725361432, "grad_norm": 0.09080222994089127, "learning_rate": 1.5468972940499134e-05, "loss": 8.931, "step": 97250 }, { "epoch": 0.4857050113610827, "grad_norm": 0.09486258774995804, "learning_rate": 1.5467471025557587e-05, "loss": 8.9347, "step": 97260 }, { "epoch": 0.4857549501860221, "grad_norm": 0.09705396741628647, "learning_rate": 1.5465969110616034e-05, "loss": 8.9462, "step": 97270 }, { "epoch": 0.4858048890109616, "grad_norm": 0.09238271415233612, "learning_rate": 1.5464467195674488e-05, "loss": 8.922, "step": 97280 }, { "epoch": 0.485854827835901, "grad_norm": 0.09559265524148941, "learning_rate": 1.5462965280732934e-05, "loss": 8.9331, "step": 97290 }, { "epoch": 0.4859047666608405, "grad_norm": 0.09076651930809021, "learning_rate": 1.546146336579138e-05, "loss": 8.9372, "step": 97300 }, { "epoch": 0.4859547054857799, "grad_norm": 0.09629295766353607, "learning_rate": 1.5459961450849835e-05, "loss": 8.9217, "step": 97310 }, { "epoch": 0.4860046443107194, "grad_norm": 0.09511082619428635, "learning_rate": 1.545845953590828e-05, "loss": 8.9201, "step": 97320 }, { "epoch": 0.4860545831356588, "grad_norm": 0.09342075139284134, "learning_rate": 1.5456957620966735e-05, "loss": 8.9417, "step": 97330 }, { "epoch": 0.4861045219605983, "grad_norm": 0.09244676679372787, "learning_rate": 1.5455455706025182e-05, "loss": 8.9413, "step": 97340 }, { "epoch": 0.4861544607855377, "grad_norm": 0.08962726593017578, "learning_rate": 1.545395379108363e-05, "loss": 8.9414, "step": 97350 }, { "epoch": 0.4862043996104772, "grad_norm": 0.09315306693315506, "learning_rate": 1.5452451876142082e-05, "loss": 8.9322, "step": 97360 }, { "epoch": 0.4862543384354166, "grad_norm": 0.0958314910531044, "learning_rate": 1.545094996120053e-05, "loss": 8.93, "step": 97370 }, { "epoch": 0.4863042772603561, "grad_norm": 0.09707090258598328, "learning_rate": 1.5449448046258983e-05, "loss": 8.9318, "step": 97380 }, { "epoch": 0.4863542160852955, "grad_norm": 0.09278113394975662, "learning_rate": 1.544794613131743e-05, "loss": 8.9208, "step": 97390 }, { "epoch": 0.486404154910235, "grad_norm": 0.09409945458173752, "learning_rate": 1.544644421637588e-05, "loss": 8.9421, "step": 97400 }, { "epoch": 0.4864540937351744, "grad_norm": 0.08893690258264542, "learning_rate": 1.544494230143433e-05, "loss": 8.9299, "step": 97410 }, { "epoch": 0.4865040325601139, "grad_norm": 0.09356432408094406, "learning_rate": 1.5443440386492777e-05, "loss": 8.9403, "step": 97420 }, { "epoch": 0.4865539713850533, "grad_norm": 0.09327803552150726, "learning_rate": 1.544193847155123e-05, "loss": 8.9288, "step": 97430 }, { "epoch": 0.4866039102099928, "grad_norm": 0.0947476327419281, "learning_rate": 1.5440436556609677e-05, "loss": 8.9193, "step": 97440 }, { "epoch": 0.4866538490349322, "grad_norm": 0.09564851969480515, "learning_rate": 1.5438934641668127e-05, "loss": 8.9157, "step": 97450 }, { "epoch": 0.4867037878598717, "grad_norm": 0.09227867424488068, "learning_rate": 1.5437432726726577e-05, "loss": 8.9285, "step": 97460 }, { "epoch": 0.4867537266848111, "grad_norm": 0.09659763425588608, "learning_rate": 1.5435930811785024e-05, "loss": 8.9351, "step": 97470 }, { "epoch": 0.48680366550975057, "grad_norm": 0.09299914538860321, "learning_rate": 1.5434428896843478e-05, "loss": 8.9386, "step": 97480 }, { "epoch": 0.48685360433469, "grad_norm": 0.09589099138975143, "learning_rate": 1.5432926981901925e-05, "loss": 8.9294, "step": 97490 }, { "epoch": 0.48690354315962947, "grad_norm": 0.08684620261192322, "learning_rate": 1.5431425066960375e-05, "loss": 8.9267, "step": 97500 }, { "epoch": 0.4869534819845689, "grad_norm": 0.09718476235866547, "learning_rate": 1.5429923152018825e-05, "loss": 8.9199, "step": 97510 }, { "epoch": 0.48700342080950837, "grad_norm": 0.09026718139648438, "learning_rate": 1.542842123707727e-05, "loss": 8.9492, "step": 97520 }, { "epoch": 0.4870533596344478, "grad_norm": 0.09231576323509216, "learning_rate": 1.5426919322135725e-05, "loss": 8.931, "step": 97530 }, { "epoch": 0.48710329845938727, "grad_norm": 0.09282558411359787, "learning_rate": 1.5425417407194172e-05, "loss": 8.9311, "step": 97540 }, { "epoch": 0.4871532372843267, "grad_norm": 0.10046909749507904, "learning_rate": 1.5423915492252622e-05, "loss": 8.9389, "step": 97550 }, { "epoch": 0.48720317610926617, "grad_norm": 0.09517937153577805, "learning_rate": 1.5422413577311072e-05, "loss": 8.9321, "step": 97560 }, { "epoch": 0.4872531149342056, "grad_norm": 0.09505096822977066, "learning_rate": 1.542091166236952e-05, "loss": 8.9198, "step": 97570 }, { "epoch": 0.48730305375914507, "grad_norm": 0.08916744589805603, "learning_rate": 1.5419409747427973e-05, "loss": 8.9174, "step": 97580 }, { "epoch": 0.4873529925840845, "grad_norm": 0.08957254886627197, "learning_rate": 1.541790783248642e-05, "loss": 8.9441, "step": 97590 }, { "epoch": 0.48740293140902397, "grad_norm": 0.08980575203895569, "learning_rate": 1.541640591754487e-05, "loss": 8.9318, "step": 97600 }, { "epoch": 0.4874528702339634, "grad_norm": 0.09375443309545517, "learning_rate": 1.541490400260332e-05, "loss": 8.9393, "step": 97610 }, { "epoch": 0.48750280905890286, "grad_norm": 0.08852649480104446, "learning_rate": 1.5413402087661767e-05, "loss": 8.9289, "step": 97620 }, { "epoch": 0.4875527478838423, "grad_norm": 0.09620748460292816, "learning_rate": 1.541190017272022e-05, "loss": 8.9414, "step": 97630 }, { "epoch": 0.48760268670878176, "grad_norm": 0.09633340686559677, "learning_rate": 1.5410398257778667e-05, "loss": 8.9263, "step": 97640 }, { "epoch": 0.4876526255337212, "grad_norm": 0.0938490703701973, "learning_rate": 1.5408896342837117e-05, "loss": 8.932, "step": 97650 }, { "epoch": 0.48770256435866066, "grad_norm": 0.08980219811201096, "learning_rate": 1.5407394427895567e-05, "loss": 8.913, "step": 97660 }, { "epoch": 0.4877525031836001, "grad_norm": 0.09491949528455734, "learning_rate": 1.5405892512954014e-05, "loss": 8.9317, "step": 97670 }, { "epoch": 0.48780244200853956, "grad_norm": 0.0898803249001503, "learning_rate": 1.5404390598012468e-05, "loss": 8.938, "step": 97680 }, { "epoch": 0.487852380833479, "grad_norm": 0.09298001229763031, "learning_rate": 1.5402888683070915e-05, "loss": 8.9168, "step": 97690 }, { "epoch": 0.48790231965841846, "grad_norm": 0.09501669555902481, "learning_rate": 1.5401386768129365e-05, "loss": 8.9396, "step": 97700 }, { "epoch": 0.4879522584833579, "grad_norm": 0.09508873522281647, "learning_rate": 1.5399884853187815e-05, "loss": 8.9306, "step": 97710 }, { "epoch": 0.48800219730829736, "grad_norm": 0.0931331217288971, "learning_rate": 1.5398382938246265e-05, "loss": 8.9261, "step": 97720 }, { "epoch": 0.4880521361332368, "grad_norm": 0.0985851138830185, "learning_rate": 1.5396881023304715e-05, "loss": 8.9298, "step": 97730 }, { "epoch": 0.48810207495817626, "grad_norm": 0.08908767253160477, "learning_rate": 1.5395379108363162e-05, "loss": 8.9204, "step": 97740 }, { "epoch": 0.4881520137831157, "grad_norm": 0.09742926806211472, "learning_rate": 1.5393877193421612e-05, "loss": 8.9244, "step": 97750 }, { "epoch": 0.48820195260805516, "grad_norm": 0.09041773527860641, "learning_rate": 1.5392375278480062e-05, "loss": 8.9151, "step": 97760 }, { "epoch": 0.4882518914329946, "grad_norm": 0.094319187104702, "learning_rate": 1.5390873363538513e-05, "loss": 8.9254, "step": 97770 }, { "epoch": 0.48830183025793406, "grad_norm": 0.09709497541189194, "learning_rate": 1.5389371448596963e-05, "loss": 8.9349, "step": 97780 }, { "epoch": 0.4883517690828735, "grad_norm": 0.09084603935480118, "learning_rate": 1.538786953365541e-05, "loss": 8.9242, "step": 97790 }, { "epoch": 0.48840170790781295, "grad_norm": 0.09305722266435623, "learning_rate": 1.538636761871386e-05, "loss": 8.9348, "step": 97800 }, { "epoch": 0.4884516467327524, "grad_norm": 0.09015587717294693, "learning_rate": 1.538486570377231e-05, "loss": 8.9306, "step": 97810 }, { "epoch": 0.48850158555769185, "grad_norm": 0.0915970653295517, "learning_rate": 1.538336378883076e-05, "loss": 8.9168, "step": 97820 }, { "epoch": 0.4885515243826313, "grad_norm": 0.0917646586894989, "learning_rate": 1.538186187388921e-05, "loss": 8.9245, "step": 97830 }, { "epoch": 0.48860146320757075, "grad_norm": 0.09395934641361237, "learning_rate": 1.5380359958947657e-05, "loss": 8.9253, "step": 97840 }, { "epoch": 0.4886514020325102, "grad_norm": 0.09345404803752899, "learning_rate": 1.5378858044006107e-05, "loss": 8.9186, "step": 97850 }, { "epoch": 0.48870134085744965, "grad_norm": 0.0977400466799736, "learning_rate": 1.5377356129064557e-05, "loss": 8.9381, "step": 97860 }, { "epoch": 0.4887512796823891, "grad_norm": 0.09431370347738266, "learning_rate": 1.5375854214123008e-05, "loss": 8.925, "step": 97870 }, { "epoch": 0.48880121850732855, "grad_norm": 0.09120336174964905, "learning_rate": 1.5374352299181458e-05, "loss": 8.9313, "step": 97880 }, { "epoch": 0.48885115733226797, "grad_norm": 0.09345964342355728, "learning_rate": 1.5372850384239905e-05, "loss": 8.9307, "step": 97890 }, { "epoch": 0.4889010961572074, "grad_norm": 0.09451073408126831, "learning_rate": 1.5371348469298355e-05, "loss": 8.9228, "step": 97900 }, { "epoch": 0.48895103498214687, "grad_norm": 0.08943965286016464, "learning_rate": 1.5369846554356805e-05, "loss": 8.9363, "step": 97910 }, { "epoch": 0.4890009738070863, "grad_norm": 0.09674328565597534, "learning_rate": 1.5368344639415255e-05, "loss": 8.9236, "step": 97920 }, { "epoch": 0.48905091263202577, "grad_norm": 0.09601275622844696, "learning_rate": 1.5366842724473705e-05, "loss": 8.9287, "step": 97930 }, { "epoch": 0.4891008514569652, "grad_norm": 0.09064171463251114, "learning_rate": 1.5365340809532152e-05, "loss": 8.9479, "step": 97940 }, { "epoch": 0.48915079028190467, "grad_norm": 0.09523607045412064, "learning_rate": 1.5363838894590602e-05, "loss": 8.9308, "step": 97950 }, { "epoch": 0.4892007291068441, "grad_norm": 0.09979385882616043, "learning_rate": 1.5362336979649052e-05, "loss": 8.9273, "step": 97960 }, { "epoch": 0.48925066793178357, "grad_norm": 0.09213593602180481, "learning_rate": 1.5360835064707503e-05, "loss": 8.9341, "step": 97970 }, { "epoch": 0.489300606756723, "grad_norm": 0.09432204067707062, "learning_rate": 1.5359333149765953e-05, "loss": 8.9229, "step": 97980 }, { "epoch": 0.48935054558166247, "grad_norm": 0.09289470314979553, "learning_rate": 1.53578312348244e-05, "loss": 8.9194, "step": 97990 }, { "epoch": 0.4894004844066019, "grad_norm": 0.09178686141967773, "learning_rate": 1.535632931988285e-05, "loss": 8.9441, "step": 98000 }, { "epoch": 0.48945042323154136, "grad_norm": 0.08877766132354736, "learning_rate": 1.53548274049413e-05, "loss": 8.9273, "step": 98010 }, { "epoch": 0.4895003620564808, "grad_norm": 0.09612318128347397, "learning_rate": 1.535332548999975e-05, "loss": 8.9139, "step": 98020 }, { "epoch": 0.48955030088142026, "grad_norm": 0.09008832275867462, "learning_rate": 1.53518235750582e-05, "loss": 8.9228, "step": 98030 }, { "epoch": 0.4896002397063597, "grad_norm": 0.0915464237332344, "learning_rate": 1.535032166011665e-05, "loss": 8.9361, "step": 98040 }, { "epoch": 0.48965017853129916, "grad_norm": 0.093266561627388, "learning_rate": 1.5348819745175097e-05, "loss": 8.9302, "step": 98050 }, { "epoch": 0.4897001173562386, "grad_norm": 0.08844608813524246, "learning_rate": 1.5347317830233547e-05, "loss": 8.9241, "step": 98060 }, { "epoch": 0.48975005618117806, "grad_norm": 0.0893082469701767, "learning_rate": 1.5345815915291998e-05, "loss": 8.927, "step": 98070 }, { "epoch": 0.4897999950061175, "grad_norm": 0.09313119947910309, "learning_rate": 1.5344314000350448e-05, "loss": 8.9324, "step": 98080 }, { "epoch": 0.48984993383105696, "grad_norm": 0.09175945073366165, "learning_rate": 1.5342812085408898e-05, "loss": 8.9129, "step": 98090 }, { "epoch": 0.4898998726559964, "grad_norm": 0.09570962190628052, "learning_rate": 1.5341310170467345e-05, "loss": 8.9264, "step": 98100 }, { "epoch": 0.48994981148093586, "grad_norm": 0.09470510482788086, "learning_rate": 1.5339808255525795e-05, "loss": 8.9235, "step": 98110 }, { "epoch": 0.4899997503058753, "grad_norm": 0.09118285030126572, "learning_rate": 1.5338306340584245e-05, "loss": 8.9291, "step": 98120 }, { "epoch": 0.49004968913081476, "grad_norm": 0.09788258373737335, "learning_rate": 1.5336804425642695e-05, "loss": 8.919, "step": 98130 }, { "epoch": 0.4900996279557542, "grad_norm": 0.09753192961215973, "learning_rate": 1.5335302510701145e-05, "loss": 8.9069, "step": 98140 }, { "epoch": 0.49014956678069366, "grad_norm": 0.09691204130649567, "learning_rate": 1.5333800595759592e-05, "loss": 8.9225, "step": 98150 }, { "epoch": 0.4901995056056331, "grad_norm": 0.0969519168138504, "learning_rate": 1.5332298680818042e-05, "loss": 8.9203, "step": 98160 }, { "epoch": 0.49024944443057256, "grad_norm": 0.09179043769836426, "learning_rate": 1.5330796765876493e-05, "loss": 8.9177, "step": 98170 }, { "epoch": 0.490299383255512, "grad_norm": 0.09258826822042465, "learning_rate": 1.5329294850934943e-05, "loss": 8.9295, "step": 98180 }, { "epoch": 0.49034932208045146, "grad_norm": 0.09368129819631577, "learning_rate": 1.5327792935993393e-05, "loss": 8.9187, "step": 98190 }, { "epoch": 0.4903992609053909, "grad_norm": 0.09183964878320694, "learning_rate": 1.5326291021051843e-05, "loss": 8.9198, "step": 98200 }, { "epoch": 0.49044919973033035, "grad_norm": 0.0940631628036499, "learning_rate": 1.532478910611029e-05, "loss": 8.9311, "step": 98210 }, { "epoch": 0.4904991385552698, "grad_norm": 0.09668076038360596, "learning_rate": 1.532328719116874e-05, "loss": 8.9115, "step": 98220 }, { "epoch": 0.49054907738020925, "grad_norm": 0.09049348533153534, "learning_rate": 1.532178527622719e-05, "loss": 8.9309, "step": 98230 }, { "epoch": 0.4905990162051487, "grad_norm": 0.08599221706390381, "learning_rate": 1.532028336128564e-05, "loss": 8.9212, "step": 98240 }, { "epoch": 0.49064895503008815, "grad_norm": 0.09467752277851105, "learning_rate": 1.531878144634409e-05, "loss": 8.9257, "step": 98250 }, { "epoch": 0.4906988938550276, "grad_norm": 0.10057593882083893, "learning_rate": 1.5317279531402537e-05, "loss": 8.9434, "step": 98260 }, { "epoch": 0.49074883267996705, "grad_norm": 0.09494142979383469, "learning_rate": 1.5315777616460988e-05, "loss": 8.9185, "step": 98270 }, { "epoch": 0.4907987715049065, "grad_norm": 0.09219949692487717, "learning_rate": 1.5314275701519438e-05, "loss": 8.9139, "step": 98280 }, { "epoch": 0.49084871032984595, "grad_norm": 0.09445098787546158, "learning_rate": 1.5312773786577888e-05, "loss": 8.9353, "step": 98290 }, { "epoch": 0.49089864915478537, "grad_norm": 0.09249778091907501, "learning_rate": 1.5311271871636338e-05, "loss": 8.9202, "step": 98300 }, { "epoch": 0.49094858797972485, "grad_norm": 0.09504956007003784, "learning_rate": 1.5309769956694785e-05, "loss": 8.9148, "step": 98310 }, { "epoch": 0.49099852680466427, "grad_norm": 0.09462475776672363, "learning_rate": 1.5308268041753235e-05, "loss": 8.9387, "step": 98320 }, { "epoch": 0.49104846562960375, "grad_norm": 0.09308874607086182, "learning_rate": 1.5306766126811685e-05, "loss": 8.9143, "step": 98330 }, { "epoch": 0.49109840445454317, "grad_norm": 0.09277541190385818, "learning_rate": 1.5305264211870135e-05, "loss": 8.9223, "step": 98340 }, { "epoch": 0.49114834327948265, "grad_norm": 0.09326548874378204, "learning_rate": 1.5303762296928586e-05, "loss": 8.926, "step": 98350 }, { "epoch": 0.49119828210442207, "grad_norm": 0.09251368790864944, "learning_rate": 1.5302260381987036e-05, "loss": 8.94, "step": 98360 }, { "epoch": 0.49124822092936155, "grad_norm": 0.0935811847448349, "learning_rate": 1.5300758467045483e-05, "loss": 8.9264, "step": 98370 }, { "epoch": 0.49129815975430097, "grad_norm": 0.09226620942354202, "learning_rate": 1.5299256552103933e-05, "loss": 8.9254, "step": 98380 }, { "epoch": 0.49134809857924044, "grad_norm": 0.09316042810678482, "learning_rate": 1.5297754637162383e-05, "loss": 8.9175, "step": 98390 }, { "epoch": 0.49139803740417987, "grad_norm": 0.09391681849956512, "learning_rate": 1.5296252722220833e-05, "loss": 8.9204, "step": 98400 }, { "epoch": 0.49144797622911934, "grad_norm": 0.09176868200302124, "learning_rate": 1.5294750807279283e-05, "loss": 8.919, "step": 98410 }, { "epoch": 0.49149791505405876, "grad_norm": 0.09538892656564713, "learning_rate": 1.529324889233773e-05, "loss": 8.9293, "step": 98420 }, { "epoch": 0.49154785387899824, "grad_norm": 0.09601672738790512, "learning_rate": 1.529174697739618e-05, "loss": 8.9258, "step": 98430 }, { "epoch": 0.49159779270393766, "grad_norm": 0.09024684131145477, "learning_rate": 1.529024506245463e-05, "loss": 8.9218, "step": 98440 }, { "epoch": 0.49164773152887714, "grad_norm": 0.09166863560676575, "learning_rate": 1.528874314751308e-05, "loss": 8.9383, "step": 98450 }, { "epoch": 0.49169767035381656, "grad_norm": 0.09486201405525208, "learning_rate": 1.528724123257153e-05, "loss": 8.9158, "step": 98460 }, { "epoch": 0.49174760917875604, "grad_norm": 0.09765074402093887, "learning_rate": 1.5285739317629978e-05, "loss": 8.9163, "step": 98470 }, { "epoch": 0.49179754800369546, "grad_norm": 0.09160161763429642, "learning_rate": 1.5284237402688428e-05, "loss": 8.9048, "step": 98480 }, { "epoch": 0.49184748682863494, "grad_norm": 0.09425348788499832, "learning_rate": 1.5282735487746878e-05, "loss": 8.923, "step": 98490 }, { "epoch": 0.49189742565357436, "grad_norm": 0.09459834545850754, "learning_rate": 1.5281233572805328e-05, "loss": 8.9128, "step": 98500 }, { "epoch": 0.49194736447851384, "grad_norm": 0.08558713644742966, "learning_rate": 1.527973165786378e-05, "loss": 8.9286, "step": 98510 }, { "epoch": 0.49199730330345326, "grad_norm": 0.09137829393148422, "learning_rate": 1.5278229742922225e-05, "loss": 8.9298, "step": 98520 }, { "epoch": 0.49204724212839274, "grad_norm": 0.09085921198129654, "learning_rate": 1.5276727827980675e-05, "loss": 8.8977, "step": 98530 }, { "epoch": 0.49209718095333216, "grad_norm": 0.08780453354120255, "learning_rate": 1.5275225913039125e-05, "loss": 8.9185, "step": 98540 }, { "epoch": 0.49214711977827164, "grad_norm": 0.09832119941711426, "learning_rate": 1.5273723998097576e-05, "loss": 8.9146, "step": 98550 }, { "epoch": 0.49219705860321106, "grad_norm": 0.09514303505420685, "learning_rate": 1.5272222083156026e-05, "loss": 8.9141, "step": 98560 }, { "epoch": 0.49224699742815053, "grad_norm": 0.09371118247509003, "learning_rate": 1.5270720168214473e-05, "loss": 8.9301, "step": 98570 }, { "epoch": 0.49229693625308996, "grad_norm": 0.09175911545753479, "learning_rate": 1.5269218253272923e-05, "loss": 8.9148, "step": 98580 }, { "epoch": 0.49234687507802943, "grad_norm": 0.09370813518762589, "learning_rate": 1.5267716338331373e-05, "loss": 8.917, "step": 98590 }, { "epoch": 0.49239681390296886, "grad_norm": 0.09525982290506363, "learning_rate": 1.5266214423389823e-05, "loss": 8.9276, "step": 98600 }, { "epoch": 0.49244675272790833, "grad_norm": 0.09428591281175613, "learning_rate": 1.5264712508448273e-05, "loss": 8.9137, "step": 98610 }, { "epoch": 0.49249669155284775, "grad_norm": 0.09742698073387146, "learning_rate": 1.526321059350672e-05, "loss": 8.9217, "step": 98620 }, { "epoch": 0.49254663037778723, "grad_norm": 0.09069808572530746, "learning_rate": 1.526170867856517e-05, "loss": 8.9151, "step": 98630 }, { "epoch": 0.49259656920272665, "grad_norm": 0.08807901293039322, "learning_rate": 1.526020676362362e-05, "loss": 8.9236, "step": 98640 }, { "epoch": 0.49264650802766613, "grad_norm": 0.10254672169685364, "learning_rate": 1.5258704848682069e-05, "loss": 8.9263, "step": 98650 }, { "epoch": 0.49269644685260555, "grad_norm": 0.09022139757871628, "learning_rate": 1.525720293374052e-05, "loss": 8.937, "step": 98660 }, { "epoch": 0.49274638567754503, "grad_norm": 0.0958266630768776, "learning_rate": 1.525570101879897e-05, "loss": 8.9133, "step": 98670 }, { "epoch": 0.49279632450248445, "grad_norm": 0.09768004715442657, "learning_rate": 1.525419910385742e-05, "loss": 8.9149, "step": 98680 }, { "epoch": 0.49284626332742393, "grad_norm": 0.09794740378856659, "learning_rate": 1.5252697188915868e-05, "loss": 8.9157, "step": 98690 }, { "epoch": 0.49289620215236335, "grad_norm": 0.09140866249799728, "learning_rate": 1.5251195273974316e-05, "loss": 8.931, "step": 98700 }, { "epoch": 0.4929461409773028, "grad_norm": 0.09415680915117264, "learning_rate": 1.5249693359032768e-05, "loss": 8.9115, "step": 98710 }, { "epoch": 0.49299607980224225, "grad_norm": 0.09118063002824783, "learning_rate": 1.5248191444091217e-05, "loss": 8.9061, "step": 98720 }, { "epoch": 0.4930460186271817, "grad_norm": 0.09346728771924973, "learning_rate": 1.5246689529149667e-05, "loss": 8.9145, "step": 98730 }, { "epoch": 0.49309595745212115, "grad_norm": 0.0916360467672348, "learning_rate": 1.5245187614208115e-05, "loss": 8.9112, "step": 98740 }, { "epoch": 0.4931458962770606, "grad_norm": 0.09017834067344666, "learning_rate": 1.5243685699266564e-05, "loss": 8.9181, "step": 98750 }, { "epoch": 0.49319583510200005, "grad_norm": 0.09475754201412201, "learning_rate": 1.5242183784325016e-05, "loss": 8.9081, "step": 98760 }, { "epoch": 0.4932457739269395, "grad_norm": 0.09371718764305115, "learning_rate": 1.5240681869383464e-05, "loss": 8.9163, "step": 98770 }, { "epoch": 0.49329571275187895, "grad_norm": 0.09911168366670609, "learning_rate": 1.5239179954441915e-05, "loss": 8.9001, "step": 98780 }, { "epoch": 0.4933456515768184, "grad_norm": 0.09791523963212967, "learning_rate": 1.5237678039500363e-05, "loss": 8.9206, "step": 98790 }, { "epoch": 0.49339559040175784, "grad_norm": 0.09776368737220764, "learning_rate": 1.5236176124558811e-05, "loss": 8.9072, "step": 98800 }, { "epoch": 0.4934455292266973, "grad_norm": 0.09171529114246368, "learning_rate": 1.5234674209617263e-05, "loss": 8.9162, "step": 98810 }, { "epoch": 0.49349546805163674, "grad_norm": 0.08931485563516617, "learning_rate": 1.5233172294675712e-05, "loss": 8.9238, "step": 98820 }, { "epoch": 0.4935454068765762, "grad_norm": 0.09580953419208527, "learning_rate": 1.5231670379734162e-05, "loss": 8.9204, "step": 98830 }, { "epoch": 0.49359534570151564, "grad_norm": 0.09444009512662888, "learning_rate": 1.523016846479261e-05, "loss": 8.9138, "step": 98840 }, { "epoch": 0.4936452845264551, "grad_norm": 0.0973835289478302, "learning_rate": 1.5228666549851059e-05, "loss": 8.8994, "step": 98850 }, { "epoch": 0.49369522335139454, "grad_norm": 0.0919685810804367, "learning_rate": 1.5227164634909511e-05, "loss": 8.9286, "step": 98860 }, { "epoch": 0.493745162176334, "grad_norm": 0.09695620089769363, "learning_rate": 1.522566271996796e-05, "loss": 8.9083, "step": 98870 }, { "epoch": 0.49379510100127344, "grad_norm": 0.08825941383838654, "learning_rate": 1.522416080502641e-05, "loss": 8.9236, "step": 98880 }, { "epoch": 0.49384503982621286, "grad_norm": 0.08976638317108154, "learning_rate": 1.5222658890084858e-05, "loss": 8.9358, "step": 98890 }, { "epoch": 0.49389497865115234, "grad_norm": 0.09463369101285934, "learning_rate": 1.5221156975143306e-05, "loss": 8.9231, "step": 98900 }, { "epoch": 0.49394491747609176, "grad_norm": 0.09321442991495132, "learning_rate": 1.5219655060201758e-05, "loss": 8.9213, "step": 98910 }, { "epoch": 0.49399485630103124, "grad_norm": 0.0944615826010704, "learning_rate": 1.5218153145260207e-05, "loss": 8.9208, "step": 98920 }, { "epoch": 0.49404479512597066, "grad_norm": 0.09346310794353485, "learning_rate": 1.5216651230318657e-05, "loss": 8.9182, "step": 98930 }, { "epoch": 0.49409473395091014, "grad_norm": 0.09217849373817444, "learning_rate": 1.5215149315377106e-05, "loss": 8.9081, "step": 98940 }, { "epoch": 0.49414467277584956, "grad_norm": 0.09414231032133102, "learning_rate": 1.5213647400435554e-05, "loss": 8.9238, "step": 98950 }, { "epoch": 0.49419461160078904, "grad_norm": 0.09089777618646622, "learning_rate": 1.5212145485494006e-05, "loss": 8.9218, "step": 98960 }, { "epoch": 0.49424455042572846, "grad_norm": 0.09277298301458359, "learning_rate": 1.5210643570552454e-05, "loss": 8.9305, "step": 98970 }, { "epoch": 0.49429448925066793, "grad_norm": 0.09524578601121902, "learning_rate": 1.5209141655610905e-05, "loss": 8.9063, "step": 98980 }, { "epoch": 0.49434442807560736, "grad_norm": 0.09183613210916519, "learning_rate": 1.5207639740669353e-05, "loss": 8.9204, "step": 98990 }, { "epoch": 0.49439436690054683, "grad_norm": 0.09120965749025345, "learning_rate": 1.5206137825727805e-05, "loss": 8.92, "step": 99000 }, { "epoch": 0.49444430572548626, "grad_norm": 0.09432142972946167, "learning_rate": 1.5204635910786253e-05, "loss": 8.9168, "step": 99010 }, { "epoch": 0.49449424455042573, "grad_norm": 0.0890686959028244, "learning_rate": 1.5203133995844702e-05, "loss": 8.9168, "step": 99020 }, { "epoch": 0.49454418337536515, "grad_norm": 0.09807530790567398, "learning_rate": 1.5201632080903152e-05, "loss": 8.9078, "step": 99030 }, { "epoch": 0.49459412220030463, "grad_norm": 0.09491509944200516, "learning_rate": 1.52001301659616e-05, "loss": 8.909, "step": 99040 }, { "epoch": 0.49464406102524405, "grad_norm": 0.09278037399053574, "learning_rate": 1.5198628251020052e-05, "loss": 8.9086, "step": 99050 }, { "epoch": 0.49469399985018353, "grad_norm": 0.09126697480678558, "learning_rate": 1.5197126336078501e-05, "loss": 8.9129, "step": 99060 }, { "epoch": 0.49474393867512295, "grad_norm": 0.0916236862540245, "learning_rate": 1.519562442113695e-05, "loss": 8.9122, "step": 99070 }, { "epoch": 0.49479387750006243, "grad_norm": 0.08973243832588196, "learning_rate": 1.51941225061954e-05, "loss": 8.9116, "step": 99080 }, { "epoch": 0.49484381632500185, "grad_norm": 0.09231727570295334, "learning_rate": 1.5192620591253848e-05, "loss": 8.9159, "step": 99090 }, { "epoch": 0.49489375514994133, "grad_norm": 0.09403295069932938, "learning_rate": 1.51911186763123e-05, "loss": 8.9249, "step": 99100 }, { "epoch": 0.49494369397488075, "grad_norm": 0.09361027926206589, "learning_rate": 1.5189616761370748e-05, "loss": 8.9205, "step": 99110 }, { "epoch": 0.4949936327998202, "grad_norm": 0.09127476811408997, "learning_rate": 1.5188114846429197e-05, "loss": 8.9237, "step": 99120 }, { "epoch": 0.49504357162475965, "grad_norm": 0.090853251516819, "learning_rate": 1.5186612931487647e-05, "loss": 8.9382, "step": 99130 }, { "epoch": 0.4950935104496991, "grad_norm": 0.09697920083999634, "learning_rate": 1.5185111016546096e-05, "loss": 8.9156, "step": 99140 }, { "epoch": 0.49514344927463855, "grad_norm": 0.09126929193735123, "learning_rate": 1.5183609101604547e-05, "loss": 8.9194, "step": 99150 }, { "epoch": 0.495193388099578, "grad_norm": 0.0899655893445015, "learning_rate": 1.5182107186662996e-05, "loss": 8.9068, "step": 99160 }, { "epoch": 0.49524332692451745, "grad_norm": 0.08864665776491165, "learning_rate": 1.5180605271721444e-05, "loss": 8.8933, "step": 99170 }, { "epoch": 0.4952932657494569, "grad_norm": 0.09634734690189362, "learning_rate": 1.5179103356779895e-05, "loss": 8.9322, "step": 99180 }, { "epoch": 0.49534320457439635, "grad_norm": 0.0953470766544342, "learning_rate": 1.5177601441838343e-05, "loss": 8.9094, "step": 99190 }, { "epoch": 0.4953931433993358, "grad_norm": 0.0901167094707489, "learning_rate": 1.5176099526896795e-05, "loss": 8.9126, "step": 99200 }, { "epoch": 0.49544308222427524, "grad_norm": 0.09353036433458328, "learning_rate": 1.5174597611955243e-05, "loss": 8.9145, "step": 99210 }, { "epoch": 0.4954930210492147, "grad_norm": 0.0913802906870842, "learning_rate": 1.5173095697013692e-05, "loss": 8.9135, "step": 99220 }, { "epoch": 0.49554295987415414, "grad_norm": 0.09367360174655914, "learning_rate": 1.5171593782072142e-05, "loss": 8.9098, "step": 99230 }, { "epoch": 0.4955928986990936, "grad_norm": 0.08964614570140839, "learning_rate": 1.517009186713059e-05, "loss": 8.9137, "step": 99240 }, { "epoch": 0.49564283752403304, "grad_norm": 0.0972425788640976, "learning_rate": 1.5168589952189042e-05, "loss": 8.9079, "step": 99250 }, { "epoch": 0.4956927763489725, "grad_norm": 0.08969489485025406, "learning_rate": 1.5167088037247491e-05, "loss": 8.9144, "step": 99260 }, { "epoch": 0.49574271517391194, "grad_norm": 0.09431774914264679, "learning_rate": 1.516558612230594e-05, "loss": 8.9184, "step": 99270 }, { "epoch": 0.4957926539988514, "grad_norm": 0.0908089205622673, "learning_rate": 1.516408420736439e-05, "loss": 8.9106, "step": 99280 }, { "epoch": 0.49584259282379084, "grad_norm": 0.08945730328559875, "learning_rate": 1.5162582292422838e-05, "loss": 8.9202, "step": 99290 }, { "epoch": 0.4958925316487303, "grad_norm": 0.09563491493463516, "learning_rate": 1.516108037748129e-05, "loss": 8.8976, "step": 99300 }, { "epoch": 0.49594247047366974, "grad_norm": 0.0948091596364975, "learning_rate": 1.5159578462539738e-05, "loss": 8.916, "step": 99310 }, { "epoch": 0.4959924092986092, "grad_norm": 0.09705643355846405, "learning_rate": 1.515807654759819e-05, "loss": 8.9191, "step": 99320 }, { "epoch": 0.49604234812354864, "grad_norm": 0.08930245041847229, "learning_rate": 1.5156574632656639e-05, "loss": 8.9, "step": 99330 }, { "epoch": 0.4960922869484881, "grad_norm": 0.09772713482379913, "learning_rate": 1.5155072717715086e-05, "loss": 8.9179, "step": 99340 }, { "epoch": 0.49614222577342754, "grad_norm": 0.0901922956109047, "learning_rate": 1.5153570802773537e-05, "loss": 8.9004, "step": 99350 }, { "epoch": 0.496192164598367, "grad_norm": 0.09526271373033524, "learning_rate": 1.5152068887831986e-05, "loss": 8.9225, "step": 99360 }, { "epoch": 0.49624210342330644, "grad_norm": 0.09357525408267975, "learning_rate": 1.5150566972890438e-05, "loss": 8.9093, "step": 99370 }, { "epoch": 0.4962920422482459, "grad_norm": 0.09719167649745941, "learning_rate": 1.5149065057948886e-05, "loss": 8.9092, "step": 99380 }, { "epoch": 0.49634198107318533, "grad_norm": 0.09308057278394699, "learning_rate": 1.5147563143007333e-05, "loss": 8.8939, "step": 99390 }, { "epoch": 0.4963919198981248, "grad_norm": 0.0987476035952568, "learning_rate": 1.5146061228065785e-05, "loss": 8.9098, "step": 99400 }, { "epoch": 0.49644185872306423, "grad_norm": 0.09410141408443451, "learning_rate": 1.5144559313124233e-05, "loss": 8.9034, "step": 99410 }, { "epoch": 0.4964917975480037, "grad_norm": 0.09079299867153168, "learning_rate": 1.5143057398182685e-05, "loss": 8.9117, "step": 99420 }, { "epoch": 0.49654173637294313, "grad_norm": 0.0934637263417244, "learning_rate": 1.5141555483241134e-05, "loss": 8.9108, "step": 99430 }, { "epoch": 0.4965916751978826, "grad_norm": 0.09323903918266296, "learning_rate": 1.514005356829958e-05, "loss": 8.9113, "step": 99440 }, { "epoch": 0.49664161402282203, "grad_norm": 0.09298430383205414, "learning_rate": 1.5138551653358032e-05, "loss": 8.9119, "step": 99450 }, { "epoch": 0.4966915528477615, "grad_norm": 0.09651913493871689, "learning_rate": 1.5137049738416481e-05, "loss": 8.8993, "step": 99460 }, { "epoch": 0.49674149167270093, "grad_norm": 0.0914125069975853, "learning_rate": 1.5135547823474933e-05, "loss": 8.9252, "step": 99470 }, { "epoch": 0.4967914304976404, "grad_norm": 0.09153977036476135, "learning_rate": 1.5134045908533381e-05, "loss": 8.9252, "step": 99480 }, { "epoch": 0.49684136932257983, "grad_norm": 0.08837665617465973, "learning_rate": 1.5132543993591828e-05, "loss": 8.8961, "step": 99490 }, { "epoch": 0.4968913081475193, "grad_norm": 0.0936015248298645, "learning_rate": 1.513104207865028e-05, "loss": 8.9022, "step": 99500 }, { "epoch": 0.49694124697245873, "grad_norm": 0.09733084589242935, "learning_rate": 1.5129540163708728e-05, "loss": 8.8925, "step": 99510 }, { "epoch": 0.4969911857973982, "grad_norm": 0.09360146522521973, "learning_rate": 1.512803824876718e-05, "loss": 8.903, "step": 99520 }, { "epoch": 0.4970411246223376, "grad_norm": 0.09262694418430328, "learning_rate": 1.5126536333825629e-05, "loss": 8.9168, "step": 99530 }, { "epoch": 0.4970910634472771, "grad_norm": 0.0910312682390213, "learning_rate": 1.5125034418884076e-05, "loss": 8.9155, "step": 99540 }, { "epoch": 0.4971410022722165, "grad_norm": 0.09175118058919907, "learning_rate": 1.5123532503942527e-05, "loss": 8.905, "step": 99550 }, { "epoch": 0.497190941097156, "grad_norm": 0.08740215748548508, "learning_rate": 1.5122030589000976e-05, "loss": 8.9235, "step": 99560 }, { "epoch": 0.4972408799220954, "grad_norm": 0.09946295619010925, "learning_rate": 1.5120528674059428e-05, "loss": 8.9026, "step": 99570 }, { "epoch": 0.4972908187470349, "grad_norm": 0.09400948882102966, "learning_rate": 1.5119026759117876e-05, "loss": 8.9128, "step": 99580 }, { "epoch": 0.4973407575719743, "grad_norm": 0.09512431174516678, "learning_rate": 1.5117524844176323e-05, "loss": 8.9141, "step": 99590 }, { "epoch": 0.4973906963969138, "grad_norm": 0.09360211342573166, "learning_rate": 1.5116022929234775e-05, "loss": 8.9206, "step": 99600 }, { "epoch": 0.4974406352218532, "grad_norm": 0.08853219449520111, "learning_rate": 1.5114521014293223e-05, "loss": 8.9061, "step": 99610 }, { "epoch": 0.4974905740467927, "grad_norm": 0.09550435096025467, "learning_rate": 1.5113019099351675e-05, "loss": 8.909, "step": 99620 }, { "epoch": 0.4975405128717321, "grad_norm": 0.09092508256435394, "learning_rate": 1.5111517184410124e-05, "loss": 8.9078, "step": 99630 }, { "epoch": 0.4975904516966716, "grad_norm": 0.09557131677865982, "learning_rate": 1.5110015269468574e-05, "loss": 8.9048, "step": 99640 }, { "epoch": 0.497640390521611, "grad_norm": 0.09490132331848145, "learning_rate": 1.5108513354527022e-05, "loss": 8.9167, "step": 99650 }, { "epoch": 0.4976903293465505, "grad_norm": 0.09305530786514282, "learning_rate": 1.5107011439585471e-05, "loss": 8.9106, "step": 99660 }, { "epoch": 0.4977402681714899, "grad_norm": 0.09075649082660675, "learning_rate": 1.5105509524643923e-05, "loss": 8.9161, "step": 99670 }, { "epoch": 0.4977902069964294, "grad_norm": 0.0945330560207367, "learning_rate": 1.5104007609702371e-05, "loss": 8.9061, "step": 99680 }, { "epoch": 0.4978401458213688, "grad_norm": 0.09516996145248413, "learning_rate": 1.5102505694760821e-05, "loss": 8.9028, "step": 99690 }, { "epoch": 0.4978900846463083, "grad_norm": 0.09055611491203308, "learning_rate": 1.510100377981927e-05, "loss": 8.9215, "step": 99700 }, { "epoch": 0.4979400234712477, "grad_norm": 0.0914728119969368, "learning_rate": 1.5099501864877718e-05, "loss": 8.9231, "step": 99710 }, { "epoch": 0.4979899622961872, "grad_norm": 0.08997715264558792, "learning_rate": 1.509799994993617e-05, "loss": 8.9054, "step": 99720 }, { "epoch": 0.4980399011211266, "grad_norm": 0.10086820274591446, "learning_rate": 1.5096498034994619e-05, "loss": 8.9084, "step": 99730 }, { "epoch": 0.4980898399460661, "grad_norm": 0.09388542920351028, "learning_rate": 1.5094996120053069e-05, "loss": 8.9193, "step": 99740 }, { "epoch": 0.4981397787710055, "grad_norm": 0.09304860979318619, "learning_rate": 1.5093494205111517e-05, "loss": 8.9002, "step": 99750 }, { "epoch": 0.498189717595945, "grad_norm": 0.0892772451043129, "learning_rate": 1.5091992290169966e-05, "loss": 8.8994, "step": 99760 }, { "epoch": 0.4982396564208844, "grad_norm": 0.09076282382011414, "learning_rate": 1.5090490375228418e-05, "loss": 8.9014, "step": 99770 }, { "epoch": 0.4982895952458239, "grad_norm": 0.09353473037481308, "learning_rate": 1.5088988460286866e-05, "loss": 8.9208, "step": 99780 }, { "epoch": 0.4983395340707633, "grad_norm": 0.09274925291538239, "learning_rate": 1.5087486545345316e-05, "loss": 8.9072, "step": 99790 }, { "epoch": 0.4983894728957028, "grad_norm": 0.09765703231096268, "learning_rate": 1.5085984630403765e-05, "loss": 8.9014, "step": 99800 }, { "epoch": 0.4984394117206422, "grad_norm": 0.08879239112138748, "learning_rate": 1.5084482715462213e-05, "loss": 8.9185, "step": 99810 }, { "epoch": 0.4984893505455817, "grad_norm": 0.09869293868541718, "learning_rate": 1.5082980800520665e-05, "loss": 8.9048, "step": 99820 }, { "epoch": 0.4985392893705211, "grad_norm": 0.09303834289312363, "learning_rate": 1.5081478885579114e-05, "loss": 8.9107, "step": 99830 }, { "epoch": 0.4985892281954606, "grad_norm": 0.09813646227121353, "learning_rate": 1.5079976970637564e-05, "loss": 8.9082, "step": 99840 }, { "epoch": 0.4986391670204, "grad_norm": 0.09101364761590958, "learning_rate": 1.5078475055696012e-05, "loss": 8.8986, "step": 99850 }, { "epoch": 0.4986891058453395, "grad_norm": 0.09561823308467865, "learning_rate": 1.5076973140754461e-05, "loss": 8.901, "step": 99860 }, { "epoch": 0.4987390446702789, "grad_norm": 0.09230560064315796, "learning_rate": 1.5075471225812913e-05, "loss": 8.9104, "step": 99870 }, { "epoch": 0.49878898349521833, "grad_norm": 0.10049329698085785, "learning_rate": 1.5073969310871361e-05, "loss": 8.9125, "step": 99880 }, { "epoch": 0.4988389223201578, "grad_norm": 0.08787727355957031, "learning_rate": 1.5072467395929811e-05, "loss": 8.9013, "step": 99890 }, { "epoch": 0.49888886114509723, "grad_norm": 0.09034966677427292, "learning_rate": 1.507096548098826e-05, "loss": 8.9037, "step": 99900 }, { "epoch": 0.4989387999700367, "grad_norm": 0.09500369429588318, "learning_rate": 1.5069463566046708e-05, "loss": 8.8933, "step": 99910 }, { "epoch": 0.4989887387949761, "grad_norm": 0.09762393683195114, "learning_rate": 1.506796165110516e-05, "loss": 8.9136, "step": 99920 }, { "epoch": 0.4990386776199156, "grad_norm": 0.09636267274618149, "learning_rate": 1.5066459736163609e-05, "loss": 8.9171, "step": 99930 }, { "epoch": 0.499088616444855, "grad_norm": 0.09169818460941315, "learning_rate": 1.5064957821222059e-05, "loss": 8.9056, "step": 99940 }, { "epoch": 0.4991385552697945, "grad_norm": 0.09505058825016022, "learning_rate": 1.5063455906280507e-05, "loss": 8.9185, "step": 99950 }, { "epoch": 0.4991884940947339, "grad_norm": 0.09004746377468109, "learning_rate": 1.5061953991338956e-05, "loss": 8.9026, "step": 99960 }, { "epoch": 0.4992384329196734, "grad_norm": 0.08870364725589752, "learning_rate": 1.5060452076397408e-05, "loss": 8.9053, "step": 99970 }, { "epoch": 0.4992883717446128, "grad_norm": 0.08827803283929825, "learning_rate": 1.5058950161455856e-05, "loss": 8.9115, "step": 99980 }, { "epoch": 0.4993383105695523, "grad_norm": 0.09465722739696503, "learning_rate": 1.5057448246514306e-05, "loss": 8.9056, "step": 99990 }, { "epoch": 0.4993882493944917, "grad_norm": 0.0972822979092598, "learning_rate": 1.5055946331572755e-05, "loss": 8.9103, "step": 100000 }, { "epoch": 0.4994381882194312, "grad_norm": 0.0914982259273529, "learning_rate": 1.5054444416631207e-05, "loss": 8.9047, "step": 100010 }, { "epoch": 0.4994881270443706, "grad_norm": 0.09300156682729721, "learning_rate": 1.5052942501689655e-05, "loss": 8.9028, "step": 100020 }, { "epoch": 0.4995380658693101, "grad_norm": 0.0923018679022789, "learning_rate": 1.5051440586748104e-05, "loss": 8.9091, "step": 100030 }, { "epoch": 0.4995880046942495, "grad_norm": 0.09287258982658386, "learning_rate": 1.5049938671806554e-05, "loss": 8.9109, "step": 100040 }, { "epoch": 0.499637943519189, "grad_norm": 0.0904991626739502, "learning_rate": 1.5048436756865002e-05, "loss": 8.9109, "step": 100050 }, { "epoch": 0.4996878823441284, "grad_norm": 0.09570524841547012, "learning_rate": 1.5046934841923454e-05, "loss": 8.9005, "step": 100060 }, { "epoch": 0.4997378211690679, "grad_norm": 0.09132358431816101, "learning_rate": 1.5045432926981903e-05, "loss": 8.9182, "step": 100070 }, { "epoch": 0.4997877599940073, "grad_norm": 0.09124463051557541, "learning_rate": 1.5043931012040351e-05, "loss": 8.9059, "step": 100080 }, { "epoch": 0.4998376988189468, "grad_norm": 0.08917669951915741, "learning_rate": 1.5042429097098801e-05, "loss": 8.9083, "step": 100090 }, { "epoch": 0.4998876376438862, "grad_norm": 0.09558670222759247, "learning_rate": 1.504092718215725e-05, "loss": 8.9074, "step": 100100 }, { "epoch": 0.4999375764688257, "grad_norm": 0.09686680883169174, "learning_rate": 1.5039425267215702e-05, "loss": 8.9087, "step": 100110 }, { "epoch": 0.4999875152937651, "grad_norm": 0.08919398486614227, "learning_rate": 1.503792335227415e-05, "loss": 8.911, "step": 100120 }, { "epoch": 0.5000374541187046, "grad_norm": 0.09795615077018738, "learning_rate": 1.5036421437332599e-05, "loss": 8.8899, "step": 100130 }, { "epoch": 0.500087392943644, "grad_norm": 0.09374431520700455, "learning_rate": 1.5034919522391049e-05, "loss": 8.896, "step": 100140 }, { "epoch": 0.5001373317685834, "grad_norm": 0.09322655200958252, "learning_rate": 1.5033417607449497e-05, "loss": 8.901, "step": 100150 }, { "epoch": 0.500187270593523, "grad_norm": 0.0963103175163269, "learning_rate": 1.503191569250795e-05, "loss": 8.9022, "step": 100160 }, { "epoch": 0.5002372094184624, "grad_norm": 0.08915145695209503, "learning_rate": 1.5030413777566398e-05, "loss": 8.902, "step": 100170 }, { "epoch": 0.5002871482434018, "grad_norm": 0.08930926024913788, "learning_rate": 1.5028911862624846e-05, "loss": 8.9118, "step": 100180 }, { "epoch": 0.5003370870683412, "grad_norm": 0.09767061471939087, "learning_rate": 1.5027409947683296e-05, "loss": 8.9057, "step": 100190 }, { "epoch": 0.5003870258932808, "grad_norm": 0.09693524241447449, "learning_rate": 1.5025908032741745e-05, "loss": 8.8938, "step": 100200 }, { "epoch": 0.5004369647182202, "grad_norm": 0.09122538566589355, "learning_rate": 1.5024406117800197e-05, "loss": 8.9038, "step": 100210 }, { "epoch": 0.5004869035431596, "grad_norm": 0.0925898402929306, "learning_rate": 1.5022904202858645e-05, "loss": 8.9147, "step": 100220 }, { "epoch": 0.500536842368099, "grad_norm": 0.08967383205890656, "learning_rate": 1.5021402287917094e-05, "loss": 8.9091, "step": 100230 }, { "epoch": 0.5005867811930386, "grad_norm": 0.09207455813884735, "learning_rate": 1.5019900372975544e-05, "loss": 8.9131, "step": 100240 }, { "epoch": 0.500636720017978, "grad_norm": 0.09674876183271408, "learning_rate": 1.5018398458033992e-05, "loss": 8.9054, "step": 100250 }, { "epoch": 0.5006866588429174, "grad_norm": 0.09284425526857376, "learning_rate": 1.5016896543092444e-05, "loss": 8.9048, "step": 100260 }, { "epoch": 0.5007365976678568, "grad_norm": 0.09239511936903, "learning_rate": 1.5015394628150893e-05, "loss": 8.9162, "step": 100270 }, { "epoch": 0.5007865364927964, "grad_norm": 0.0956454798579216, "learning_rate": 1.5013892713209341e-05, "loss": 8.9134, "step": 100280 }, { "epoch": 0.5008364753177358, "grad_norm": 0.0959239974617958, "learning_rate": 1.5012390798267791e-05, "loss": 8.8996, "step": 100290 }, { "epoch": 0.5008864141426752, "grad_norm": 0.0976717472076416, "learning_rate": 1.501088888332624e-05, "loss": 8.9049, "step": 100300 }, { "epoch": 0.5009363529676146, "grad_norm": 0.09285130351781845, "learning_rate": 1.5009386968384692e-05, "loss": 8.9057, "step": 100310 }, { "epoch": 0.5009862917925542, "grad_norm": 0.08767900615930557, "learning_rate": 1.500788505344314e-05, "loss": 8.9035, "step": 100320 }, { "epoch": 0.5010362306174936, "grad_norm": 0.09420838207006454, "learning_rate": 1.5006383138501592e-05, "loss": 8.8962, "step": 100330 }, { "epoch": 0.501086169442433, "grad_norm": 0.09375480562448502, "learning_rate": 1.5004881223560039e-05, "loss": 8.9055, "step": 100340 }, { "epoch": 0.5011361082673724, "grad_norm": 0.09297312796115875, "learning_rate": 1.5003379308618487e-05, "loss": 8.9098, "step": 100350 }, { "epoch": 0.501186047092312, "grad_norm": 0.09020781517028809, "learning_rate": 1.500187739367694e-05, "loss": 8.9313, "step": 100360 }, { "epoch": 0.5012359859172514, "grad_norm": 0.09823616594076157, "learning_rate": 1.5000375478735388e-05, "loss": 8.9211, "step": 100370 }, { "epoch": 0.5012859247421908, "grad_norm": 0.09264297038316727, "learning_rate": 1.4998873563793838e-05, "loss": 8.9147, "step": 100380 }, { "epoch": 0.5013358635671302, "grad_norm": 0.09266399592161179, "learning_rate": 1.4997371648852286e-05, "loss": 8.9005, "step": 100390 }, { "epoch": 0.5013858023920698, "grad_norm": 0.0926387682557106, "learning_rate": 1.4995869733910737e-05, "loss": 8.9014, "step": 100400 }, { "epoch": 0.5014357412170092, "grad_norm": 0.09379039704799652, "learning_rate": 1.4994367818969187e-05, "loss": 8.9031, "step": 100410 }, { "epoch": 0.5014856800419486, "grad_norm": 0.09653208404779434, "learning_rate": 1.4992865904027635e-05, "loss": 8.9117, "step": 100420 }, { "epoch": 0.501535618866888, "grad_norm": 0.09672179073095322, "learning_rate": 1.4991363989086086e-05, "loss": 8.9151, "step": 100430 }, { "epoch": 0.5015855576918276, "grad_norm": 0.09220828860998154, "learning_rate": 1.4989862074144534e-05, "loss": 8.918, "step": 100440 }, { "epoch": 0.501635496516767, "grad_norm": 0.08738303184509277, "learning_rate": 1.4988360159202984e-05, "loss": 8.8946, "step": 100450 }, { "epoch": 0.5016854353417064, "grad_norm": 0.0968918427824974, "learning_rate": 1.4986858244261434e-05, "loss": 8.9172, "step": 100460 }, { "epoch": 0.5017353741666458, "grad_norm": 0.08795435726642609, "learning_rate": 1.4985356329319883e-05, "loss": 8.8957, "step": 100470 }, { "epoch": 0.5017853129915854, "grad_norm": 0.09090543538331985, "learning_rate": 1.4983854414378333e-05, "loss": 8.9172, "step": 100480 }, { "epoch": 0.5018352518165248, "grad_norm": 0.09485392272472382, "learning_rate": 1.4982352499436782e-05, "loss": 8.908, "step": 100490 }, { "epoch": 0.5018851906414642, "grad_norm": 0.094363734126091, "learning_rate": 1.4980850584495232e-05, "loss": 8.9137, "step": 100500 }, { "epoch": 0.5019351294664036, "grad_norm": 0.08762264251708984, "learning_rate": 1.4979348669553682e-05, "loss": 8.916, "step": 100510 }, { "epoch": 0.5019850682913432, "grad_norm": 0.09093135595321655, "learning_rate": 1.497784675461213e-05, "loss": 8.9092, "step": 100520 }, { "epoch": 0.5020350071162826, "grad_norm": 0.08741986751556396, "learning_rate": 1.497634483967058e-05, "loss": 8.9032, "step": 100530 }, { "epoch": 0.502084945941222, "grad_norm": 0.09750941395759583, "learning_rate": 1.4974842924729029e-05, "loss": 8.908, "step": 100540 }, { "epoch": 0.5021348847661614, "grad_norm": 0.09141114354133606, "learning_rate": 1.497334100978748e-05, "loss": 8.8956, "step": 100550 }, { "epoch": 0.502184823591101, "grad_norm": 0.08919312804937363, "learning_rate": 1.497183909484593e-05, "loss": 8.9119, "step": 100560 }, { "epoch": 0.5022347624160404, "grad_norm": 0.09717653691768646, "learning_rate": 1.497033717990438e-05, "loss": 8.8956, "step": 100570 }, { "epoch": 0.5022847012409798, "grad_norm": 0.09845616668462753, "learning_rate": 1.4968835264962828e-05, "loss": 8.8947, "step": 100580 }, { "epoch": 0.5023346400659192, "grad_norm": 0.09342017769813538, "learning_rate": 1.4967333350021277e-05, "loss": 8.903, "step": 100590 }, { "epoch": 0.5023845788908587, "grad_norm": 0.08772523701190948, "learning_rate": 1.4965831435079727e-05, "loss": 8.9031, "step": 100600 }, { "epoch": 0.5024345177157982, "grad_norm": 0.09380695968866348, "learning_rate": 1.4964329520138177e-05, "loss": 8.9074, "step": 100610 }, { "epoch": 0.5024844565407376, "grad_norm": 0.09516066312789917, "learning_rate": 1.4962827605196627e-05, "loss": 8.8997, "step": 100620 }, { "epoch": 0.502534395365677, "grad_norm": 0.0951061025261879, "learning_rate": 1.4961325690255076e-05, "loss": 8.8925, "step": 100630 }, { "epoch": 0.5025843341906165, "grad_norm": 0.09456291794776917, "learning_rate": 1.4959823775313524e-05, "loss": 8.8921, "step": 100640 }, { "epoch": 0.502634273015556, "grad_norm": 0.09144452959299088, "learning_rate": 1.4958321860371974e-05, "loss": 8.8916, "step": 100650 }, { "epoch": 0.5026842118404954, "grad_norm": 0.09398289024829865, "learning_rate": 1.4956819945430424e-05, "loss": 8.9007, "step": 100660 }, { "epoch": 0.5027341506654348, "grad_norm": 0.09931785613298416, "learning_rate": 1.4955318030488875e-05, "loss": 8.903, "step": 100670 }, { "epoch": 0.5027840894903743, "grad_norm": 0.09348601847887039, "learning_rate": 1.4953816115547323e-05, "loss": 8.9051, "step": 100680 }, { "epoch": 0.5028340283153138, "grad_norm": 0.08900678902864456, "learning_rate": 1.4952314200605772e-05, "loss": 8.9082, "step": 100690 }, { "epoch": 0.5028839671402532, "grad_norm": 0.09216514974832535, "learning_rate": 1.4950812285664222e-05, "loss": 8.9075, "step": 100700 }, { "epoch": 0.5029339059651926, "grad_norm": 0.09171604365110397, "learning_rate": 1.4949310370722672e-05, "loss": 8.9241, "step": 100710 }, { "epoch": 0.5029838447901321, "grad_norm": 0.08902275562286377, "learning_rate": 1.4947808455781122e-05, "loss": 8.9145, "step": 100720 }, { "epoch": 0.5030337836150716, "grad_norm": 0.09136269986629486, "learning_rate": 1.4946306540839572e-05, "loss": 8.9118, "step": 100730 }, { "epoch": 0.503083722440011, "grad_norm": 0.09541922062635422, "learning_rate": 1.4944804625898019e-05, "loss": 8.9042, "step": 100740 }, { "epoch": 0.5031336612649504, "grad_norm": 0.08974391967058182, "learning_rate": 1.494330271095647e-05, "loss": 8.9063, "step": 100750 }, { "epoch": 0.5031836000898899, "grad_norm": 0.0941217839717865, "learning_rate": 1.494180079601492e-05, "loss": 8.9088, "step": 100760 }, { "epoch": 0.5032335389148294, "grad_norm": 0.0899343267083168, "learning_rate": 1.494029888107337e-05, "loss": 8.8967, "step": 100770 }, { "epoch": 0.5032834777397688, "grad_norm": 0.09517884254455566, "learning_rate": 1.493879696613182e-05, "loss": 8.8919, "step": 100780 }, { "epoch": 0.5033334165647082, "grad_norm": 0.08698021620512009, "learning_rate": 1.4937295051190267e-05, "loss": 8.9165, "step": 100790 }, { "epoch": 0.5033833553896477, "grad_norm": 0.09504187107086182, "learning_rate": 1.4935793136248717e-05, "loss": 8.9146, "step": 100800 }, { "epoch": 0.5034332942145872, "grad_norm": 0.09907368570566177, "learning_rate": 1.4934291221307167e-05, "loss": 8.8958, "step": 100810 }, { "epoch": 0.5034832330395266, "grad_norm": 0.09145436435937881, "learning_rate": 1.4932789306365617e-05, "loss": 8.9008, "step": 100820 }, { "epoch": 0.503533171864466, "grad_norm": 0.09592849761247635, "learning_rate": 1.4931287391424067e-05, "loss": 8.9033, "step": 100830 }, { "epoch": 0.5035831106894055, "grad_norm": 0.0957927331328392, "learning_rate": 1.4929785476482514e-05, "loss": 8.91, "step": 100840 }, { "epoch": 0.503633049514345, "grad_norm": 0.09893249720335007, "learning_rate": 1.4928283561540964e-05, "loss": 8.9059, "step": 100850 }, { "epoch": 0.5036829883392844, "grad_norm": 0.0861205905675888, "learning_rate": 1.4926781646599414e-05, "loss": 8.9054, "step": 100860 }, { "epoch": 0.5037329271642238, "grad_norm": 0.08853903412818909, "learning_rate": 1.4925279731657865e-05, "loss": 8.9096, "step": 100870 }, { "epoch": 0.5037828659891632, "grad_norm": 0.09132624417543411, "learning_rate": 1.4923777816716315e-05, "loss": 8.8916, "step": 100880 }, { "epoch": 0.5038328048141028, "grad_norm": 0.08741272985935211, "learning_rate": 1.4922275901774763e-05, "loss": 8.8881, "step": 100890 }, { "epoch": 0.5038827436390422, "grad_norm": 0.09232524782419205, "learning_rate": 1.4920773986833212e-05, "loss": 8.9061, "step": 100900 }, { "epoch": 0.5039326824639816, "grad_norm": 0.09237165004014969, "learning_rate": 1.4919272071891662e-05, "loss": 8.9059, "step": 100910 }, { "epoch": 0.503982621288921, "grad_norm": 0.09467703849077225, "learning_rate": 1.4917770156950112e-05, "loss": 8.9001, "step": 100920 }, { "epoch": 0.5040325601138606, "grad_norm": 0.09434116631746292, "learning_rate": 1.4916268242008562e-05, "loss": 8.8948, "step": 100930 }, { "epoch": 0.5040824989388, "grad_norm": 0.0881483182311058, "learning_rate": 1.491476632706701e-05, "loss": 8.8947, "step": 100940 }, { "epoch": 0.5041324377637394, "grad_norm": 0.09188562631607056, "learning_rate": 1.491326441212546e-05, "loss": 8.9034, "step": 100950 }, { "epoch": 0.5041823765886788, "grad_norm": 0.09108608961105347, "learning_rate": 1.491176249718391e-05, "loss": 8.8958, "step": 100960 }, { "epoch": 0.5042323154136183, "grad_norm": 0.09667716175317764, "learning_rate": 1.491026058224236e-05, "loss": 8.8876, "step": 100970 }, { "epoch": 0.5042822542385578, "grad_norm": 0.0894923135638237, "learning_rate": 1.490875866730081e-05, "loss": 8.8906, "step": 100980 }, { "epoch": 0.5043321930634972, "grad_norm": 0.08996430784463882, "learning_rate": 1.4907256752359258e-05, "loss": 8.8851, "step": 100990 }, { "epoch": 0.5043821318884366, "grad_norm": 0.0906483381986618, "learning_rate": 1.4905754837417707e-05, "loss": 8.8891, "step": 101000 }, { "epoch": 0.5044320707133761, "grad_norm": 0.09572287648916245, "learning_rate": 1.4904252922476157e-05, "loss": 8.906, "step": 101010 }, { "epoch": 0.5044820095383156, "grad_norm": 0.09146282821893692, "learning_rate": 1.4902751007534607e-05, "loss": 8.9083, "step": 101020 }, { "epoch": 0.504531948363255, "grad_norm": 0.10019837319850922, "learning_rate": 1.4901249092593057e-05, "loss": 8.9084, "step": 101030 }, { "epoch": 0.5045818871881944, "grad_norm": 0.09378591179847717, "learning_rate": 1.4899747177651506e-05, "loss": 8.899, "step": 101040 }, { "epoch": 0.5046318260131339, "grad_norm": 0.0919695645570755, "learning_rate": 1.4898245262709956e-05, "loss": 8.899, "step": 101050 }, { "epoch": 0.5046817648380734, "grad_norm": 0.09100176393985748, "learning_rate": 1.4896743347768404e-05, "loss": 8.9073, "step": 101060 }, { "epoch": 0.5047317036630128, "grad_norm": 0.09281841665506363, "learning_rate": 1.4895241432826855e-05, "loss": 8.9013, "step": 101070 }, { "epoch": 0.5047816424879522, "grad_norm": 0.09110787510871887, "learning_rate": 1.4893739517885305e-05, "loss": 8.885, "step": 101080 }, { "epoch": 0.5048315813128917, "grad_norm": 0.09434214234352112, "learning_rate": 1.4892237602943753e-05, "loss": 8.9101, "step": 101090 }, { "epoch": 0.5048815201378312, "grad_norm": 0.09112465381622314, "learning_rate": 1.4890735688002203e-05, "loss": 8.8961, "step": 101100 }, { "epoch": 0.5049314589627706, "grad_norm": 0.0931396409869194, "learning_rate": 1.4889233773060652e-05, "loss": 8.9027, "step": 101110 }, { "epoch": 0.50498139778771, "grad_norm": 0.0969899371266365, "learning_rate": 1.4887731858119102e-05, "loss": 8.8754, "step": 101120 }, { "epoch": 0.5050313366126495, "grad_norm": 0.09737983345985413, "learning_rate": 1.4886229943177552e-05, "loss": 8.896, "step": 101130 }, { "epoch": 0.505081275437589, "grad_norm": 0.0903887152671814, "learning_rate": 1.4884728028236e-05, "loss": 8.8901, "step": 101140 }, { "epoch": 0.5051312142625284, "grad_norm": 0.09329243004322052, "learning_rate": 1.4883226113294451e-05, "loss": 8.9111, "step": 101150 }, { "epoch": 0.5051811530874678, "grad_norm": 0.09709962457418442, "learning_rate": 1.48817241983529e-05, "loss": 8.9032, "step": 101160 }, { "epoch": 0.5052310919124073, "grad_norm": 0.09550347924232483, "learning_rate": 1.488022228341135e-05, "loss": 8.8953, "step": 101170 }, { "epoch": 0.5052810307373468, "grad_norm": 0.09034687280654907, "learning_rate": 1.48787203684698e-05, "loss": 8.8965, "step": 101180 }, { "epoch": 0.5053309695622862, "grad_norm": 0.08858486264944077, "learning_rate": 1.4877218453528248e-05, "loss": 8.8872, "step": 101190 }, { "epoch": 0.5053809083872256, "grad_norm": 0.09409047663211823, "learning_rate": 1.4875716538586698e-05, "loss": 8.8993, "step": 101200 }, { "epoch": 0.5054308472121651, "grad_norm": 0.09161051362752914, "learning_rate": 1.4874214623645149e-05, "loss": 8.8977, "step": 101210 }, { "epoch": 0.5054807860371046, "grad_norm": 0.09429701417684555, "learning_rate": 1.4872712708703597e-05, "loss": 8.9064, "step": 101220 }, { "epoch": 0.505530724862044, "grad_norm": 0.09554371237754822, "learning_rate": 1.4871210793762047e-05, "loss": 8.8981, "step": 101230 }, { "epoch": 0.5055806636869834, "grad_norm": 0.10310646891593933, "learning_rate": 1.4869708878820496e-05, "loss": 8.899, "step": 101240 }, { "epoch": 0.5056306025119229, "grad_norm": 0.09066363424062729, "learning_rate": 1.4868206963878946e-05, "loss": 8.9038, "step": 101250 }, { "epoch": 0.5056805413368624, "grad_norm": 0.09133642911911011, "learning_rate": 1.4866705048937396e-05, "loss": 8.893, "step": 101260 }, { "epoch": 0.5057304801618018, "grad_norm": 0.09266974031925201, "learning_rate": 1.4865203133995845e-05, "loss": 8.8918, "step": 101270 }, { "epoch": 0.5057804189867412, "grad_norm": 0.09828782081604004, "learning_rate": 1.4863701219054295e-05, "loss": 8.9079, "step": 101280 }, { "epoch": 0.5058303578116807, "grad_norm": 0.09121903032064438, "learning_rate": 1.4862199304112743e-05, "loss": 8.9018, "step": 101290 }, { "epoch": 0.5058802966366202, "grad_norm": 0.09173334389925003, "learning_rate": 1.4860697389171193e-05, "loss": 8.8951, "step": 101300 }, { "epoch": 0.5059302354615596, "grad_norm": 0.09970010817050934, "learning_rate": 1.4859195474229644e-05, "loss": 8.8949, "step": 101310 }, { "epoch": 0.505980174286499, "grad_norm": 0.0909634530544281, "learning_rate": 1.4857693559288092e-05, "loss": 8.8874, "step": 101320 }, { "epoch": 0.5060301131114385, "grad_norm": 0.09572920203208923, "learning_rate": 1.4856191644346542e-05, "loss": 8.882, "step": 101330 }, { "epoch": 0.506080051936378, "grad_norm": 0.08662997931241989, "learning_rate": 1.485468972940499e-05, "loss": 8.9034, "step": 101340 }, { "epoch": 0.5061299907613174, "grad_norm": 0.09258969128131866, "learning_rate": 1.4853187814463441e-05, "loss": 8.8739, "step": 101350 }, { "epoch": 0.5061799295862568, "grad_norm": 0.08739626407623291, "learning_rate": 1.4851685899521891e-05, "loss": 8.8976, "step": 101360 }, { "epoch": 0.5062298684111963, "grad_norm": 0.09354405850172043, "learning_rate": 1.4850183984580341e-05, "loss": 8.8916, "step": 101370 }, { "epoch": 0.5062798072361357, "grad_norm": 0.0959988608956337, "learning_rate": 1.484868206963879e-05, "loss": 8.9009, "step": 101380 }, { "epoch": 0.5063297460610752, "grad_norm": 0.09055614471435547, "learning_rate": 1.4847180154697238e-05, "loss": 8.8916, "step": 101390 }, { "epoch": 0.5063796848860146, "grad_norm": 0.09432981163263321, "learning_rate": 1.4845678239755688e-05, "loss": 8.9087, "step": 101400 }, { "epoch": 0.5064296237109541, "grad_norm": 0.09564248472452164, "learning_rate": 1.4844176324814139e-05, "loss": 8.891, "step": 101410 }, { "epoch": 0.5064795625358935, "grad_norm": 0.09236615151166916, "learning_rate": 1.4842674409872589e-05, "loss": 8.8975, "step": 101420 }, { "epoch": 0.506529501360833, "grad_norm": 0.09159623831510544, "learning_rate": 1.4841172494931037e-05, "loss": 8.9015, "step": 101430 }, { "epoch": 0.5065794401857724, "grad_norm": 0.09691286832094193, "learning_rate": 1.4839670579989487e-05, "loss": 8.8919, "step": 101440 }, { "epoch": 0.5066293790107119, "grad_norm": 0.09283226728439331, "learning_rate": 1.4838168665047936e-05, "loss": 8.8946, "step": 101450 }, { "epoch": 0.5066793178356513, "grad_norm": 0.09346681088209152, "learning_rate": 1.4836666750106386e-05, "loss": 8.8962, "step": 101460 }, { "epoch": 0.5067292566605908, "grad_norm": 0.09086382389068604, "learning_rate": 1.4835164835164836e-05, "loss": 8.8942, "step": 101470 }, { "epoch": 0.5067791954855302, "grad_norm": 0.09305468201637268, "learning_rate": 1.4833662920223285e-05, "loss": 8.8986, "step": 101480 }, { "epoch": 0.5068291343104697, "grad_norm": 0.09416045248508453, "learning_rate": 1.4832161005281735e-05, "loss": 8.8897, "step": 101490 }, { "epoch": 0.5068790731354091, "grad_norm": 0.09527656435966492, "learning_rate": 1.4830659090340183e-05, "loss": 8.8883, "step": 101500 }, { "epoch": 0.5069290119603486, "grad_norm": 0.09679878503084183, "learning_rate": 1.4829157175398634e-05, "loss": 8.8931, "step": 101510 }, { "epoch": 0.506978950785288, "grad_norm": 0.09845301508903503, "learning_rate": 1.4827655260457084e-05, "loss": 8.8859, "step": 101520 }, { "epoch": 0.5070288896102275, "grad_norm": 0.09258875995874405, "learning_rate": 1.4826153345515534e-05, "loss": 8.8809, "step": 101530 }, { "epoch": 0.5070788284351669, "grad_norm": 0.08814681321382523, "learning_rate": 1.4824651430573982e-05, "loss": 8.8913, "step": 101540 }, { "epoch": 0.5071287672601064, "grad_norm": 0.09510982036590576, "learning_rate": 1.4823149515632431e-05, "loss": 8.9071, "step": 101550 }, { "epoch": 0.5071787060850458, "grad_norm": 0.09275613725185394, "learning_rate": 1.4821647600690881e-05, "loss": 8.9034, "step": 101560 }, { "epoch": 0.5072286449099853, "grad_norm": 0.09569612890481949, "learning_rate": 1.4820145685749331e-05, "loss": 8.8981, "step": 101570 }, { "epoch": 0.5072785837349247, "grad_norm": 0.10368134826421738, "learning_rate": 1.4818643770807781e-05, "loss": 8.8932, "step": 101580 }, { "epoch": 0.5073285225598642, "grad_norm": 0.0873536765575409, "learning_rate": 1.481714185586623e-05, "loss": 8.8866, "step": 101590 }, { "epoch": 0.5073784613848036, "grad_norm": 0.0927792489528656, "learning_rate": 1.4815639940924678e-05, "loss": 8.8916, "step": 101600 }, { "epoch": 0.5074284002097431, "grad_norm": 0.09179840981960297, "learning_rate": 1.4814138025983129e-05, "loss": 8.8925, "step": 101610 }, { "epoch": 0.5074783390346825, "grad_norm": 0.09306514263153076, "learning_rate": 1.4812636111041579e-05, "loss": 8.9127, "step": 101620 }, { "epoch": 0.507528277859622, "grad_norm": 0.09609080106019974, "learning_rate": 1.4811134196100029e-05, "loss": 8.8901, "step": 101630 }, { "epoch": 0.5075782166845614, "grad_norm": 0.09421303868293762, "learning_rate": 1.4809632281158477e-05, "loss": 8.8944, "step": 101640 }, { "epoch": 0.5076281555095009, "grad_norm": 0.09363938122987747, "learning_rate": 1.4808130366216926e-05, "loss": 8.9101, "step": 101650 }, { "epoch": 0.5076780943344403, "grad_norm": 0.09263626486063004, "learning_rate": 1.4806628451275376e-05, "loss": 8.8904, "step": 101660 }, { "epoch": 0.5077280331593798, "grad_norm": 0.08773566037416458, "learning_rate": 1.4805126536333826e-05, "loss": 8.8914, "step": 101670 }, { "epoch": 0.5077779719843192, "grad_norm": 0.09525804966688156, "learning_rate": 1.4803624621392276e-05, "loss": 8.8931, "step": 101680 }, { "epoch": 0.5078279108092587, "grad_norm": 0.09385735541582108, "learning_rate": 1.4802122706450727e-05, "loss": 8.904, "step": 101690 }, { "epoch": 0.5078778496341981, "grad_norm": 0.0941053256392479, "learning_rate": 1.4800620791509173e-05, "loss": 8.8882, "step": 101700 }, { "epoch": 0.5079277884591376, "grad_norm": 0.10088019073009491, "learning_rate": 1.4799118876567624e-05, "loss": 8.8875, "step": 101710 }, { "epoch": 0.507977727284077, "grad_norm": 0.09125470370054245, "learning_rate": 1.4797616961626074e-05, "loss": 8.8858, "step": 101720 }, { "epoch": 0.5080276661090165, "grad_norm": 0.08886610716581345, "learning_rate": 1.4796115046684524e-05, "loss": 8.8908, "step": 101730 }, { "epoch": 0.5080776049339559, "grad_norm": 0.09195410460233688, "learning_rate": 1.4794613131742974e-05, "loss": 8.8764, "step": 101740 }, { "epoch": 0.5081275437588954, "grad_norm": 0.09641274064779282, "learning_rate": 1.4793111216801421e-05, "loss": 8.88, "step": 101750 }, { "epoch": 0.5081774825838348, "grad_norm": 0.08937504887580872, "learning_rate": 1.4791609301859871e-05, "loss": 8.8912, "step": 101760 }, { "epoch": 0.5082274214087743, "grad_norm": 0.10038053244352341, "learning_rate": 1.4790107386918321e-05, "loss": 8.8898, "step": 101770 }, { "epoch": 0.5082773602337137, "grad_norm": 0.08841981738805771, "learning_rate": 1.4788605471976772e-05, "loss": 8.902, "step": 101780 }, { "epoch": 0.5083272990586531, "grad_norm": 0.09414731711149216, "learning_rate": 1.4787103557035222e-05, "loss": 8.8862, "step": 101790 }, { "epoch": 0.5083772378835926, "grad_norm": 0.09005799144506454, "learning_rate": 1.4785601642093668e-05, "loss": 8.884, "step": 101800 }, { "epoch": 0.5084271767085321, "grad_norm": 0.09526651352643967, "learning_rate": 1.4784099727152119e-05, "loss": 8.8958, "step": 101810 }, { "epoch": 0.5084771155334715, "grad_norm": 0.094487763941288, "learning_rate": 1.4782597812210569e-05, "loss": 8.9035, "step": 101820 }, { "epoch": 0.508527054358411, "grad_norm": 0.0912264958024025, "learning_rate": 1.4781095897269019e-05, "loss": 8.8972, "step": 101830 }, { "epoch": 0.5085769931833504, "grad_norm": 0.09153269231319427, "learning_rate": 1.477959398232747e-05, "loss": 8.8956, "step": 101840 }, { "epoch": 0.5086269320082899, "grad_norm": 0.09265515953302383, "learning_rate": 1.4778092067385918e-05, "loss": 8.8747, "step": 101850 }, { "epoch": 0.5086768708332293, "grad_norm": 0.09390241652727127, "learning_rate": 1.4776590152444366e-05, "loss": 8.9175, "step": 101860 }, { "epoch": 0.5087268096581687, "grad_norm": 0.09035301208496094, "learning_rate": 1.4775088237502816e-05, "loss": 8.8821, "step": 101870 }, { "epoch": 0.5087767484831082, "grad_norm": 0.08922744542360306, "learning_rate": 1.4773586322561267e-05, "loss": 8.8883, "step": 101880 }, { "epoch": 0.5088266873080476, "grad_norm": 0.09103063493967056, "learning_rate": 1.4772084407619717e-05, "loss": 8.8571, "step": 101890 }, { "epoch": 0.5088766261329871, "grad_norm": 0.09650906175374985, "learning_rate": 1.4770582492678165e-05, "loss": 8.884, "step": 101900 }, { "epoch": 0.5089265649579265, "grad_norm": 0.09376221150159836, "learning_rate": 1.4769080577736614e-05, "loss": 8.8981, "step": 101910 }, { "epoch": 0.508976503782866, "grad_norm": 0.0935838520526886, "learning_rate": 1.4767578662795064e-05, "loss": 8.8788, "step": 101920 }, { "epoch": 0.5090264426078054, "grad_norm": 0.09474118798971176, "learning_rate": 1.4766076747853514e-05, "loss": 8.8921, "step": 101930 }, { "epoch": 0.5090763814327449, "grad_norm": 0.0944557636976242, "learning_rate": 1.4764574832911964e-05, "loss": 8.9048, "step": 101940 }, { "epoch": 0.5091263202576843, "grad_norm": 0.08811389654874802, "learning_rate": 1.4763072917970413e-05, "loss": 8.9097, "step": 101950 }, { "epoch": 0.5091762590826238, "grad_norm": 0.0878419429063797, "learning_rate": 1.4761571003028861e-05, "loss": 8.8932, "step": 101960 }, { "epoch": 0.5092261979075632, "grad_norm": 0.09334632754325867, "learning_rate": 1.4760069088087311e-05, "loss": 8.8877, "step": 101970 }, { "epoch": 0.5092761367325027, "grad_norm": 0.09611135721206665, "learning_rate": 1.4758567173145762e-05, "loss": 8.8956, "step": 101980 }, { "epoch": 0.5093260755574421, "grad_norm": 0.09940996766090393, "learning_rate": 1.4757065258204212e-05, "loss": 8.8933, "step": 101990 }, { "epoch": 0.5093760143823816, "grad_norm": 0.08453790843486786, "learning_rate": 1.475556334326266e-05, "loss": 8.8861, "step": 102000 }, { "epoch": 0.509425953207321, "grad_norm": 0.09275045245885849, "learning_rate": 1.475406142832111e-05, "loss": 8.8927, "step": 102010 }, { "epoch": 0.5094758920322605, "grad_norm": 0.09172289073467255, "learning_rate": 1.4752559513379559e-05, "loss": 8.8762, "step": 102020 }, { "epoch": 0.5095258308571999, "grad_norm": 0.09009222686290741, "learning_rate": 1.4751057598438009e-05, "loss": 8.9011, "step": 102030 }, { "epoch": 0.5095757696821394, "grad_norm": 0.09388335049152374, "learning_rate": 1.474955568349646e-05, "loss": 8.8875, "step": 102040 }, { "epoch": 0.5096257085070788, "grad_norm": 0.09411590546369553, "learning_rate": 1.4748053768554908e-05, "loss": 8.8942, "step": 102050 }, { "epoch": 0.5096756473320183, "grad_norm": 0.08968400955200195, "learning_rate": 1.4746551853613358e-05, "loss": 8.8953, "step": 102060 }, { "epoch": 0.5097255861569577, "grad_norm": 0.09362632036209106, "learning_rate": 1.4745049938671806e-05, "loss": 8.9011, "step": 102070 }, { "epoch": 0.5097755249818972, "grad_norm": 0.093928761780262, "learning_rate": 1.4743548023730257e-05, "loss": 8.8858, "step": 102080 }, { "epoch": 0.5098254638068366, "grad_norm": 0.09098640829324722, "learning_rate": 1.4742046108788707e-05, "loss": 8.8899, "step": 102090 }, { "epoch": 0.5098754026317761, "grad_norm": 0.09484419226646423, "learning_rate": 1.4740544193847155e-05, "loss": 8.8725, "step": 102100 }, { "epoch": 0.5099253414567155, "grad_norm": 0.0966237410902977, "learning_rate": 1.4739042278905605e-05, "loss": 8.8903, "step": 102110 }, { "epoch": 0.509975280281655, "grad_norm": 0.09729520231485367, "learning_rate": 1.4737540363964054e-05, "loss": 8.8814, "step": 102120 }, { "epoch": 0.5100252191065944, "grad_norm": 0.09406805038452148, "learning_rate": 1.4736038449022504e-05, "loss": 8.8763, "step": 102130 }, { "epoch": 0.5100751579315339, "grad_norm": 0.0967145785689354, "learning_rate": 1.4734536534080954e-05, "loss": 8.8686, "step": 102140 }, { "epoch": 0.5101250967564733, "grad_norm": 0.10264673829078674, "learning_rate": 1.4733034619139403e-05, "loss": 8.8916, "step": 102150 }, { "epoch": 0.5101750355814128, "grad_norm": 0.09559258073568344, "learning_rate": 1.4731532704197853e-05, "loss": 8.889, "step": 102160 }, { "epoch": 0.5102249744063522, "grad_norm": 0.09675032645463943, "learning_rate": 1.4730030789256303e-05, "loss": 8.8761, "step": 102170 }, { "epoch": 0.5102749132312917, "grad_norm": 0.09106460958719254, "learning_rate": 1.4728528874314752e-05, "loss": 8.89, "step": 102180 }, { "epoch": 0.5103248520562311, "grad_norm": 0.09537220746278763, "learning_rate": 1.4727026959373202e-05, "loss": 8.892, "step": 102190 }, { "epoch": 0.5103747908811705, "grad_norm": 0.08975797146558762, "learning_rate": 1.472552504443165e-05, "loss": 8.8853, "step": 102200 }, { "epoch": 0.51042472970611, "grad_norm": 0.09914405643939972, "learning_rate": 1.47240231294901e-05, "loss": 8.8851, "step": 102210 }, { "epoch": 0.5104746685310495, "grad_norm": 0.09528080374002457, "learning_rate": 1.472252121454855e-05, "loss": 8.9045, "step": 102220 }, { "epoch": 0.5105246073559889, "grad_norm": 0.09561271965503693, "learning_rate": 1.4721019299606999e-05, "loss": 8.8875, "step": 102230 }, { "epoch": 0.5105745461809283, "grad_norm": 0.09324440360069275, "learning_rate": 1.471951738466545e-05, "loss": 8.882, "step": 102240 }, { "epoch": 0.5106244850058678, "grad_norm": 0.10084079951047897, "learning_rate": 1.4718015469723898e-05, "loss": 8.8891, "step": 102250 }, { "epoch": 0.5106744238308073, "grad_norm": 0.09249073266983032, "learning_rate": 1.4716513554782348e-05, "loss": 8.8773, "step": 102260 }, { "epoch": 0.5107243626557467, "grad_norm": 0.09199045598506927, "learning_rate": 1.4715011639840798e-05, "loss": 8.8801, "step": 102270 }, { "epoch": 0.5107743014806861, "grad_norm": 0.09335954487323761, "learning_rate": 1.4713509724899247e-05, "loss": 8.8854, "step": 102280 }, { "epoch": 0.5108242403056256, "grad_norm": 0.09267157316207886, "learning_rate": 1.4712007809957697e-05, "loss": 8.8834, "step": 102290 }, { "epoch": 0.5108741791305651, "grad_norm": 0.0931268110871315, "learning_rate": 1.4710505895016145e-05, "loss": 8.8788, "step": 102300 }, { "epoch": 0.5109241179555045, "grad_norm": 0.09288343042135239, "learning_rate": 1.4709003980074595e-05, "loss": 8.8813, "step": 102310 }, { "epoch": 0.5109740567804439, "grad_norm": 0.0869889035820961, "learning_rate": 1.4707502065133046e-05, "loss": 8.8893, "step": 102320 }, { "epoch": 0.5110239956053834, "grad_norm": 0.0896022841334343, "learning_rate": 1.4706000150191496e-05, "loss": 8.8793, "step": 102330 }, { "epoch": 0.5110739344303229, "grad_norm": 0.09461673349142075, "learning_rate": 1.4704498235249944e-05, "loss": 8.8852, "step": 102340 }, { "epoch": 0.5111238732552623, "grad_norm": 0.09535450488328934, "learning_rate": 1.4702996320308393e-05, "loss": 8.8921, "step": 102350 }, { "epoch": 0.5111738120802017, "grad_norm": 0.09272834658622742, "learning_rate": 1.4701494405366843e-05, "loss": 8.9021, "step": 102360 }, { "epoch": 0.5112237509051412, "grad_norm": 0.09534373134374619, "learning_rate": 1.4699992490425293e-05, "loss": 8.8848, "step": 102370 }, { "epoch": 0.5112736897300807, "grad_norm": 0.09193330258131027, "learning_rate": 1.4698490575483743e-05, "loss": 8.8843, "step": 102380 }, { "epoch": 0.5113236285550201, "grad_norm": 0.09175775945186615, "learning_rate": 1.4696988660542192e-05, "loss": 8.895, "step": 102390 }, { "epoch": 0.5113735673799595, "grad_norm": 0.09013816714286804, "learning_rate": 1.469548674560064e-05, "loss": 8.8983, "step": 102400 }, { "epoch": 0.511423506204899, "grad_norm": 0.09730950742959976, "learning_rate": 1.469398483065909e-05, "loss": 8.8866, "step": 102410 }, { "epoch": 0.5114734450298385, "grad_norm": 0.09352081269025803, "learning_rate": 1.469248291571754e-05, "loss": 8.8842, "step": 102420 }, { "epoch": 0.5115233838547779, "grad_norm": 0.09047748893499374, "learning_rate": 1.469098100077599e-05, "loss": 8.8971, "step": 102430 }, { "epoch": 0.5115733226797173, "grad_norm": 0.09623833000659943, "learning_rate": 1.468947908583444e-05, "loss": 8.8938, "step": 102440 }, { "epoch": 0.5116232615046568, "grad_norm": 0.09208162873983383, "learning_rate": 1.4687977170892888e-05, "loss": 8.902, "step": 102450 }, { "epoch": 0.5116732003295963, "grad_norm": 0.0962912067770958, "learning_rate": 1.4686475255951338e-05, "loss": 8.8727, "step": 102460 }, { "epoch": 0.5117231391545357, "grad_norm": 0.0914090946316719, "learning_rate": 1.4684973341009788e-05, "loss": 8.8823, "step": 102470 }, { "epoch": 0.5117730779794751, "grad_norm": 0.09146260470151901, "learning_rate": 1.4683471426068238e-05, "loss": 8.9058, "step": 102480 }, { "epoch": 0.5118230168044146, "grad_norm": 0.09242929518222809, "learning_rate": 1.4681969511126688e-05, "loss": 8.8762, "step": 102490 }, { "epoch": 0.5118729556293541, "grad_norm": 0.09700184315443039, "learning_rate": 1.4680467596185135e-05, "loss": 8.8787, "step": 102500 }, { "epoch": 0.5119228944542935, "grad_norm": 0.08677946031093597, "learning_rate": 1.4678965681243585e-05, "loss": 8.8883, "step": 102510 }, { "epoch": 0.5119728332792329, "grad_norm": 0.09197265654802322, "learning_rate": 1.4677463766302036e-05, "loss": 8.8903, "step": 102520 }, { "epoch": 0.5120227721041724, "grad_norm": 0.08945990353822708, "learning_rate": 1.4675961851360486e-05, "loss": 8.8724, "step": 102530 }, { "epoch": 0.5120727109291119, "grad_norm": 0.08991879224777222, "learning_rate": 1.4674459936418936e-05, "loss": 8.8745, "step": 102540 }, { "epoch": 0.5121226497540513, "grad_norm": 0.09267672151327133, "learning_rate": 1.4672958021477383e-05, "loss": 8.8855, "step": 102550 }, { "epoch": 0.5121725885789907, "grad_norm": 0.09635213762521744, "learning_rate": 1.4671456106535833e-05, "loss": 8.8905, "step": 102560 }, { "epoch": 0.5122225274039302, "grad_norm": 0.0958060547709465, "learning_rate": 1.4669954191594283e-05, "loss": 8.9029, "step": 102570 }, { "epoch": 0.5122724662288697, "grad_norm": 0.09407208114862442, "learning_rate": 1.4668452276652733e-05, "loss": 8.8742, "step": 102580 }, { "epoch": 0.5123224050538091, "grad_norm": 0.09680324047803879, "learning_rate": 1.4666950361711183e-05, "loss": 8.8967, "step": 102590 }, { "epoch": 0.5123723438787485, "grad_norm": 0.09674806892871857, "learning_rate": 1.466544844676963e-05, "loss": 8.8878, "step": 102600 }, { "epoch": 0.512422282703688, "grad_norm": 0.08954492211341858, "learning_rate": 1.466394653182808e-05, "loss": 8.8912, "step": 102610 }, { "epoch": 0.5124722215286275, "grad_norm": 0.08924522995948792, "learning_rate": 1.466244461688653e-05, "loss": 8.8811, "step": 102620 }, { "epoch": 0.5125221603535669, "grad_norm": 0.09569413959980011, "learning_rate": 1.466094270194498e-05, "loss": 8.8828, "step": 102630 }, { "epoch": 0.5125720991785063, "grad_norm": 0.09132692217826843, "learning_rate": 1.4659440787003431e-05, "loss": 8.8799, "step": 102640 }, { "epoch": 0.5126220380034457, "grad_norm": 0.08919345587491989, "learning_rate": 1.4657938872061878e-05, "loss": 8.8885, "step": 102650 }, { "epoch": 0.5126719768283853, "grad_norm": 0.09804876893758774, "learning_rate": 1.4656436957120328e-05, "loss": 8.8798, "step": 102660 }, { "epoch": 0.5127219156533247, "grad_norm": 0.10200927406549454, "learning_rate": 1.4654935042178778e-05, "loss": 8.8672, "step": 102670 }, { "epoch": 0.5127718544782641, "grad_norm": 0.09064894169569016, "learning_rate": 1.4653433127237228e-05, "loss": 8.8719, "step": 102680 }, { "epoch": 0.5128217933032035, "grad_norm": 0.09494546800851822, "learning_rate": 1.4651931212295678e-05, "loss": 8.8996, "step": 102690 }, { "epoch": 0.5128717321281431, "grad_norm": 0.09214048832654953, "learning_rate": 1.4650429297354127e-05, "loss": 8.8922, "step": 102700 }, { "epoch": 0.5129216709530825, "grad_norm": 0.09070252627134323, "learning_rate": 1.4648927382412575e-05, "loss": 8.8887, "step": 102710 }, { "epoch": 0.5129716097780219, "grad_norm": 0.09582309424877167, "learning_rate": 1.4647425467471026e-05, "loss": 8.8841, "step": 102720 }, { "epoch": 0.5130215486029613, "grad_norm": 0.0906309261918068, "learning_rate": 1.4645923552529476e-05, "loss": 8.8794, "step": 102730 }, { "epoch": 0.5130714874279009, "grad_norm": 0.0934000313282013, "learning_rate": 1.4644421637587926e-05, "loss": 8.8729, "step": 102740 }, { "epoch": 0.5131214262528403, "grad_norm": 0.09085663408041, "learning_rate": 1.4642919722646374e-05, "loss": 8.8923, "step": 102750 }, { "epoch": 0.5131713650777797, "grad_norm": 0.0955960750579834, "learning_rate": 1.4641417807704823e-05, "loss": 8.8932, "step": 102760 }, { "epoch": 0.5132213039027191, "grad_norm": 0.09510260075330734, "learning_rate": 1.4639915892763273e-05, "loss": 8.8844, "step": 102770 }, { "epoch": 0.5132712427276587, "grad_norm": 0.0931367352604866, "learning_rate": 1.4638413977821723e-05, "loss": 8.8895, "step": 102780 }, { "epoch": 0.5133211815525981, "grad_norm": 0.09127708524465561, "learning_rate": 1.4636912062880173e-05, "loss": 8.8706, "step": 102790 }, { "epoch": 0.5133711203775375, "grad_norm": 0.09185762703418732, "learning_rate": 1.4635410147938622e-05, "loss": 8.8691, "step": 102800 }, { "epoch": 0.5134210592024769, "grad_norm": 0.08858191221952438, "learning_rate": 1.463390823299707e-05, "loss": 8.8763, "step": 102810 }, { "epoch": 0.5134709980274165, "grad_norm": 0.08831470459699631, "learning_rate": 1.463240631805552e-05, "loss": 8.8851, "step": 102820 }, { "epoch": 0.5135209368523559, "grad_norm": 0.08874993771314621, "learning_rate": 1.463090440311397e-05, "loss": 8.8826, "step": 102830 }, { "epoch": 0.5135708756772953, "grad_norm": 0.09777554869651794, "learning_rate": 1.4629402488172421e-05, "loss": 8.888, "step": 102840 }, { "epoch": 0.5136208145022347, "grad_norm": 0.08795005083084106, "learning_rate": 1.462790057323087e-05, "loss": 8.8808, "step": 102850 }, { "epoch": 0.5136707533271742, "grad_norm": 0.08897390216588974, "learning_rate": 1.462639865828932e-05, "loss": 8.8769, "step": 102860 }, { "epoch": 0.5137206921521137, "grad_norm": 0.09718848019838333, "learning_rate": 1.4624896743347768e-05, "loss": 8.8757, "step": 102870 }, { "epoch": 0.5137706309770531, "grad_norm": 0.09486740082502365, "learning_rate": 1.4623394828406218e-05, "loss": 8.8906, "step": 102880 }, { "epoch": 0.5138205698019925, "grad_norm": 0.09383400529623032, "learning_rate": 1.4621892913464668e-05, "loss": 8.8932, "step": 102890 }, { "epoch": 0.513870508626932, "grad_norm": 0.09229724854230881, "learning_rate": 1.4620390998523117e-05, "loss": 8.8677, "step": 102900 }, { "epoch": 0.5139204474518715, "grad_norm": 0.09365616738796234, "learning_rate": 1.4618889083581567e-05, "loss": 8.8787, "step": 102910 }, { "epoch": 0.5139703862768109, "grad_norm": 0.09437093883752823, "learning_rate": 1.4617387168640016e-05, "loss": 8.9, "step": 102920 }, { "epoch": 0.5140203251017503, "grad_norm": 0.09300998598337173, "learning_rate": 1.4615885253698466e-05, "loss": 8.88, "step": 102930 }, { "epoch": 0.5140702639266898, "grad_norm": 0.09167800098657608, "learning_rate": 1.4614383338756916e-05, "loss": 8.8927, "step": 102940 }, { "epoch": 0.5141202027516293, "grad_norm": 0.09113366156816483, "learning_rate": 1.4612881423815364e-05, "loss": 8.8802, "step": 102950 }, { "epoch": 0.5141701415765687, "grad_norm": 0.09514357149600983, "learning_rate": 1.4611379508873815e-05, "loss": 8.8725, "step": 102960 }, { "epoch": 0.5142200804015081, "grad_norm": 0.09392548352479935, "learning_rate": 1.4609877593932263e-05, "loss": 8.869, "step": 102970 }, { "epoch": 0.5142700192264476, "grad_norm": 0.08608120679855347, "learning_rate": 1.4608375678990713e-05, "loss": 8.8806, "step": 102980 }, { "epoch": 0.5143199580513871, "grad_norm": 0.09436332434415817, "learning_rate": 1.4606873764049163e-05, "loss": 8.8717, "step": 102990 }, { "epoch": 0.5143698968763265, "grad_norm": 0.09002230316400528, "learning_rate": 1.4605371849107612e-05, "loss": 8.8916, "step": 103000 }, { "epoch": 0.5144198357012659, "grad_norm": 0.09780488163232803, "learning_rate": 1.4603869934166062e-05, "loss": 8.8725, "step": 103010 }, { "epoch": 0.5144697745262053, "grad_norm": 0.09558107703924179, "learning_rate": 1.4602368019224512e-05, "loss": 8.8836, "step": 103020 }, { "epoch": 0.5145197133511449, "grad_norm": 0.09473099559545517, "learning_rate": 1.460086610428296e-05, "loss": 8.8739, "step": 103030 }, { "epoch": 0.5145696521760843, "grad_norm": 0.09364248812198639, "learning_rate": 1.4599364189341411e-05, "loss": 8.8709, "step": 103040 }, { "epoch": 0.5146195910010237, "grad_norm": 0.09198643267154694, "learning_rate": 1.459786227439986e-05, "loss": 8.8838, "step": 103050 }, { "epoch": 0.5146695298259631, "grad_norm": 0.0934210941195488, "learning_rate": 1.459636035945831e-05, "loss": 8.8783, "step": 103060 }, { "epoch": 0.5147194686509027, "grad_norm": 0.09671808034181595, "learning_rate": 1.459485844451676e-05, "loss": 8.8814, "step": 103070 }, { "epoch": 0.5147694074758421, "grad_norm": 0.0925278291106224, "learning_rate": 1.4593356529575208e-05, "loss": 8.8924, "step": 103080 }, { "epoch": 0.5148193463007815, "grad_norm": 0.09130702912807465, "learning_rate": 1.4591854614633658e-05, "loss": 8.8789, "step": 103090 }, { "epoch": 0.5148692851257209, "grad_norm": 0.0948934555053711, "learning_rate": 1.4590352699692107e-05, "loss": 8.8758, "step": 103100 }, { "epoch": 0.5149192239506605, "grad_norm": 0.09707729518413544, "learning_rate": 1.4588850784750557e-05, "loss": 8.8773, "step": 103110 }, { "epoch": 0.5149691627755999, "grad_norm": 0.09346991032361984, "learning_rate": 1.4587348869809007e-05, "loss": 8.8743, "step": 103120 }, { "epoch": 0.5150191016005393, "grad_norm": 0.09797349572181702, "learning_rate": 1.4585846954867456e-05, "loss": 8.8825, "step": 103130 }, { "epoch": 0.5150690404254787, "grad_norm": 0.08969218283891678, "learning_rate": 1.4584345039925906e-05, "loss": 8.889, "step": 103140 }, { "epoch": 0.5151189792504183, "grad_norm": 0.0924285352230072, "learning_rate": 1.4582843124984354e-05, "loss": 8.876, "step": 103150 }, { "epoch": 0.5151689180753577, "grad_norm": 0.0896601751446724, "learning_rate": 1.4581341210042805e-05, "loss": 8.8611, "step": 103160 }, { "epoch": 0.5152188569002971, "grad_norm": 0.08862532675266266, "learning_rate": 1.4579839295101255e-05, "loss": 8.8764, "step": 103170 }, { "epoch": 0.5152687957252365, "grad_norm": 0.09978237003087997, "learning_rate": 1.4578337380159705e-05, "loss": 8.883, "step": 103180 }, { "epoch": 0.5153187345501761, "grad_norm": 0.09299599379301071, "learning_rate": 1.4576835465218153e-05, "loss": 8.8806, "step": 103190 }, { "epoch": 0.5153686733751155, "grad_norm": 0.09699616581201553, "learning_rate": 1.4575333550276602e-05, "loss": 8.8871, "step": 103200 }, { "epoch": 0.5154186122000549, "grad_norm": 0.09479953348636627, "learning_rate": 1.4573831635335052e-05, "loss": 8.8893, "step": 103210 }, { "epoch": 0.5154685510249943, "grad_norm": 0.09587261080741882, "learning_rate": 1.4572329720393502e-05, "loss": 8.8802, "step": 103220 }, { "epoch": 0.5155184898499339, "grad_norm": 0.09195046871900558, "learning_rate": 1.4570827805451952e-05, "loss": 8.8887, "step": 103230 }, { "epoch": 0.5155684286748733, "grad_norm": 0.09111469984054565, "learning_rate": 1.4569325890510401e-05, "loss": 8.868, "step": 103240 }, { "epoch": 0.5156183674998127, "grad_norm": 0.09878696501255035, "learning_rate": 1.456782397556885e-05, "loss": 8.8859, "step": 103250 }, { "epoch": 0.5156683063247521, "grad_norm": 0.09132800251245499, "learning_rate": 1.45663220606273e-05, "loss": 8.8826, "step": 103260 }, { "epoch": 0.5157182451496917, "grad_norm": 0.0929095596075058, "learning_rate": 1.456482014568575e-05, "loss": 8.8912, "step": 103270 }, { "epoch": 0.5157681839746311, "grad_norm": 0.0883350670337677, "learning_rate": 1.45633182307442e-05, "loss": 8.8899, "step": 103280 }, { "epoch": 0.5158181227995705, "grad_norm": 0.0903296247124672, "learning_rate": 1.4561816315802648e-05, "loss": 8.8713, "step": 103290 }, { "epoch": 0.5158680616245099, "grad_norm": 0.09251569956541061, "learning_rate": 1.4560314400861097e-05, "loss": 8.8755, "step": 103300 }, { "epoch": 0.5159180004494495, "grad_norm": 0.10021259635686874, "learning_rate": 1.4558812485919547e-05, "loss": 8.8717, "step": 103310 }, { "epoch": 0.5159679392743889, "grad_norm": 0.09307809174060822, "learning_rate": 1.4557310570977997e-05, "loss": 8.861, "step": 103320 }, { "epoch": 0.5160178780993283, "grad_norm": 0.09546101093292236, "learning_rate": 1.4555808656036448e-05, "loss": 8.8763, "step": 103330 }, { "epoch": 0.5160678169242677, "grad_norm": 0.09137708693742752, "learning_rate": 1.4554306741094898e-05, "loss": 8.8986, "step": 103340 }, { "epoch": 0.5161177557492073, "grad_norm": 0.09687686711549759, "learning_rate": 1.4552804826153344e-05, "loss": 8.8711, "step": 103350 }, { "epoch": 0.5161676945741467, "grad_norm": 0.09281541407108307, "learning_rate": 1.4551302911211795e-05, "loss": 8.8645, "step": 103360 }, { "epoch": 0.5162176333990861, "grad_norm": 0.08926443755626678, "learning_rate": 1.4549800996270245e-05, "loss": 8.8634, "step": 103370 }, { "epoch": 0.5162675722240255, "grad_norm": 0.09436703473329544, "learning_rate": 1.4548299081328695e-05, "loss": 8.878, "step": 103380 }, { "epoch": 0.5163175110489651, "grad_norm": 0.09582871198654175, "learning_rate": 1.4546797166387145e-05, "loss": 8.8757, "step": 103390 }, { "epoch": 0.5163674498739045, "grad_norm": 0.09050548076629639, "learning_rate": 1.4545295251445592e-05, "loss": 8.8923, "step": 103400 }, { "epoch": 0.5164173886988439, "grad_norm": 0.0901259183883667, "learning_rate": 1.4543793336504042e-05, "loss": 8.8696, "step": 103410 }, { "epoch": 0.5164673275237833, "grad_norm": 0.09428418427705765, "learning_rate": 1.4542291421562492e-05, "loss": 8.882, "step": 103420 }, { "epoch": 0.5165172663487229, "grad_norm": 0.09372241050004959, "learning_rate": 1.4540789506620943e-05, "loss": 8.874, "step": 103430 }, { "epoch": 0.5165672051736623, "grad_norm": 0.09623238444328308, "learning_rate": 1.4539287591679393e-05, "loss": 8.8679, "step": 103440 }, { "epoch": 0.5166171439986017, "grad_norm": 0.09057226777076721, "learning_rate": 1.453778567673784e-05, "loss": 8.8789, "step": 103450 }, { "epoch": 0.5166670828235411, "grad_norm": 0.09329625219106674, "learning_rate": 1.453628376179629e-05, "loss": 8.874, "step": 103460 }, { "epoch": 0.5167170216484807, "grad_norm": 0.0941077321767807, "learning_rate": 1.453478184685474e-05, "loss": 8.8716, "step": 103470 }, { "epoch": 0.5167669604734201, "grad_norm": 0.09424999356269836, "learning_rate": 1.453327993191319e-05, "loss": 8.8834, "step": 103480 }, { "epoch": 0.5168168992983595, "grad_norm": 0.0937827080488205, "learning_rate": 1.453177801697164e-05, "loss": 8.8731, "step": 103490 }, { "epoch": 0.5168668381232989, "grad_norm": 0.0896645337343216, "learning_rate": 1.4530276102030089e-05, "loss": 8.8669, "step": 103500 }, { "epoch": 0.5169167769482385, "grad_norm": 0.08836094290018082, "learning_rate": 1.4528774187088537e-05, "loss": 8.8832, "step": 103510 }, { "epoch": 0.5169667157731779, "grad_norm": 0.0928412526845932, "learning_rate": 1.4527272272146987e-05, "loss": 8.8857, "step": 103520 }, { "epoch": 0.5170166545981173, "grad_norm": 0.09539926797151566, "learning_rate": 1.4525770357205438e-05, "loss": 8.892, "step": 103530 }, { "epoch": 0.5170665934230567, "grad_norm": 0.09582491964101791, "learning_rate": 1.4524268442263888e-05, "loss": 8.8688, "step": 103540 }, { "epoch": 0.5171165322479963, "grad_norm": 0.09870574623346329, "learning_rate": 1.4522766527322336e-05, "loss": 8.8783, "step": 103550 }, { "epoch": 0.5171664710729357, "grad_norm": 0.09343771636486053, "learning_rate": 1.4521264612380785e-05, "loss": 8.8707, "step": 103560 }, { "epoch": 0.5172164098978751, "grad_norm": 0.09553004056215286, "learning_rate": 1.4519762697439235e-05, "loss": 8.8827, "step": 103570 }, { "epoch": 0.5172663487228145, "grad_norm": 0.09276316314935684, "learning_rate": 1.4518260782497685e-05, "loss": 8.8747, "step": 103580 }, { "epoch": 0.517316287547754, "grad_norm": 0.09070324152708054, "learning_rate": 1.4516758867556135e-05, "loss": 8.876, "step": 103590 }, { "epoch": 0.5173662263726935, "grad_norm": 0.09403187781572342, "learning_rate": 1.4515256952614584e-05, "loss": 8.8583, "step": 103600 }, { "epoch": 0.5174161651976329, "grad_norm": 0.08794444054365158, "learning_rate": 1.4513755037673032e-05, "loss": 8.8818, "step": 103610 }, { "epoch": 0.5174661040225723, "grad_norm": 0.09099708497524261, "learning_rate": 1.4512253122731482e-05, "loss": 8.8783, "step": 103620 }, { "epoch": 0.5175160428475118, "grad_norm": 0.09580422937870026, "learning_rate": 1.4510751207789933e-05, "loss": 8.8714, "step": 103630 }, { "epoch": 0.5175659816724513, "grad_norm": 0.09501136839389801, "learning_rate": 1.4509249292848383e-05, "loss": 8.8734, "step": 103640 }, { "epoch": 0.5176159204973907, "grad_norm": 0.09438387304544449, "learning_rate": 1.4507747377906831e-05, "loss": 8.8821, "step": 103650 }, { "epoch": 0.5176658593223301, "grad_norm": 0.09567675739526749, "learning_rate": 1.4506245462965281e-05, "loss": 8.8686, "step": 103660 }, { "epoch": 0.5177157981472696, "grad_norm": 0.09035927057266235, "learning_rate": 1.450474354802373e-05, "loss": 8.8584, "step": 103670 }, { "epoch": 0.5177657369722091, "grad_norm": 0.09373793751001358, "learning_rate": 1.450324163308218e-05, "loss": 8.8612, "step": 103680 }, { "epoch": 0.5178156757971485, "grad_norm": 0.0980074554681778, "learning_rate": 1.450173971814063e-05, "loss": 8.8687, "step": 103690 }, { "epoch": 0.5178656146220879, "grad_norm": 0.09776370227336884, "learning_rate": 1.450023780319908e-05, "loss": 8.8841, "step": 103700 }, { "epoch": 0.5179155534470274, "grad_norm": 0.0890578031539917, "learning_rate": 1.4498735888257529e-05, "loss": 8.8819, "step": 103710 }, { "epoch": 0.5179654922719669, "grad_norm": 0.09932509809732437, "learning_rate": 1.4497233973315977e-05, "loss": 8.8684, "step": 103720 }, { "epoch": 0.5180154310969063, "grad_norm": 0.09279154241085052, "learning_rate": 1.4495732058374428e-05, "loss": 8.8734, "step": 103730 }, { "epoch": 0.5180653699218457, "grad_norm": 0.09163205325603485, "learning_rate": 1.4494230143432878e-05, "loss": 8.8798, "step": 103740 }, { "epoch": 0.5181153087467852, "grad_norm": 0.08860352635383606, "learning_rate": 1.4492728228491328e-05, "loss": 8.8876, "step": 103750 }, { "epoch": 0.5181652475717247, "grad_norm": 0.08968942612409592, "learning_rate": 1.4491226313549776e-05, "loss": 8.8661, "step": 103760 }, { "epoch": 0.5182151863966641, "grad_norm": 0.09817136079072952, "learning_rate": 1.4489724398608225e-05, "loss": 8.88, "step": 103770 }, { "epoch": 0.5182651252216035, "grad_norm": 0.09469393640756607, "learning_rate": 1.4488222483666675e-05, "loss": 8.8826, "step": 103780 }, { "epoch": 0.518315064046543, "grad_norm": 0.08931383490562439, "learning_rate": 1.4486720568725125e-05, "loss": 8.874, "step": 103790 }, { "epoch": 0.5183650028714825, "grad_norm": 0.09799165278673172, "learning_rate": 1.4485218653783575e-05, "loss": 8.8699, "step": 103800 }, { "epoch": 0.5184149416964219, "grad_norm": 0.09098178893327713, "learning_rate": 1.4483716738842024e-05, "loss": 8.8739, "step": 103810 }, { "epoch": 0.5184648805213613, "grad_norm": 0.0937158390879631, "learning_rate": 1.4482214823900474e-05, "loss": 8.8743, "step": 103820 }, { "epoch": 0.5185148193463008, "grad_norm": 0.09612367302179337, "learning_rate": 1.4480712908958923e-05, "loss": 8.8872, "step": 103830 }, { "epoch": 0.5185647581712403, "grad_norm": 0.08803288638591766, "learning_rate": 1.4479210994017373e-05, "loss": 8.8751, "step": 103840 }, { "epoch": 0.5186146969961797, "grad_norm": 0.09393215924501419, "learning_rate": 1.4477709079075823e-05, "loss": 8.8681, "step": 103850 }, { "epoch": 0.5186646358211191, "grad_norm": 0.09139379113912582, "learning_rate": 1.4476207164134271e-05, "loss": 8.8752, "step": 103860 }, { "epoch": 0.5187145746460585, "grad_norm": 0.08722906559705734, "learning_rate": 1.4474705249192722e-05, "loss": 8.8791, "step": 103870 }, { "epoch": 0.5187645134709981, "grad_norm": 0.09366084635257721, "learning_rate": 1.447320333425117e-05, "loss": 8.8777, "step": 103880 }, { "epoch": 0.5188144522959375, "grad_norm": 0.08771378546953201, "learning_rate": 1.447170141930962e-05, "loss": 8.8769, "step": 103890 }, { "epoch": 0.5188643911208769, "grad_norm": 0.09123623371124268, "learning_rate": 1.447019950436807e-05, "loss": 8.8813, "step": 103900 }, { "epoch": 0.5189143299458163, "grad_norm": 0.0887846052646637, "learning_rate": 1.4468697589426519e-05, "loss": 8.8682, "step": 103910 }, { "epoch": 0.5189642687707559, "grad_norm": 0.091937355697155, "learning_rate": 1.4467195674484969e-05, "loss": 8.8748, "step": 103920 }, { "epoch": 0.5190142075956953, "grad_norm": 0.09356644749641418, "learning_rate": 1.4465693759543418e-05, "loss": 8.8539, "step": 103930 }, { "epoch": 0.5190641464206347, "grad_norm": 0.08848299831151962, "learning_rate": 1.4464191844601868e-05, "loss": 8.8697, "step": 103940 }, { "epoch": 0.5191140852455741, "grad_norm": 0.0895775780081749, "learning_rate": 1.4462689929660318e-05, "loss": 8.8636, "step": 103950 }, { "epoch": 0.5191640240705137, "grad_norm": 0.09382645785808563, "learning_rate": 1.4461188014718766e-05, "loss": 8.8704, "step": 103960 }, { "epoch": 0.5192139628954531, "grad_norm": 0.0946570485830307, "learning_rate": 1.4459686099777217e-05, "loss": 8.8538, "step": 103970 }, { "epoch": 0.5192639017203925, "grad_norm": 0.09343206882476807, "learning_rate": 1.4458184184835667e-05, "loss": 8.8671, "step": 103980 }, { "epoch": 0.5193138405453319, "grad_norm": 0.09253936260938644, "learning_rate": 1.4456682269894115e-05, "loss": 8.8672, "step": 103990 }, { "epoch": 0.5193637793702715, "grad_norm": 0.0950196385383606, "learning_rate": 1.4455180354952565e-05, "loss": 8.8815, "step": 104000 }, { "epoch": 0.5194137181952109, "grad_norm": 0.09575220942497253, "learning_rate": 1.4453678440011014e-05, "loss": 8.8704, "step": 104010 }, { "epoch": 0.5194636570201503, "grad_norm": 0.09172710031270981, "learning_rate": 1.4452176525069464e-05, "loss": 8.8797, "step": 104020 }, { "epoch": 0.5195135958450897, "grad_norm": 0.09384613484144211, "learning_rate": 1.4450674610127914e-05, "loss": 8.8628, "step": 104030 }, { "epoch": 0.5195635346700292, "grad_norm": 0.09063451737165451, "learning_rate": 1.4449172695186363e-05, "loss": 8.8736, "step": 104040 }, { "epoch": 0.5196134734949687, "grad_norm": 0.09290114045143127, "learning_rate": 1.4447670780244813e-05, "loss": 8.8818, "step": 104050 }, { "epoch": 0.5196634123199081, "grad_norm": 0.09309238195419312, "learning_rate": 1.4446168865303261e-05, "loss": 8.8818, "step": 104060 }, { "epoch": 0.5197133511448475, "grad_norm": 0.09484042227268219, "learning_rate": 1.4444666950361712e-05, "loss": 8.882, "step": 104070 }, { "epoch": 0.519763289969787, "grad_norm": 0.09208900481462479, "learning_rate": 1.4443165035420162e-05, "loss": 8.8948, "step": 104080 }, { "epoch": 0.5198132287947265, "grad_norm": 0.09136916697025299, "learning_rate": 1.444166312047861e-05, "loss": 8.8841, "step": 104090 }, { "epoch": 0.5198631676196659, "grad_norm": 0.09338723123073578, "learning_rate": 1.444016120553706e-05, "loss": 8.8688, "step": 104100 }, { "epoch": 0.5199131064446053, "grad_norm": 0.09019609540700912, "learning_rate": 1.4438659290595509e-05, "loss": 8.8557, "step": 104110 }, { "epoch": 0.5199630452695448, "grad_norm": 0.09477616846561432, "learning_rate": 1.4437157375653959e-05, "loss": 8.8702, "step": 104120 }, { "epoch": 0.5200129840944843, "grad_norm": 0.09462156146764755, "learning_rate": 1.443565546071241e-05, "loss": 8.8836, "step": 104130 }, { "epoch": 0.5200629229194237, "grad_norm": 0.09140991419553757, "learning_rate": 1.443415354577086e-05, "loss": 8.8589, "step": 104140 }, { "epoch": 0.5201128617443631, "grad_norm": 0.09621613472700119, "learning_rate": 1.4432651630829308e-05, "loss": 8.8712, "step": 104150 }, { "epoch": 0.5201628005693026, "grad_norm": 0.09273172914981842, "learning_rate": 1.4431149715887756e-05, "loss": 8.8778, "step": 104160 }, { "epoch": 0.5202127393942421, "grad_norm": 0.09596338123083115, "learning_rate": 1.4429647800946207e-05, "loss": 8.8749, "step": 104170 }, { "epoch": 0.5202626782191815, "grad_norm": 0.09606953710317612, "learning_rate": 1.4428145886004657e-05, "loss": 8.8644, "step": 104180 }, { "epoch": 0.5203126170441209, "grad_norm": 0.08977267146110535, "learning_rate": 1.4426643971063107e-05, "loss": 8.8545, "step": 104190 }, { "epoch": 0.5203625558690604, "grad_norm": 0.09220623224973679, "learning_rate": 1.4425142056121555e-05, "loss": 8.8825, "step": 104200 }, { "epoch": 0.5204124946939999, "grad_norm": 0.09000903367996216, "learning_rate": 1.4423640141180004e-05, "loss": 8.8679, "step": 104210 }, { "epoch": 0.5204624335189393, "grad_norm": 0.09188482165336609, "learning_rate": 1.4422138226238454e-05, "loss": 8.8606, "step": 104220 }, { "epoch": 0.5205123723438787, "grad_norm": 0.09331043064594269, "learning_rate": 1.4420636311296904e-05, "loss": 8.8716, "step": 104230 }, { "epoch": 0.5205623111688182, "grad_norm": 0.0948970839381218, "learning_rate": 1.4419134396355354e-05, "loss": 8.8613, "step": 104240 }, { "epoch": 0.5206122499937577, "grad_norm": 0.09408802539110184, "learning_rate": 1.4417632481413803e-05, "loss": 8.8697, "step": 104250 }, { "epoch": 0.5206621888186971, "grad_norm": 0.10012494027614594, "learning_rate": 1.4416130566472251e-05, "loss": 8.8821, "step": 104260 }, { "epoch": 0.5207121276436365, "grad_norm": 0.08951479196548462, "learning_rate": 1.4414628651530702e-05, "loss": 8.8749, "step": 104270 }, { "epoch": 0.520762066468576, "grad_norm": 0.09365096688270569, "learning_rate": 1.4413126736589152e-05, "loss": 8.8729, "step": 104280 }, { "epoch": 0.5208120052935155, "grad_norm": 0.08924220502376556, "learning_rate": 1.4411624821647602e-05, "loss": 8.8587, "step": 104290 }, { "epoch": 0.5208619441184549, "grad_norm": 0.09431713819503784, "learning_rate": 1.4410122906706052e-05, "loss": 8.8691, "step": 104300 }, { "epoch": 0.5209118829433943, "grad_norm": 0.0971921905875206, "learning_rate": 1.4408620991764499e-05, "loss": 8.8626, "step": 104310 }, { "epoch": 0.5209618217683338, "grad_norm": 0.09287826716899872, "learning_rate": 1.4407119076822949e-05, "loss": 8.8534, "step": 104320 }, { "epoch": 0.5210117605932733, "grad_norm": 0.09646140784025192, "learning_rate": 1.44056171618814e-05, "loss": 8.8684, "step": 104330 }, { "epoch": 0.5210616994182127, "grad_norm": 0.09730952978134155, "learning_rate": 1.440411524693985e-05, "loss": 8.8817, "step": 104340 }, { "epoch": 0.5211116382431521, "grad_norm": 0.09068258851766586, "learning_rate": 1.44026133319983e-05, "loss": 8.8648, "step": 104350 }, { "epoch": 0.5211615770680916, "grad_norm": 0.09530194848775864, "learning_rate": 1.4401111417056746e-05, "loss": 8.8584, "step": 104360 }, { "epoch": 0.521211515893031, "grad_norm": 0.09383071959018707, "learning_rate": 1.4399609502115197e-05, "loss": 8.8508, "step": 104370 }, { "epoch": 0.5212614547179705, "grad_norm": 0.096278615295887, "learning_rate": 1.4398107587173647e-05, "loss": 8.8657, "step": 104380 }, { "epoch": 0.5213113935429099, "grad_norm": 0.09329640120267868, "learning_rate": 1.4396605672232097e-05, "loss": 8.8752, "step": 104390 }, { "epoch": 0.5213613323678494, "grad_norm": 0.0894843116402626, "learning_rate": 1.4395103757290547e-05, "loss": 8.8745, "step": 104400 }, { "epoch": 0.5214112711927888, "grad_norm": 0.0973014086484909, "learning_rate": 1.4393601842348994e-05, "loss": 8.8633, "step": 104410 }, { "epoch": 0.5214612100177283, "grad_norm": 0.09077692031860352, "learning_rate": 1.4392099927407444e-05, "loss": 8.8766, "step": 104420 }, { "epoch": 0.5215111488426677, "grad_norm": 0.09806881844997406, "learning_rate": 1.4390598012465894e-05, "loss": 8.868, "step": 104430 }, { "epoch": 0.5215610876676072, "grad_norm": 0.09557566046714783, "learning_rate": 1.4389096097524344e-05, "loss": 8.8595, "step": 104440 }, { "epoch": 0.5216110264925466, "grad_norm": 0.09814813733100891, "learning_rate": 1.4387594182582795e-05, "loss": 8.8594, "step": 104450 }, { "epoch": 0.5216609653174861, "grad_norm": 0.09561028331518173, "learning_rate": 1.4386092267641243e-05, "loss": 8.8795, "step": 104460 }, { "epoch": 0.5217109041424255, "grad_norm": 0.0946744978427887, "learning_rate": 1.4384590352699692e-05, "loss": 8.8686, "step": 104470 }, { "epoch": 0.521760842967365, "grad_norm": 0.08944974094629288, "learning_rate": 1.4383088437758142e-05, "loss": 8.851, "step": 104480 }, { "epoch": 0.5218107817923044, "grad_norm": 0.09138962626457214, "learning_rate": 1.4381586522816592e-05, "loss": 8.8796, "step": 104490 }, { "epoch": 0.5218607206172439, "grad_norm": 0.09289447963237762, "learning_rate": 1.4380084607875042e-05, "loss": 8.8598, "step": 104500 }, { "epoch": 0.5219106594421833, "grad_norm": 0.0965084508061409, "learning_rate": 1.437858269293349e-05, "loss": 8.8582, "step": 104510 }, { "epoch": 0.5219605982671228, "grad_norm": 0.08889494836330414, "learning_rate": 1.4377080777991939e-05, "loss": 8.8847, "step": 104520 }, { "epoch": 0.5220105370920622, "grad_norm": 0.09522352367639542, "learning_rate": 1.437557886305039e-05, "loss": 8.8416, "step": 104530 }, { "epoch": 0.5220604759170017, "grad_norm": 0.09857291728258133, "learning_rate": 1.437407694810884e-05, "loss": 8.8689, "step": 104540 }, { "epoch": 0.5221104147419411, "grad_norm": 0.0958976224064827, "learning_rate": 1.437257503316729e-05, "loss": 8.859, "step": 104550 }, { "epoch": 0.5221603535668806, "grad_norm": 0.08890080451965332, "learning_rate": 1.4371073118225738e-05, "loss": 8.8703, "step": 104560 }, { "epoch": 0.52221029239182, "grad_norm": 0.09524325281381607, "learning_rate": 1.4369571203284187e-05, "loss": 8.876, "step": 104570 }, { "epoch": 0.5222602312167595, "grad_norm": 0.0954352393746376, "learning_rate": 1.4368069288342637e-05, "loss": 8.867, "step": 104580 }, { "epoch": 0.5223101700416989, "grad_norm": 0.09157583862543106, "learning_rate": 1.4366567373401087e-05, "loss": 8.8728, "step": 104590 }, { "epoch": 0.5223601088666384, "grad_norm": 0.08985961973667145, "learning_rate": 1.4365065458459537e-05, "loss": 8.8768, "step": 104600 }, { "epoch": 0.5224100476915778, "grad_norm": 0.09679489582777023, "learning_rate": 1.4363563543517986e-05, "loss": 8.8566, "step": 104610 }, { "epoch": 0.5224599865165173, "grad_norm": 0.09118127077817917, "learning_rate": 1.4362061628576436e-05, "loss": 8.8611, "step": 104620 }, { "epoch": 0.5225099253414567, "grad_norm": 0.09494442492723465, "learning_rate": 1.4360559713634884e-05, "loss": 8.8769, "step": 104630 }, { "epoch": 0.5225598641663962, "grad_norm": 0.08483417332172394, "learning_rate": 1.4359057798693334e-05, "loss": 8.8627, "step": 104640 }, { "epoch": 0.5226098029913356, "grad_norm": 0.09283437579870224, "learning_rate": 1.4357555883751785e-05, "loss": 8.8751, "step": 104650 }, { "epoch": 0.5226597418162751, "grad_norm": 0.09502757340669632, "learning_rate": 1.4356053968810233e-05, "loss": 8.8703, "step": 104660 }, { "epoch": 0.5227096806412145, "grad_norm": 0.09698113799095154, "learning_rate": 1.4354552053868683e-05, "loss": 8.8747, "step": 104670 }, { "epoch": 0.522759619466154, "grad_norm": 0.09982874244451523, "learning_rate": 1.4353050138927132e-05, "loss": 8.8763, "step": 104680 }, { "epoch": 0.5228095582910934, "grad_norm": 0.09431293606758118, "learning_rate": 1.4351548223985582e-05, "loss": 8.875, "step": 104690 }, { "epoch": 0.5228594971160329, "grad_norm": 0.09267256408929825, "learning_rate": 1.4350046309044032e-05, "loss": 8.8649, "step": 104700 }, { "epoch": 0.5229094359409723, "grad_norm": 0.09294337034225464, "learning_rate": 1.434854439410248e-05, "loss": 8.8886, "step": 104710 }, { "epoch": 0.5229593747659118, "grad_norm": 0.09335044771432877, "learning_rate": 1.434704247916093e-05, "loss": 8.862, "step": 104720 }, { "epoch": 0.5230093135908512, "grad_norm": 0.094884492456913, "learning_rate": 1.434554056421938e-05, "loss": 8.8607, "step": 104730 }, { "epoch": 0.5230592524157907, "grad_norm": 0.10318083316087723, "learning_rate": 1.434403864927783e-05, "loss": 8.8615, "step": 104740 }, { "epoch": 0.5231091912407301, "grad_norm": 0.08785218000411987, "learning_rate": 1.434253673433628e-05, "loss": 8.8801, "step": 104750 }, { "epoch": 0.5231591300656696, "grad_norm": 0.09105756133794785, "learning_rate": 1.4341034819394728e-05, "loss": 8.88, "step": 104760 }, { "epoch": 0.523209068890609, "grad_norm": 0.09258095175027847, "learning_rate": 1.4339532904453178e-05, "loss": 8.8629, "step": 104770 }, { "epoch": 0.5232590077155485, "grad_norm": 0.09019656479358673, "learning_rate": 1.4338030989511628e-05, "loss": 8.876, "step": 104780 }, { "epoch": 0.5233089465404879, "grad_norm": 0.09507337957620621, "learning_rate": 1.4336529074570077e-05, "loss": 8.8558, "step": 104790 }, { "epoch": 0.5233588853654274, "grad_norm": 0.09072145074605942, "learning_rate": 1.4335027159628527e-05, "loss": 8.8707, "step": 104800 }, { "epoch": 0.5234088241903668, "grad_norm": 0.08817720413208008, "learning_rate": 1.4333525244686976e-05, "loss": 8.8654, "step": 104810 }, { "epoch": 0.5234587630153062, "grad_norm": 0.09037192910909653, "learning_rate": 1.4332023329745426e-05, "loss": 8.8517, "step": 104820 }, { "epoch": 0.5235087018402457, "grad_norm": 0.087149478495121, "learning_rate": 1.4330521414803876e-05, "loss": 8.8782, "step": 104830 }, { "epoch": 0.5235586406651851, "grad_norm": 0.10001854598522186, "learning_rate": 1.4329019499862324e-05, "loss": 8.8626, "step": 104840 }, { "epoch": 0.5236085794901246, "grad_norm": 0.09408778697252274, "learning_rate": 1.4327517584920775e-05, "loss": 8.8606, "step": 104850 }, { "epoch": 0.523658518315064, "grad_norm": 0.092310830950737, "learning_rate": 1.4326015669979223e-05, "loss": 8.8692, "step": 104860 }, { "epoch": 0.5237084571400035, "grad_norm": 0.0982789471745491, "learning_rate": 1.4324513755037673e-05, "loss": 8.8585, "step": 104870 }, { "epoch": 0.5237583959649429, "grad_norm": 0.09160380810499191, "learning_rate": 1.4323011840096124e-05, "loss": 8.8631, "step": 104880 }, { "epoch": 0.5238083347898824, "grad_norm": 0.09572508931159973, "learning_rate": 1.4321509925154572e-05, "loss": 8.8558, "step": 104890 }, { "epoch": 0.5238582736148218, "grad_norm": 0.09284794330596924, "learning_rate": 1.4320008010213022e-05, "loss": 8.8739, "step": 104900 }, { "epoch": 0.5239082124397613, "grad_norm": 0.09144847840070724, "learning_rate": 1.431850609527147e-05, "loss": 8.8543, "step": 104910 }, { "epoch": 0.5239581512647007, "grad_norm": 0.10097398608922958, "learning_rate": 1.431700418032992e-05, "loss": 8.8667, "step": 104920 }, { "epoch": 0.5240080900896402, "grad_norm": 0.10776495188474655, "learning_rate": 1.4315502265388371e-05, "loss": 8.8659, "step": 104930 }, { "epoch": 0.5240580289145796, "grad_norm": 0.08633417636156082, "learning_rate": 1.4314000350446821e-05, "loss": 8.8753, "step": 104940 }, { "epoch": 0.5241079677395191, "grad_norm": 0.08752107620239258, "learning_rate": 1.431249843550527e-05, "loss": 8.8829, "step": 104950 }, { "epoch": 0.5241579065644585, "grad_norm": 0.08872366696596146, "learning_rate": 1.4310996520563718e-05, "loss": 8.862, "step": 104960 }, { "epoch": 0.524207845389398, "grad_norm": 0.09275726228952408, "learning_rate": 1.4309494605622168e-05, "loss": 8.8588, "step": 104970 }, { "epoch": 0.5242577842143374, "grad_norm": 0.09212324768304825, "learning_rate": 1.4307992690680619e-05, "loss": 8.8612, "step": 104980 }, { "epoch": 0.5243077230392769, "grad_norm": 0.09363345056772232, "learning_rate": 1.4306490775739069e-05, "loss": 8.8581, "step": 104990 }, { "epoch": 0.5243576618642163, "grad_norm": 0.09286009520292282, "learning_rate": 1.4304988860797517e-05, "loss": 8.8635, "step": 105000 }, { "epoch": 0.5244076006891558, "grad_norm": 0.0911487564444542, "learning_rate": 1.4303486945855966e-05, "loss": 8.8541, "step": 105010 }, { "epoch": 0.5244575395140952, "grad_norm": 0.08649508655071259, "learning_rate": 1.4301985030914416e-05, "loss": 8.8619, "step": 105020 }, { "epoch": 0.5245074783390347, "grad_norm": 0.09261025488376617, "learning_rate": 1.4300483115972866e-05, "loss": 8.8467, "step": 105030 }, { "epoch": 0.5245574171639741, "grad_norm": 0.0908532589673996, "learning_rate": 1.4298981201031316e-05, "loss": 8.8492, "step": 105040 }, { "epoch": 0.5246073559889136, "grad_norm": 0.09164856374263763, "learning_rate": 1.4297479286089765e-05, "loss": 8.863, "step": 105050 }, { "epoch": 0.524657294813853, "grad_norm": 0.09361206740140915, "learning_rate": 1.4295977371148213e-05, "loss": 8.8672, "step": 105060 }, { "epoch": 0.5247072336387925, "grad_norm": 0.09860822558403015, "learning_rate": 1.4294475456206663e-05, "loss": 8.8629, "step": 105070 }, { "epoch": 0.5247571724637319, "grad_norm": 0.09220654517412186, "learning_rate": 1.4292973541265114e-05, "loss": 8.8683, "step": 105080 }, { "epoch": 0.5248071112886714, "grad_norm": 0.09145854413509369, "learning_rate": 1.4291471626323564e-05, "loss": 8.8745, "step": 105090 }, { "epoch": 0.5248570501136108, "grad_norm": 0.09256047010421753, "learning_rate": 1.4289969711382014e-05, "loss": 8.8524, "step": 105100 }, { "epoch": 0.5249069889385503, "grad_norm": 0.09105725586414337, "learning_rate": 1.428846779644046e-05, "loss": 8.8741, "step": 105110 }, { "epoch": 0.5249569277634897, "grad_norm": 0.09319113194942474, "learning_rate": 1.428696588149891e-05, "loss": 8.8626, "step": 105120 }, { "epoch": 0.5250068665884292, "grad_norm": 0.09418758749961853, "learning_rate": 1.4285463966557361e-05, "loss": 8.8647, "step": 105130 }, { "epoch": 0.5250568054133686, "grad_norm": 0.09596774727106094, "learning_rate": 1.4283962051615811e-05, "loss": 8.8574, "step": 105140 }, { "epoch": 0.525106744238308, "grad_norm": 0.09476704150438309, "learning_rate": 1.4282460136674261e-05, "loss": 8.8575, "step": 105150 }, { "epoch": 0.5251566830632475, "grad_norm": 0.09615634381771088, "learning_rate": 1.4280958221732708e-05, "loss": 8.8502, "step": 105160 }, { "epoch": 0.525206621888187, "grad_norm": 0.09071250259876251, "learning_rate": 1.4279456306791158e-05, "loss": 8.8623, "step": 105170 }, { "epoch": 0.5252565607131264, "grad_norm": 0.08948079496622086, "learning_rate": 1.4277954391849609e-05, "loss": 8.8616, "step": 105180 }, { "epoch": 0.5253064995380659, "grad_norm": 0.09837137907743454, "learning_rate": 1.4276452476908059e-05, "loss": 8.856, "step": 105190 }, { "epoch": 0.5253564383630053, "grad_norm": 0.09280554950237274, "learning_rate": 1.4274950561966509e-05, "loss": 8.8634, "step": 105200 }, { "epoch": 0.5254063771879448, "grad_norm": 0.09170987457036972, "learning_rate": 1.4273448647024956e-05, "loss": 8.8582, "step": 105210 }, { "epoch": 0.5254563160128842, "grad_norm": 0.09427595138549805, "learning_rate": 1.4271946732083406e-05, "loss": 8.8591, "step": 105220 }, { "epoch": 0.5255062548378236, "grad_norm": 0.08826853334903717, "learning_rate": 1.4270444817141856e-05, "loss": 8.8695, "step": 105230 }, { "epoch": 0.5255561936627631, "grad_norm": 0.09086894989013672, "learning_rate": 1.4268942902200306e-05, "loss": 8.8574, "step": 105240 }, { "epoch": 0.5256061324877026, "grad_norm": 0.09478379786014557, "learning_rate": 1.4267440987258756e-05, "loss": 8.8617, "step": 105250 }, { "epoch": 0.525656071312642, "grad_norm": 0.09793008863925934, "learning_rate": 1.4265939072317205e-05, "loss": 8.8654, "step": 105260 }, { "epoch": 0.5257060101375814, "grad_norm": 0.09823063015937805, "learning_rate": 1.4264437157375653e-05, "loss": 8.8549, "step": 105270 }, { "epoch": 0.5257559489625209, "grad_norm": 0.08841749280691147, "learning_rate": 1.4262935242434104e-05, "loss": 8.865, "step": 105280 }, { "epoch": 0.5258058877874604, "grad_norm": 0.09652768075466156, "learning_rate": 1.4261433327492554e-05, "loss": 8.8668, "step": 105290 }, { "epoch": 0.5258558266123998, "grad_norm": 0.09202666580677032, "learning_rate": 1.4259931412551004e-05, "loss": 8.8702, "step": 105300 }, { "epoch": 0.5259057654373392, "grad_norm": 0.09303510934114456, "learning_rate": 1.4258429497609452e-05, "loss": 8.8646, "step": 105310 }, { "epoch": 0.5259557042622787, "grad_norm": 0.09391511976718903, "learning_rate": 1.4256927582667901e-05, "loss": 8.8435, "step": 105320 }, { "epoch": 0.5260056430872182, "grad_norm": 0.10001968592405319, "learning_rate": 1.4255425667726351e-05, "loss": 8.8473, "step": 105330 }, { "epoch": 0.5260555819121576, "grad_norm": 0.09100840985774994, "learning_rate": 1.4253923752784801e-05, "loss": 8.8602, "step": 105340 }, { "epoch": 0.526105520737097, "grad_norm": 0.0969325453042984, "learning_rate": 1.4252421837843251e-05, "loss": 8.8614, "step": 105350 }, { "epoch": 0.5261554595620365, "grad_norm": 0.09638604521751404, "learning_rate": 1.42509199229017e-05, "loss": 8.8666, "step": 105360 }, { "epoch": 0.526205398386976, "grad_norm": 0.09278382360935211, "learning_rate": 1.4249418007960148e-05, "loss": 8.8661, "step": 105370 }, { "epoch": 0.5262553372119154, "grad_norm": 0.09172243624925613, "learning_rate": 1.4247916093018599e-05, "loss": 8.8472, "step": 105380 }, { "epoch": 0.5263052760368548, "grad_norm": 0.09195327013731003, "learning_rate": 1.4246414178077049e-05, "loss": 8.8642, "step": 105390 }, { "epoch": 0.5263552148617943, "grad_norm": 0.0913047194480896, "learning_rate": 1.4244912263135499e-05, "loss": 8.8626, "step": 105400 }, { "epoch": 0.5264051536867338, "grad_norm": 0.09170885384082794, "learning_rate": 1.4243410348193947e-05, "loss": 8.8508, "step": 105410 }, { "epoch": 0.5264550925116732, "grad_norm": 0.0922761857509613, "learning_rate": 1.4241908433252398e-05, "loss": 8.8671, "step": 105420 }, { "epoch": 0.5265050313366126, "grad_norm": 0.0899137631058693, "learning_rate": 1.4240406518310846e-05, "loss": 8.866, "step": 105430 }, { "epoch": 0.5265549701615521, "grad_norm": 0.10037946701049805, "learning_rate": 1.4238904603369296e-05, "loss": 8.8532, "step": 105440 }, { "epoch": 0.5266049089864916, "grad_norm": 0.08979129791259766, "learning_rate": 1.4237402688427746e-05, "loss": 8.8657, "step": 105450 }, { "epoch": 0.526654847811431, "grad_norm": 0.08900034427642822, "learning_rate": 1.4235900773486195e-05, "loss": 8.8669, "step": 105460 }, { "epoch": 0.5267047866363704, "grad_norm": 0.08913334459066391, "learning_rate": 1.4234398858544645e-05, "loss": 8.8633, "step": 105470 }, { "epoch": 0.5267547254613099, "grad_norm": 0.09597323089838028, "learning_rate": 1.4232896943603094e-05, "loss": 8.8568, "step": 105480 }, { "epoch": 0.5268046642862494, "grad_norm": 0.09855466336011887, "learning_rate": 1.4231395028661544e-05, "loss": 8.8733, "step": 105490 }, { "epoch": 0.5268546031111888, "grad_norm": 0.096231609582901, "learning_rate": 1.4229893113719994e-05, "loss": 8.8701, "step": 105500 }, { "epoch": 0.5269045419361282, "grad_norm": 0.09357398748397827, "learning_rate": 1.4228391198778442e-05, "loss": 8.8504, "step": 105510 }, { "epoch": 0.5269544807610677, "grad_norm": 0.09753704071044922, "learning_rate": 1.4226889283836893e-05, "loss": 8.8658, "step": 105520 }, { "epoch": 0.5270044195860072, "grad_norm": 0.08956428617238998, "learning_rate": 1.4225387368895341e-05, "loss": 8.8621, "step": 105530 }, { "epoch": 0.5270543584109466, "grad_norm": 0.10666527599096298, "learning_rate": 1.4223885453953791e-05, "loss": 8.8537, "step": 105540 }, { "epoch": 0.527104297235886, "grad_norm": 0.0884803831577301, "learning_rate": 1.4222383539012241e-05, "loss": 8.8577, "step": 105550 }, { "epoch": 0.5271542360608255, "grad_norm": 0.0955420434474945, "learning_rate": 1.422088162407069e-05, "loss": 8.8548, "step": 105560 }, { "epoch": 0.527204174885765, "grad_norm": 0.09400387853384018, "learning_rate": 1.421937970912914e-05, "loss": 8.8536, "step": 105570 }, { "epoch": 0.5272541137107044, "grad_norm": 0.09268182516098022, "learning_rate": 1.4217877794187589e-05, "loss": 8.8589, "step": 105580 }, { "epoch": 0.5273040525356438, "grad_norm": 0.09028761833906174, "learning_rate": 1.4216375879246039e-05, "loss": 8.8849, "step": 105590 }, { "epoch": 0.5273539913605833, "grad_norm": 0.09481693059206009, "learning_rate": 1.4214873964304489e-05, "loss": 8.8536, "step": 105600 }, { "epoch": 0.5274039301855228, "grad_norm": 0.09226229786872864, "learning_rate": 1.4213372049362937e-05, "loss": 8.8569, "step": 105610 }, { "epoch": 0.5274538690104622, "grad_norm": 0.0896633192896843, "learning_rate": 1.4211870134421388e-05, "loss": 8.8696, "step": 105620 }, { "epoch": 0.5275038078354016, "grad_norm": 0.09037711471319199, "learning_rate": 1.4210368219479838e-05, "loss": 8.8657, "step": 105630 }, { "epoch": 0.527553746660341, "grad_norm": 0.09311304241418839, "learning_rate": 1.4208866304538286e-05, "loss": 8.8558, "step": 105640 }, { "epoch": 0.5276036854852806, "grad_norm": 0.09499665349721909, "learning_rate": 1.4207364389596736e-05, "loss": 8.8671, "step": 105650 }, { "epoch": 0.52765362431022, "grad_norm": 0.09343437850475311, "learning_rate": 1.4205862474655185e-05, "loss": 8.8469, "step": 105660 }, { "epoch": 0.5277035631351594, "grad_norm": 0.09089982509613037, "learning_rate": 1.4204360559713635e-05, "loss": 8.8575, "step": 105670 }, { "epoch": 0.5277535019600988, "grad_norm": 0.09557026624679565, "learning_rate": 1.4202858644772085e-05, "loss": 8.8554, "step": 105680 }, { "epoch": 0.5278034407850384, "grad_norm": 0.09490370750427246, "learning_rate": 1.4201356729830534e-05, "loss": 8.8517, "step": 105690 }, { "epoch": 0.5278533796099778, "grad_norm": 0.09626539051532745, "learning_rate": 1.4199854814888984e-05, "loss": 8.8529, "step": 105700 }, { "epoch": 0.5279033184349172, "grad_norm": 0.09501594305038452, "learning_rate": 1.4198352899947432e-05, "loss": 8.8488, "step": 105710 }, { "epoch": 0.5279532572598566, "grad_norm": 0.09240279346704483, "learning_rate": 1.4196850985005883e-05, "loss": 8.855, "step": 105720 }, { "epoch": 0.5280031960847962, "grad_norm": 0.09040988236665726, "learning_rate": 1.4195349070064333e-05, "loss": 8.8664, "step": 105730 }, { "epoch": 0.5280531349097356, "grad_norm": 0.0923992171883583, "learning_rate": 1.4193847155122781e-05, "loss": 8.8492, "step": 105740 }, { "epoch": 0.528103073734675, "grad_norm": 0.09252621978521347, "learning_rate": 1.4192345240181231e-05, "loss": 8.8525, "step": 105750 }, { "epoch": 0.5281530125596144, "grad_norm": 0.09642771631479263, "learning_rate": 1.419084332523968e-05, "loss": 8.8541, "step": 105760 }, { "epoch": 0.528202951384554, "grad_norm": 0.09207062423229218, "learning_rate": 1.418934141029813e-05, "loss": 8.8631, "step": 105770 }, { "epoch": 0.5282528902094934, "grad_norm": 0.09391027688980103, "learning_rate": 1.418783949535658e-05, "loss": 8.8586, "step": 105780 }, { "epoch": 0.5283028290344328, "grad_norm": 0.09609290212392807, "learning_rate": 1.418633758041503e-05, "loss": 8.8679, "step": 105790 }, { "epoch": 0.5283527678593722, "grad_norm": 0.0963875949382782, "learning_rate": 1.4184835665473479e-05, "loss": 8.8622, "step": 105800 }, { "epoch": 0.5284027066843117, "grad_norm": 0.08848047256469727, "learning_rate": 1.4183333750531929e-05, "loss": 8.8517, "step": 105810 }, { "epoch": 0.5284526455092512, "grad_norm": 0.08758027851581573, "learning_rate": 1.4181831835590378e-05, "loss": 8.8608, "step": 105820 }, { "epoch": 0.5285025843341906, "grad_norm": 0.09200868755578995, "learning_rate": 1.4180329920648828e-05, "loss": 8.8544, "step": 105830 }, { "epoch": 0.52855252315913, "grad_norm": 0.09613315761089325, "learning_rate": 1.4178828005707278e-05, "loss": 8.8563, "step": 105840 }, { "epoch": 0.5286024619840695, "grad_norm": 0.09787869453430176, "learning_rate": 1.4177326090765726e-05, "loss": 8.8435, "step": 105850 }, { "epoch": 0.528652400809009, "grad_norm": 0.09556427597999573, "learning_rate": 1.4175824175824177e-05, "loss": 8.861, "step": 105860 }, { "epoch": 0.5287023396339484, "grad_norm": 0.09230974316596985, "learning_rate": 1.4174322260882625e-05, "loss": 8.8409, "step": 105870 }, { "epoch": 0.5287522784588878, "grad_norm": 0.0904126837849617, "learning_rate": 1.4172820345941075e-05, "loss": 8.8483, "step": 105880 }, { "epoch": 0.5288022172838273, "grad_norm": 0.09018577635288239, "learning_rate": 1.4171318430999525e-05, "loss": 8.8603, "step": 105890 }, { "epoch": 0.5288521561087668, "grad_norm": 0.09044212102890015, "learning_rate": 1.4169816516057974e-05, "loss": 8.845, "step": 105900 }, { "epoch": 0.5289020949337062, "grad_norm": 0.08735643327236176, "learning_rate": 1.4168314601116424e-05, "loss": 8.8518, "step": 105910 }, { "epoch": 0.5289520337586456, "grad_norm": 0.08877028524875641, "learning_rate": 1.4166812686174873e-05, "loss": 8.8487, "step": 105920 }, { "epoch": 0.5290019725835851, "grad_norm": 0.09317787736654282, "learning_rate": 1.4165310771233323e-05, "loss": 8.8551, "step": 105930 }, { "epoch": 0.5290519114085246, "grad_norm": 0.09516378492116928, "learning_rate": 1.4163808856291773e-05, "loss": 8.8644, "step": 105940 }, { "epoch": 0.529101850233464, "grad_norm": 0.08973415195941925, "learning_rate": 1.4162306941350223e-05, "loss": 8.876, "step": 105950 }, { "epoch": 0.5291517890584034, "grad_norm": 0.09267198294401169, "learning_rate": 1.4160805026408672e-05, "loss": 8.8482, "step": 105960 }, { "epoch": 0.5292017278833429, "grad_norm": 0.0925462618470192, "learning_rate": 1.415930311146712e-05, "loss": 8.8538, "step": 105970 }, { "epoch": 0.5292516667082824, "grad_norm": 0.09187739342451096, "learning_rate": 1.415780119652557e-05, "loss": 8.8542, "step": 105980 }, { "epoch": 0.5293016055332218, "grad_norm": 0.08892559260129929, "learning_rate": 1.415629928158402e-05, "loss": 8.8422, "step": 105990 }, { "epoch": 0.5293515443581612, "grad_norm": 0.09179945290088654, "learning_rate": 1.415479736664247e-05, "loss": 8.8534, "step": 106000 }, { "epoch": 0.5294014831831007, "grad_norm": 0.09214461594820023, "learning_rate": 1.4153295451700919e-05, "loss": 8.8695, "step": 106010 }, { "epoch": 0.5294514220080402, "grad_norm": 0.09491454809904099, "learning_rate": 1.4151793536759368e-05, "loss": 8.8725, "step": 106020 }, { "epoch": 0.5295013608329796, "grad_norm": 0.09679799526929855, "learning_rate": 1.4150291621817818e-05, "loss": 8.845, "step": 106030 }, { "epoch": 0.529551299657919, "grad_norm": 0.08733250945806503, "learning_rate": 1.4148789706876268e-05, "loss": 8.8541, "step": 106040 }, { "epoch": 0.5296012384828584, "grad_norm": 0.09057143330574036, "learning_rate": 1.4147287791934718e-05, "loss": 8.8618, "step": 106050 }, { "epoch": 0.529651177307798, "grad_norm": 0.09323775768280029, "learning_rate": 1.4145785876993167e-05, "loss": 8.8594, "step": 106060 }, { "epoch": 0.5297011161327374, "grad_norm": 0.09307857602834702, "learning_rate": 1.4144283962051615e-05, "loss": 8.8627, "step": 106070 }, { "epoch": 0.5297510549576768, "grad_norm": 0.09620136767625809, "learning_rate": 1.4142782047110065e-05, "loss": 8.8487, "step": 106080 }, { "epoch": 0.5298009937826162, "grad_norm": 0.08902984112501144, "learning_rate": 1.4141280132168515e-05, "loss": 8.866, "step": 106090 }, { "epoch": 0.5298509326075558, "grad_norm": 0.09401166439056396, "learning_rate": 1.4139778217226966e-05, "loss": 8.8448, "step": 106100 }, { "epoch": 0.5299008714324952, "grad_norm": 0.09507770091295242, "learning_rate": 1.4138276302285416e-05, "loss": 8.852, "step": 106110 }, { "epoch": 0.5299508102574346, "grad_norm": 0.09667683392763138, "learning_rate": 1.4136774387343863e-05, "loss": 8.8424, "step": 106120 }, { "epoch": 0.530000749082374, "grad_norm": 0.09889981150627136, "learning_rate": 1.4135272472402313e-05, "loss": 8.8661, "step": 106130 }, { "epoch": 0.5300506879073136, "grad_norm": 0.09269087016582489, "learning_rate": 1.4133770557460763e-05, "loss": 8.8558, "step": 106140 }, { "epoch": 0.530100626732253, "grad_norm": 0.09363644570112228, "learning_rate": 1.4132268642519213e-05, "loss": 8.8576, "step": 106150 }, { "epoch": 0.5301505655571924, "grad_norm": 0.09286314994096756, "learning_rate": 1.4130766727577663e-05, "loss": 8.8647, "step": 106160 }, { "epoch": 0.5302005043821318, "grad_norm": 0.09268610924482346, "learning_rate": 1.412926481263611e-05, "loss": 8.8578, "step": 106170 }, { "epoch": 0.5302504432070714, "grad_norm": 0.09902572631835938, "learning_rate": 1.412776289769456e-05, "loss": 8.8533, "step": 106180 }, { "epoch": 0.5303003820320108, "grad_norm": 0.093776173889637, "learning_rate": 1.412626098275301e-05, "loss": 8.8647, "step": 106190 }, { "epoch": 0.5303503208569502, "grad_norm": 0.0955788791179657, "learning_rate": 1.412475906781146e-05, "loss": 8.8529, "step": 106200 }, { "epoch": 0.5304002596818896, "grad_norm": 0.09192150086164474, "learning_rate": 1.412325715286991e-05, "loss": 8.8548, "step": 106210 }, { "epoch": 0.5304501985068292, "grad_norm": 0.09892907738685608, "learning_rate": 1.4121755237928358e-05, "loss": 8.8589, "step": 106220 }, { "epoch": 0.5305001373317686, "grad_norm": 0.09268810600042343, "learning_rate": 1.4120253322986808e-05, "loss": 8.8518, "step": 106230 }, { "epoch": 0.530550076156708, "grad_norm": 0.09231319278478622, "learning_rate": 1.4118751408045258e-05, "loss": 8.8329, "step": 106240 }, { "epoch": 0.5306000149816474, "grad_norm": 0.08841013163328171, "learning_rate": 1.4117249493103708e-05, "loss": 8.8476, "step": 106250 }, { "epoch": 0.530649953806587, "grad_norm": 0.09667849540710449, "learning_rate": 1.4115747578162158e-05, "loss": 8.8692, "step": 106260 }, { "epoch": 0.5306998926315264, "grad_norm": 0.0873061865568161, "learning_rate": 1.4114245663220607e-05, "loss": 8.8684, "step": 106270 }, { "epoch": 0.5307498314564658, "grad_norm": 0.0922415629029274, "learning_rate": 1.4112743748279055e-05, "loss": 8.8375, "step": 106280 }, { "epoch": 0.5307997702814052, "grad_norm": 0.09075237065553665, "learning_rate": 1.4111241833337505e-05, "loss": 8.8494, "step": 106290 }, { "epoch": 0.5308497091063448, "grad_norm": 0.09417222440242767, "learning_rate": 1.4109739918395956e-05, "loss": 8.8416, "step": 106300 }, { "epoch": 0.5308996479312842, "grad_norm": 0.09558428823947906, "learning_rate": 1.4108238003454406e-05, "loss": 8.8383, "step": 106310 }, { "epoch": 0.5309495867562236, "grad_norm": 0.10185667872428894, "learning_rate": 1.4106736088512854e-05, "loss": 8.8451, "step": 106320 }, { "epoch": 0.530999525581163, "grad_norm": 0.09136248379945755, "learning_rate": 1.4105234173571303e-05, "loss": 8.8492, "step": 106330 }, { "epoch": 0.5310494644061026, "grad_norm": 0.09055881202220917, "learning_rate": 1.4103732258629753e-05, "loss": 8.8517, "step": 106340 }, { "epoch": 0.531099403231042, "grad_norm": 0.08744243532419205, "learning_rate": 1.4102230343688203e-05, "loss": 8.8688, "step": 106350 }, { "epoch": 0.5311493420559814, "grad_norm": 0.0952538251876831, "learning_rate": 1.4100728428746653e-05, "loss": 8.8506, "step": 106360 }, { "epoch": 0.5311992808809208, "grad_norm": 0.0906098335981369, "learning_rate": 1.4099226513805102e-05, "loss": 8.8518, "step": 106370 }, { "epoch": 0.5312492197058604, "grad_norm": 0.0909523218870163, "learning_rate": 1.409772459886355e-05, "loss": 8.8542, "step": 106380 }, { "epoch": 0.5312991585307998, "grad_norm": 0.0970349833369255, "learning_rate": 1.4096222683922e-05, "loss": 8.8624, "step": 106390 }, { "epoch": 0.5313490973557392, "grad_norm": 0.09205695241689682, "learning_rate": 1.409472076898045e-05, "loss": 8.8506, "step": 106400 }, { "epoch": 0.5313990361806786, "grad_norm": 0.09964334964752197, "learning_rate": 1.40932188540389e-05, "loss": 8.8655, "step": 106410 }, { "epoch": 0.5314489750056182, "grad_norm": 0.0929349958896637, "learning_rate": 1.409171693909735e-05, "loss": 8.8504, "step": 106420 }, { "epoch": 0.5314989138305576, "grad_norm": 0.08960101008415222, "learning_rate": 1.40902150241558e-05, "loss": 8.8597, "step": 106430 }, { "epoch": 0.531548852655497, "grad_norm": 0.09919091314077377, "learning_rate": 1.4088713109214248e-05, "loss": 8.8413, "step": 106440 }, { "epoch": 0.5315987914804364, "grad_norm": 0.10037015378475189, "learning_rate": 1.4087211194272698e-05, "loss": 8.8489, "step": 106450 }, { "epoch": 0.531648730305376, "grad_norm": 0.09269418567419052, "learning_rate": 1.4085709279331148e-05, "loss": 8.8392, "step": 106460 }, { "epoch": 0.5316986691303154, "grad_norm": 0.09641455113887787, "learning_rate": 1.4084207364389597e-05, "loss": 8.8553, "step": 106470 }, { "epoch": 0.5317486079552548, "grad_norm": 0.09411516040563583, "learning_rate": 1.4082705449448047e-05, "loss": 8.845, "step": 106480 }, { "epoch": 0.5317985467801942, "grad_norm": 0.08999986946582794, "learning_rate": 1.4081203534506495e-05, "loss": 8.8416, "step": 106490 }, { "epoch": 0.5318484856051338, "grad_norm": 0.0922091007232666, "learning_rate": 1.4079701619564946e-05, "loss": 8.8469, "step": 106500 }, { "epoch": 0.5318984244300732, "grad_norm": 0.09420941770076752, "learning_rate": 1.4078199704623396e-05, "loss": 8.862, "step": 106510 }, { "epoch": 0.5319483632550126, "grad_norm": 0.09016049653291702, "learning_rate": 1.4076697789681844e-05, "loss": 8.8476, "step": 106520 }, { "epoch": 0.531998302079952, "grad_norm": 0.104548379778862, "learning_rate": 1.4075195874740295e-05, "loss": 8.8476, "step": 106530 }, { "epoch": 0.5320482409048916, "grad_norm": 0.08957797288894653, "learning_rate": 1.4073693959798743e-05, "loss": 8.856, "step": 106540 }, { "epoch": 0.532098179729831, "grad_norm": 0.09814672917127609, "learning_rate": 1.4072192044857193e-05, "loss": 8.8517, "step": 106550 }, { "epoch": 0.5321481185547704, "grad_norm": 0.09064110368490219, "learning_rate": 1.4070690129915643e-05, "loss": 8.8456, "step": 106560 }, { "epoch": 0.5321980573797098, "grad_norm": 0.09723041951656342, "learning_rate": 1.4069188214974092e-05, "loss": 8.8518, "step": 106570 }, { "epoch": 0.5322479962046494, "grad_norm": 0.09290885925292969, "learning_rate": 1.4067686300032542e-05, "loss": 8.8574, "step": 106580 }, { "epoch": 0.5322979350295888, "grad_norm": 0.09443046897649765, "learning_rate": 1.4066184385090992e-05, "loss": 8.8631, "step": 106590 }, { "epoch": 0.5323478738545282, "grad_norm": 0.09200415760278702, "learning_rate": 1.406468247014944e-05, "loss": 8.8491, "step": 106600 }, { "epoch": 0.5323978126794676, "grad_norm": 0.09204615652561188, "learning_rate": 1.4063180555207891e-05, "loss": 8.8424, "step": 106610 }, { "epoch": 0.5324477515044072, "grad_norm": 0.0978984385728836, "learning_rate": 1.406167864026634e-05, "loss": 8.8551, "step": 106620 }, { "epoch": 0.5324976903293466, "grad_norm": 0.10067642480134964, "learning_rate": 1.406017672532479e-05, "loss": 8.8473, "step": 106630 }, { "epoch": 0.532547629154286, "grad_norm": 0.09151472896337509, "learning_rate": 1.405867481038324e-05, "loss": 8.8594, "step": 106640 }, { "epoch": 0.5325975679792254, "grad_norm": 0.09709201753139496, "learning_rate": 1.4057172895441688e-05, "loss": 8.8443, "step": 106650 }, { "epoch": 0.532647506804165, "grad_norm": 0.09483177214860916, "learning_rate": 1.4055670980500138e-05, "loss": 8.8499, "step": 106660 }, { "epoch": 0.5326974456291044, "grad_norm": 0.09062449634075165, "learning_rate": 1.4054169065558587e-05, "loss": 8.8465, "step": 106670 }, { "epoch": 0.5327473844540438, "grad_norm": 0.08802146464586258, "learning_rate": 1.4052667150617037e-05, "loss": 8.8475, "step": 106680 }, { "epoch": 0.5327973232789832, "grad_norm": 0.09027039259672165, "learning_rate": 1.4051165235675487e-05, "loss": 8.8396, "step": 106690 }, { "epoch": 0.5328472621039227, "grad_norm": 0.08948853611946106, "learning_rate": 1.4049663320733936e-05, "loss": 8.855, "step": 106700 }, { "epoch": 0.5328972009288622, "grad_norm": 0.09329523891210556, "learning_rate": 1.4048161405792386e-05, "loss": 8.8439, "step": 106710 }, { "epoch": 0.5329471397538016, "grad_norm": 0.09256135672330856, "learning_rate": 1.4046659490850834e-05, "loss": 8.8441, "step": 106720 }, { "epoch": 0.532997078578741, "grad_norm": 0.0897359699010849, "learning_rate": 1.4045157575909285e-05, "loss": 8.8476, "step": 106730 }, { "epoch": 0.5330470174036805, "grad_norm": 0.10047424584627151, "learning_rate": 1.4043655660967735e-05, "loss": 8.858, "step": 106740 }, { "epoch": 0.53309695622862, "grad_norm": 0.09750477969646454, "learning_rate": 1.4042153746026185e-05, "loss": 8.8484, "step": 106750 }, { "epoch": 0.5331468950535594, "grad_norm": 0.09380090236663818, "learning_rate": 1.4040651831084633e-05, "loss": 8.8272, "step": 106760 }, { "epoch": 0.5331968338784988, "grad_norm": 0.09145848453044891, "learning_rate": 1.4039149916143082e-05, "loss": 8.8505, "step": 106770 }, { "epoch": 0.5332467727034383, "grad_norm": 0.09653259068727493, "learning_rate": 1.4037648001201532e-05, "loss": 8.837, "step": 106780 }, { "epoch": 0.5332967115283778, "grad_norm": 0.09636209160089493, "learning_rate": 1.4036146086259982e-05, "loss": 8.8567, "step": 106790 }, { "epoch": 0.5333466503533172, "grad_norm": 0.09500285983085632, "learning_rate": 1.4034644171318432e-05, "loss": 8.8523, "step": 106800 }, { "epoch": 0.5333965891782566, "grad_norm": 0.09158430993556976, "learning_rate": 1.4033142256376881e-05, "loss": 8.8477, "step": 106810 }, { "epoch": 0.533446528003196, "grad_norm": 0.09533253312110901, "learning_rate": 1.403164034143533e-05, "loss": 8.8366, "step": 106820 }, { "epoch": 0.5334964668281356, "grad_norm": 0.09380104392766953, "learning_rate": 1.403013842649378e-05, "loss": 8.8238, "step": 106830 }, { "epoch": 0.533546405653075, "grad_norm": 0.09858004003763199, "learning_rate": 1.402863651155223e-05, "loss": 8.8324, "step": 106840 }, { "epoch": 0.5335963444780144, "grad_norm": 0.09267483651638031, "learning_rate": 1.402713459661068e-05, "loss": 8.8385, "step": 106850 }, { "epoch": 0.5336462833029538, "grad_norm": 0.0965743437409401, "learning_rate": 1.4025632681669128e-05, "loss": 8.8602, "step": 106860 }, { "epoch": 0.5336962221278934, "grad_norm": 0.09583596140146255, "learning_rate": 1.4024130766727577e-05, "loss": 8.8519, "step": 106870 }, { "epoch": 0.5337461609528328, "grad_norm": 0.095204658806324, "learning_rate": 1.4022628851786027e-05, "loss": 8.8489, "step": 106880 }, { "epoch": 0.5337960997777722, "grad_norm": 0.09433889389038086, "learning_rate": 1.4021126936844477e-05, "loss": 8.8466, "step": 106890 }, { "epoch": 0.5338460386027116, "grad_norm": 0.09470517933368683, "learning_rate": 1.4019625021902927e-05, "loss": 8.8563, "step": 106900 }, { "epoch": 0.5338959774276512, "grad_norm": 0.09337898343801498, "learning_rate": 1.4018123106961378e-05, "loss": 8.842, "step": 106910 }, { "epoch": 0.5339459162525906, "grad_norm": 0.09626980870962143, "learning_rate": 1.4016621192019824e-05, "loss": 8.8568, "step": 106920 }, { "epoch": 0.53399585507753, "grad_norm": 0.09709392488002777, "learning_rate": 1.4015119277078275e-05, "loss": 8.8477, "step": 106930 }, { "epoch": 0.5340457939024694, "grad_norm": 0.09491003304719925, "learning_rate": 1.4013617362136725e-05, "loss": 8.8506, "step": 106940 }, { "epoch": 0.534095732727409, "grad_norm": 0.09251262992620468, "learning_rate": 1.4012115447195175e-05, "loss": 8.8449, "step": 106950 }, { "epoch": 0.5341456715523484, "grad_norm": 0.0920642837882042, "learning_rate": 1.4010613532253625e-05, "loss": 8.8396, "step": 106960 }, { "epoch": 0.5341956103772878, "grad_norm": 0.09323357790708542, "learning_rate": 1.4009111617312072e-05, "loss": 8.8498, "step": 106970 }, { "epoch": 0.5342455492022272, "grad_norm": 0.09278106689453125, "learning_rate": 1.4007609702370522e-05, "loss": 8.8429, "step": 106980 }, { "epoch": 0.5342954880271668, "grad_norm": 0.09746766835451126, "learning_rate": 1.4006107787428972e-05, "loss": 8.8352, "step": 106990 }, { "epoch": 0.5343454268521062, "grad_norm": 0.10587667673826218, "learning_rate": 1.4004605872487422e-05, "loss": 8.8457, "step": 107000 }, { "epoch": 0.5343953656770456, "grad_norm": 0.09702330827713013, "learning_rate": 1.4003103957545873e-05, "loss": 8.8515, "step": 107010 }, { "epoch": 0.534445304501985, "grad_norm": 0.09104038029909134, "learning_rate": 1.400160204260432e-05, "loss": 8.847, "step": 107020 }, { "epoch": 0.5344952433269246, "grad_norm": 0.08740893006324768, "learning_rate": 1.400010012766277e-05, "loss": 8.856, "step": 107030 }, { "epoch": 0.534545182151864, "grad_norm": 0.09178484231233597, "learning_rate": 1.399859821272122e-05, "loss": 8.8457, "step": 107040 }, { "epoch": 0.5345951209768034, "grad_norm": 0.09630772471427917, "learning_rate": 1.399709629777967e-05, "loss": 8.8488, "step": 107050 }, { "epoch": 0.5346450598017428, "grad_norm": 0.09321489930152893, "learning_rate": 1.399559438283812e-05, "loss": 8.845, "step": 107060 }, { "epoch": 0.5346949986266823, "grad_norm": 0.09223134070634842, "learning_rate": 1.3994092467896569e-05, "loss": 8.8505, "step": 107070 }, { "epoch": 0.5347449374516218, "grad_norm": 0.09391648322343826, "learning_rate": 1.3992590552955017e-05, "loss": 8.8556, "step": 107080 }, { "epoch": 0.5347948762765612, "grad_norm": 0.08899156749248505, "learning_rate": 1.3991088638013467e-05, "loss": 8.8542, "step": 107090 }, { "epoch": 0.5348448151015006, "grad_norm": 0.09277118742465973, "learning_rate": 1.3989586723071917e-05, "loss": 8.8285, "step": 107100 }, { "epoch": 0.5348947539264401, "grad_norm": 0.09124419838190079, "learning_rate": 1.3988084808130368e-05, "loss": 8.8385, "step": 107110 }, { "epoch": 0.5349446927513796, "grad_norm": 0.08537127077579498, "learning_rate": 1.3986582893188816e-05, "loss": 8.8404, "step": 107120 }, { "epoch": 0.534994631576319, "grad_norm": 0.09226098656654358, "learning_rate": 1.3985080978247265e-05, "loss": 8.8535, "step": 107130 }, { "epoch": 0.5350445704012584, "grad_norm": 0.09150153398513794, "learning_rate": 1.3983579063305715e-05, "loss": 8.8599, "step": 107140 }, { "epoch": 0.535094509226198, "grad_norm": 0.09004461765289307, "learning_rate": 1.3982077148364165e-05, "loss": 8.836, "step": 107150 }, { "epoch": 0.5351444480511374, "grad_norm": 0.08848071843385696, "learning_rate": 1.3980575233422615e-05, "loss": 8.84, "step": 107160 }, { "epoch": 0.5351943868760768, "grad_norm": 0.09761686623096466, "learning_rate": 1.3979073318481064e-05, "loss": 8.8489, "step": 107170 }, { "epoch": 0.5352443257010162, "grad_norm": 0.09823184460401535, "learning_rate": 1.3977571403539512e-05, "loss": 8.8443, "step": 107180 }, { "epoch": 0.5352942645259557, "grad_norm": 0.10020698606967926, "learning_rate": 1.3976069488597962e-05, "loss": 8.8401, "step": 107190 }, { "epoch": 0.5353442033508952, "grad_norm": 0.08891124278306961, "learning_rate": 1.3974567573656412e-05, "loss": 8.8526, "step": 107200 }, { "epoch": 0.5353941421758346, "grad_norm": 0.10143271088600159, "learning_rate": 1.3973065658714863e-05, "loss": 8.8288, "step": 107210 }, { "epoch": 0.535444081000774, "grad_norm": 0.09501948952674866, "learning_rate": 1.3971563743773311e-05, "loss": 8.8283, "step": 107220 }, { "epoch": 0.5354940198257135, "grad_norm": 0.09250184148550034, "learning_rate": 1.3970061828831761e-05, "loss": 8.8371, "step": 107230 }, { "epoch": 0.535543958650653, "grad_norm": 0.10043402016162872, "learning_rate": 1.396855991389021e-05, "loss": 8.8402, "step": 107240 }, { "epoch": 0.5355938974755924, "grad_norm": 0.0956840068101883, "learning_rate": 1.396705799894866e-05, "loss": 8.8494, "step": 107250 }, { "epoch": 0.5356438363005318, "grad_norm": 0.09393565356731415, "learning_rate": 1.396555608400711e-05, "loss": 8.8335, "step": 107260 }, { "epoch": 0.5356937751254713, "grad_norm": 0.09771479666233063, "learning_rate": 1.3964054169065559e-05, "loss": 8.8376, "step": 107270 }, { "epoch": 0.5357437139504108, "grad_norm": 0.08908728510141373, "learning_rate": 1.3962552254124009e-05, "loss": 8.8401, "step": 107280 }, { "epoch": 0.5357936527753502, "grad_norm": 0.08806288987398148, "learning_rate": 1.3961050339182457e-05, "loss": 8.8491, "step": 107290 }, { "epoch": 0.5358435916002896, "grad_norm": 0.09544865041971207, "learning_rate": 1.3959548424240907e-05, "loss": 8.8399, "step": 107300 }, { "epoch": 0.5358935304252291, "grad_norm": 0.0903419628739357, "learning_rate": 1.3958046509299358e-05, "loss": 8.8217, "step": 107310 }, { "epoch": 0.5359434692501686, "grad_norm": 0.09364979714155197, "learning_rate": 1.3956544594357806e-05, "loss": 8.8447, "step": 107320 }, { "epoch": 0.535993408075108, "grad_norm": 0.09100334346294403, "learning_rate": 1.3955042679416256e-05, "loss": 8.8389, "step": 107330 }, { "epoch": 0.5360433469000474, "grad_norm": 0.09545484930276871, "learning_rate": 1.3953540764474705e-05, "loss": 8.8371, "step": 107340 }, { "epoch": 0.5360932857249869, "grad_norm": 0.09141393005847931, "learning_rate": 1.3952038849533155e-05, "loss": 8.8562, "step": 107350 }, { "epoch": 0.5361432245499264, "grad_norm": 0.09346693754196167, "learning_rate": 1.3950536934591605e-05, "loss": 8.8436, "step": 107360 }, { "epoch": 0.5361931633748658, "grad_norm": 0.09371886402368546, "learning_rate": 1.3949035019650054e-05, "loss": 8.8336, "step": 107370 }, { "epoch": 0.5362431021998052, "grad_norm": 0.08820048719644547, "learning_rate": 1.3947533104708504e-05, "loss": 8.8447, "step": 107380 }, { "epoch": 0.5362930410247447, "grad_norm": 0.09496960043907166, "learning_rate": 1.3946031189766954e-05, "loss": 8.8354, "step": 107390 }, { "epoch": 0.5363429798496842, "grad_norm": 0.09885689616203308, "learning_rate": 1.3944529274825402e-05, "loss": 8.8273, "step": 107400 }, { "epoch": 0.5363929186746236, "grad_norm": 0.09907709807157516, "learning_rate": 1.3943027359883853e-05, "loss": 8.8314, "step": 107410 }, { "epoch": 0.536442857499563, "grad_norm": 0.08787566423416138, "learning_rate": 1.3941525444942301e-05, "loss": 8.8396, "step": 107420 }, { "epoch": 0.5364927963245025, "grad_norm": 0.0915251299738884, "learning_rate": 1.3940023530000751e-05, "loss": 8.8471, "step": 107430 }, { "epoch": 0.536542735149442, "grad_norm": 0.09245522320270538, "learning_rate": 1.3938521615059201e-05, "loss": 8.8446, "step": 107440 }, { "epoch": 0.5365926739743814, "grad_norm": 0.09490872919559479, "learning_rate": 1.393701970011765e-05, "loss": 8.8361, "step": 107450 }, { "epoch": 0.5366426127993208, "grad_norm": 0.1009124219417572, "learning_rate": 1.39355177851761e-05, "loss": 8.8215, "step": 107460 }, { "epoch": 0.5366925516242603, "grad_norm": 0.10078973323106766, "learning_rate": 1.3934015870234549e-05, "loss": 8.8342, "step": 107470 }, { "epoch": 0.5367424904491997, "grad_norm": 0.09087687730789185, "learning_rate": 1.3932513955292999e-05, "loss": 8.8498, "step": 107480 }, { "epoch": 0.5367924292741392, "grad_norm": 0.09109964966773987, "learning_rate": 1.3931012040351449e-05, "loss": 8.8493, "step": 107490 }, { "epoch": 0.5368423680990786, "grad_norm": 0.09533953666687012, "learning_rate": 1.3929510125409897e-05, "loss": 8.8549, "step": 107500 }, { "epoch": 0.5368923069240181, "grad_norm": 0.09448252618312836, "learning_rate": 1.3928008210468348e-05, "loss": 8.8328, "step": 107510 }, { "epoch": 0.5369422457489575, "grad_norm": 0.0955210030078888, "learning_rate": 1.3926506295526796e-05, "loss": 8.8328, "step": 107520 }, { "epoch": 0.536992184573897, "grad_norm": 0.10150817781686783, "learning_rate": 1.3925004380585246e-05, "loss": 8.8467, "step": 107530 }, { "epoch": 0.5370421233988364, "grad_norm": 0.09742490202188492, "learning_rate": 1.3923502465643696e-05, "loss": 8.8382, "step": 107540 }, { "epoch": 0.5370920622237759, "grad_norm": 0.09283208101987839, "learning_rate": 1.3922000550702147e-05, "loss": 8.8348, "step": 107550 }, { "epoch": 0.5371420010487153, "grad_norm": 0.09196760505437851, "learning_rate": 1.3920498635760595e-05, "loss": 8.8501, "step": 107560 }, { "epoch": 0.5371919398736548, "grad_norm": 0.09395366907119751, "learning_rate": 1.3918996720819044e-05, "loss": 8.8297, "step": 107570 }, { "epoch": 0.5372418786985942, "grad_norm": 0.09952932596206665, "learning_rate": 1.3917494805877494e-05, "loss": 8.8295, "step": 107580 }, { "epoch": 0.5372918175235337, "grad_norm": 0.10399974137544632, "learning_rate": 1.3915992890935944e-05, "loss": 8.8323, "step": 107590 }, { "epoch": 0.5373417563484731, "grad_norm": 0.08807992935180664, "learning_rate": 1.3914490975994394e-05, "loss": 8.8514, "step": 107600 }, { "epoch": 0.5373916951734126, "grad_norm": 0.09528198093175888, "learning_rate": 1.3912989061052843e-05, "loss": 8.8302, "step": 107610 }, { "epoch": 0.537441633998352, "grad_norm": 0.09758550673723221, "learning_rate": 1.3911487146111291e-05, "loss": 8.8455, "step": 107620 }, { "epoch": 0.5374915728232915, "grad_norm": 0.0921962559223175, "learning_rate": 1.3909985231169741e-05, "loss": 8.8425, "step": 107630 }, { "epoch": 0.5375415116482309, "grad_norm": 0.09394194185733795, "learning_rate": 1.3908483316228191e-05, "loss": 8.8426, "step": 107640 }, { "epoch": 0.5375914504731704, "grad_norm": 0.0899246484041214, "learning_rate": 1.3906981401286642e-05, "loss": 8.8407, "step": 107650 }, { "epoch": 0.5376413892981098, "grad_norm": 0.09396055340766907, "learning_rate": 1.390547948634509e-05, "loss": 8.8325, "step": 107660 }, { "epoch": 0.5376913281230493, "grad_norm": 0.09703001379966736, "learning_rate": 1.3903977571403539e-05, "loss": 8.8322, "step": 107670 }, { "epoch": 0.5377412669479887, "grad_norm": 0.08996451646089554, "learning_rate": 1.3902475656461989e-05, "loss": 8.8422, "step": 107680 }, { "epoch": 0.5377912057729282, "grad_norm": 0.09080103039741516, "learning_rate": 1.3900973741520439e-05, "loss": 8.8472, "step": 107690 }, { "epoch": 0.5378411445978676, "grad_norm": 0.09670452773571014, "learning_rate": 1.3899471826578889e-05, "loss": 8.8514, "step": 107700 }, { "epoch": 0.5378910834228071, "grad_norm": 0.0983625054359436, "learning_rate": 1.389796991163734e-05, "loss": 8.8386, "step": 107710 }, { "epoch": 0.5379410222477465, "grad_norm": 0.09571649879217148, "learning_rate": 1.3896467996695786e-05, "loss": 8.8353, "step": 107720 }, { "epoch": 0.537990961072686, "grad_norm": 0.0922119989991188, "learning_rate": 1.3894966081754236e-05, "loss": 8.8401, "step": 107730 }, { "epoch": 0.5380408998976254, "grad_norm": 0.08857207745313644, "learning_rate": 1.3893464166812686e-05, "loss": 8.842, "step": 107740 }, { "epoch": 0.5380908387225649, "grad_norm": 0.10182986408472061, "learning_rate": 1.3891962251871137e-05, "loss": 8.8435, "step": 107750 }, { "epoch": 0.5381407775475043, "grad_norm": 0.09429343789815903, "learning_rate": 1.3890460336929587e-05, "loss": 8.8431, "step": 107760 }, { "epoch": 0.5381907163724438, "grad_norm": 0.09139162302017212, "learning_rate": 1.3888958421988034e-05, "loss": 8.8568, "step": 107770 }, { "epoch": 0.5382406551973832, "grad_norm": 0.08870648592710495, "learning_rate": 1.3887456507046484e-05, "loss": 8.8555, "step": 107780 }, { "epoch": 0.5382905940223226, "grad_norm": 0.09010998159646988, "learning_rate": 1.3885954592104934e-05, "loss": 8.8466, "step": 107790 }, { "epoch": 0.5383405328472621, "grad_norm": 0.09655839204788208, "learning_rate": 1.3884452677163384e-05, "loss": 8.8326, "step": 107800 }, { "epoch": 0.5383904716722016, "grad_norm": 0.09276573359966278, "learning_rate": 1.3882950762221834e-05, "loss": 8.8574, "step": 107810 }, { "epoch": 0.538440410497141, "grad_norm": 0.0921722874045372, "learning_rate": 1.3881448847280281e-05, "loss": 8.8477, "step": 107820 }, { "epoch": 0.5384903493220804, "grad_norm": 0.0946490541100502, "learning_rate": 1.3879946932338731e-05, "loss": 8.8401, "step": 107830 }, { "epoch": 0.5385402881470199, "grad_norm": 0.0921718031167984, "learning_rate": 1.3878445017397181e-05, "loss": 8.8461, "step": 107840 }, { "epoch": 0.5385902269719594, "grad_norm": 0.08847850561141968, "learning_rate": 1.3876943102455632e-05, "loss": 8.8462, "step": 107850 }, { "epoch": 0.5386401657968988, "grad_norm": 0.09322067350149155, "learning_rate": 1.3875441187514082e-05, "loss": 8.8422, "step": 107860 }, { "epoch": 0.5386901046218382, "grad_norm": 0.09097786247730255, "learning_rate": 1.387393927257253e-05, "loss": 8.8435, "step": 107870 }, { "epoch": 0.5387400434467777, "grad_norm": 0.09428925067186356, "learning_rate": 1.3872437357630979e-05, "loss": 8.8407, "step": 107880 }, { "epoch": 0.5387899822717171, "grad_norm": 0.09316663444042206, "learning_rate": 1.3870935442689429e-05, "loss": 8.8444, "step": 107890 }, { "epoch": 0.5388399210966566, "grad_norm": 0.08978422731161118, "learning_rate": 1.386943352774788e-05, "loss": 8.839, "step": 107900 }, { "epoch": 0.538889859921596, "grad_norm": 0.09495079517364502, "learning_rate": 1.386793161280633e-05, "loss": 8.8695, "step": 107910 }, { "epoch": 0.5389397987465355, "grad_norm": 0.09116814285516739, "learning_rate": 1.3866429697864778e-05, "loss": 8.8255, "step": 107920 }, { "epoch": 0.538989737571475, "grad_norm": 0.0922946184873581, "learning_rate": 1.3864927782923226e-05, "loss": 8.8413, "step": 107930 }, { "epoch": 0.5390396763964144, "grad_norm": 0.09681878238916397, "learning_rate": 1.3863425867981676e-05, "loss": 8.8529, "step": 107940 }, { "epoch": 0.5390896152213538, "grad_norm": 0.0935935452580452, "learning_rate": 1.3861923953040127e-05, "loss": 8.8531, "step": 107950 }, { "epoch": 0.5391395540462933, "grad_norm": 0.09184682369232178, "learning_rate": 1.3860422038098577e-05, "loss": 8.8414, "step": 107960 }, { "epoch": 0.5391894928712327, "grad_norm": 0.10028470307588577, "learning_rate": 1.3858920123157025e-05, "loss": 8.8435, "step": 107970 }, { "epoch": 0.5392394316961722, "grad_norm": 0.09753447771072388, "learning_rate": 1.3857418208215474e-05, "loss": 8.8459, "step": 107980 }, { "epoch": 0.5392893705211116, "grad_norm": 0.10108836740255356, "learning_rate": 1.3855916293273924e-05, "loss": 8.8328, "step": 107990 }, { "epoch": 0.5393393093460511, "grad_norm": 0.08985279500484467, "learning_rate": 1.3854414378332374e-05, "loss": 8.8288, "step": 108000 }, { "epoch": 0.5393892481709905, "grad_norm": 0.08839620649814606, "learning_rate": 1.3852912463390824e-05, "loss": 8.8446, "step": 108010 }, { "epoch": 0.53943918699593, "grad_norm": 0.09369973093271255, "learning_rate": 1.3851410548449273e-05, "loss": 8.8246, "step": 108020 }, { "epoch": 0.5394891258208694, "grad_norm": 0.09340804815292358, "learning_rate": 1.3849908633507723e-05, "loss": 8.8316, "step": 108030 }, { "epoch": 0.5395390646458089, "grad_norm": 0.09237315505743027, "learning_rate": 1.3848406718566171e-05, "loss": 8.8389, "step": 108040 }, { "epoch": 0.5395890034707483, "grad_norm": 0.09002888947725296, "learning_rate": 1.3846904803624622e-05, "loss": 8.8388, "step": 108050 }, { "epoch": 0.5396389422956878, "grad_norm": 0.0946909487247467, "learning_rate": 1.3845402888683072e-05, "loss": 8.8354, "step": 108060 }, { "epoch": 0.5396888811206272, "grad_norm": 0.09334079921245575, "learning_rate": 1.3843900973741522e-05, "loss": 8.8316, "step": 108070 }, { "epoch": 0.5397388199455667, "grad_norm": 0.09467971324920654, "learning_rate": 1.384239905879997e-05, "loss": 8.8425, "step": 108080 }, { "epoch": 0.5397887587705061, "grad_norm": 0.09707115590572357, "learning_rate": 1.3840897143858419e-05, "loss": 8.8327, "step": 108090 }, { "epoch": 0.5398386975954456, "grad_norm": 0.09300867468118668, "learning_rate": 1.383939522891687e-05, "loss": 8.8463, "step": 108100 }, { "epoch": 0.539888636420385, "grad_norm": 0.09528551995754242, "learning_rate": 1.383789331397532e-05, "loss": 8.8379, "step": 108110 }, { "epoch": 0.5399385752453245, "grad_norm": 0.09320961683988571, "learning_rate": 1.383639139903377e-05, "loss": 8.8346, "step": 108120 }, { "epoch": 0.5399885140702639, "grad_norm": 0.09055766463279724, "learning_rate": 1.3834889484092218e-05, "loss": 8.8463, "step": 108130 }, { "epoch": 0.5400384528952034, "grad_norm": 0.09693289548158646, "learning_rate": 1.3833387569150667e-05, "loss": 8.8457, "step": 108140 }, { "epoch": 0.5400883917201428, "grad_norm": 0.10044632852077484, "learning_rate": 1.3831885654209117e-05, "loss": 8.8156, "step": 108150 }, { "epoch": 0.5401383305450823, "grad_norm": 0.09970605373382568, "learning_rate": 1.3830383739267567e-05, "loss": 8.8469, "step": 108160 }, { "epoch": 0.5401882693700217, "grad_norm": 0.09658732265233994, "learning_rate": 1.3828881824326017e-05, "loss": 8.8184, "step": 108170 }, { "epoch": 0.5402382081949612, "grad_norm": 0.0948546975851059, "learning_rate": 1.3827379909384466e-05, "loss": 8.8466, "step": 108180 }, { "epoch": 0.5402881470199006, "grad_norm": 0.09518400579690933, "learning_rate": 1.3825877994442916e-05, "loss": 8.8373, "step": 108190 }, { "epoch": 0.5403380858448401, "grad_norm": 0.09109831601381302, "learning_rate": 1.3824376079501364e-05, "loss": 8.8272, "step": 108200 }, { "epoch": 0.5403880246697795, "grad_norm": 0.08981281518936157, "learning_rate": 1.3822874164559814e-05, "loss": 8.8456, "step": 108210 }, { "epoch": 0.540437963494719, "grad_norm": 0.10105878114700317, "learning_rate": 1.3821372249618265e-05, "loss": 8.8423, "step": 108220 }, { "epoch": 0.5404879023196584, "grad_norm": 0.08803227543830872, "learning_rate": 1.3819870334676713e-05, "loss": 8.8416, "step": 108230 }, { "epoch": 0.5405378411445979, "grad_norm": 0.09488827735185623, "learning_rate": 1.3818368419735163e-05, "loss": 8.8317, "step": 108240 }, { "epoch": 0.5405877799695373, "grad_norm": 0.0969134047627449, "learning_rate": 1.3816866504793612e-05, "loss": 8.837, "step": 108250 }, { "epoch": 0.5406377187944768, "grad_norm": 0.09399174153804779, "learning_rate": 1.3815364589852062e-05, "loss": 8.8314, "step": 108260 }, { "epoch": 0.5406876576194162, "grad_norm": 0.08893413096666336, "learning_rate": 1.3813862674910512e-05, "loss": 8.8467, "step": 108270 }, { "epoch": 0.5407375964443557, "grad_norm": 0.09072422981262207, "learning_rate": 1.381236075996896e-05, "loss": 8.8399, "step": 108280 }, { "epoch": 0.5407875352692951, "grad_norm": 0.09168332815170288, "learning_rate": 1.381085884502741e-05, "loss": 8.8355, "step": 108290 }, { "epoch": 0.5408374740942345, "grad_norm": 0.09577570855617523, "learning_rate": 1.380935693008586e-05, "loss": 8.8358, "step": 108300 }, { "epoch": 0.540887412919174, "grad_norm": 0.09399871528148651, "learning_rate": 1.380785501514431e-05, "loss": 8.8198, "step": 108310 }, { "epoch": 0.5409373517441135, "grad_norm": 0.09029237180948257, "learning_rate": 1.380635310020276e-05, "loss": 8.8317, "step": 108320 }, { "epoch": 0.5409872905690529, "grad_norm": 0.09074974060058594, "learning_rate": 1.3804851185261208e-05, "loss": 8.8355, "step": 108330 }, { "epoch": 0.5410372293939923, "grad_norm": 0.08936341851949692, "learning_rate": 1.3803349270319658e-05, "loss": 8.8351, "step": 108340 }, { "epoch": 0.5410871682189318, "grad_norm": 0.08986354619264603, "learning_rate": 1.3801847355378108e-05, "loss": 8.8396, "step": 108350 }, { "epoch": 0.5411371070438713, "grad_norm": 0.0931684747338295, "learning_rate": 1.3800345440436557e-05, "loss": 8.8502, "step": 108360 }, { "epoch": 0.5411870458688107, "grad_norm": 0.09337104111909866, "learning_rate": 1.3798843525495007e-05, "loss": 8.841, "step": 108370 }, { "epoch": 0.5412369846937501, "grad_norm": 0.09039416164159775, "learning_rate": 1.3797341610553456e-05, "loss": 8.8413, "step": 108380 }, { "epoch": 0.5412869235186896, "grad_norm": 0.09265271574258804, "learning_rate": 1.3795839695611906e-05, "loss": 8.8387, "step": 108390 }, { "epoch": 0.5413368623436291, "grad_norm": 0.09323190897703171, "learning_rate": 1.3794337780670356e-05, "loss": 8.8338, "step": 108400 }, { "epoch": 0.5413868011685685, "grad_norm": 0.0942569449543953, "learning_rate": 1.3792835865728804e-05, "loss": 8.829, "step": 108410 }, { "epoch": 0.5414367399935079, "grad_norm": 0.09136468917131424, "learning_rate": 1.3791333950787255e-05, "loss": 8.8375, "step": 108420 }, { "epoch": 0.5414866788184474, "grad_norm": 0.09518197923898697, "learning_rate": 1.3789832035845703e-05, "loss": 8.8347, "step": 108430 }, { "epoch": 0.5415366176433869, "grad_norm": 0.09513925015926361, "learning_rate": 1.3788330120904153e-05, "loss": 8.8337, "step": 108440 }, { "epoch": 0.5415865564683263, "grad_norm": 0.09242982417345047, "learning_rate": 1.3786828205962603e-05, "loss": 8.8264, "step": 108450 }, { "epoch": 0.5416364952932657, "grad_norm": 0.09404724091291428, "learning_rate": 1.3785326291021052e-05, "loss": 8.8328, "step": 108460 }, { "epoch": 0.5416864341182052, "grad_norm": 0.09275107085704803, "learning_rate": 1.3783824376079502e-05, "loss": 8.8397, "step": 108470 }, { "epoch": 0.5417363729431447, "grad_norm": 0.09128131717443466, "learning_rate": 1.378232246113795e-05, "loss": 8.8265, "step": 108480 }, { "epoch": 0.5417863117680841, "grad_norm": 0.09248005598783493, "learning_rate": 1.37808205461964e-05, "loss": 8.8399, "step": 108490 }, { "epoch": 0.5418362505930235, "grad_norm": 0.09171829372644424, "learning_rate": 1.3779318631254851e-05, "loss": 8.8332, "step": 108500 }, { "epoch": 0.541886189417963, "grad_norm": 0.09409584105014801, "learning_rate": 1.37778167163133e-05, "loss": 8.8347, "step": 108510 }, { "epoch": 0.5419361282429025, "grad_norm": 0.09212516993284225, "learning_rate": 1.377631480137175e-05, "loss": 8.839, "step": 108520 }, { "epoch": 0.5419860670678419, "grad_norm": 0.0963674783706665, "learning_rate": 1.3774812886430198e-05, "loss": 8.8263, "step": 108530 }, { "epoch": 0.5420360058927813, "grad_norm": 0.09333939105272293, "learning_rate": 1.3773310971488648e-05, "loss": 8.8339, "step": 108540 }, { "epoch": 0.5420859447177208, "grad_norm": 0.0930102989077568, "learning_rate": 1.3771809056547098e-05, "loss": 8.8331, "step": 108550 }, { "epoch": 0.5421358835426603, "grad_norm": 0.09327764064073563, "learning_rate": 1.3770307141605549e-05, "loss": 8.8169, "step": 108560 }, { "epoch": 0.5421858223675997, "grad_norm": 0.09788885712623596, "learning_rate": 1.3768805226663997e-05, "loss": 8.8428, "step": 108570 }, { "epoch": 0.5422357611925391, "grad_norm": 0.08719142526388168, "learning_rate": 1.3767303311722446e-05, "loss": 8.835, "step": 108580 }, { "epoch": 0.5422857000174786, "grad_norm": 0.08899932354688644, "learning_rate": 1.3765801396780896e-05, "loss": 8.8379, "step": 108590 }, { "epoch": 0.5423356388424181, "grad_norm": 0.09376303106546402, "learning_rate": 1.3764299481839346e-05, "loss": 8.8337, "step": 108600 }, { "epoch": 0.5423855776673575, "grad_norm": 0.09086206555366516, "learning_rate": 1.3762797566897796e-05, "loss": 8.8522, "step": 108610 }, { "epoch": 0.5424355164922969, "grad_norm": 0.09616357088088989, "learning_rate": 1.3761295651956245e-05, "loss": 8.8464, "step": 108620 }, { "epoch": 0.5424854553172364, "grad_norm": 0.09458029270172119, "learning_rate": 1.3759793737014693e-05, "loss": 8.8334, "step": 108630 }, { "epoch": 0.5425353941421759, "grad_norm": 0.0973440557718277, "learning_rate": 1.3758291822073143e-05, "loss": 8.8272, "step": 108640 }, { "epoch": 0.5425853329671153, "grad_norm": 0.09475944191217422, "learning_rate": 1.3756789907131593e-05, "loss": 8.828, "step": 108650 }, { "epoch": 0.5426352717920547, "grad_norm": 0.09611568599939346, "learning_rate": 1.3755287992190044e-05, "loss": 8.8075, "step": 108660 }, { "epoch": 0.5426852106169942, "grad_norm": 0.09558840841054916, "learning_rate": 1.3753786077248492e-05, "loss": 8.8357, "step": 108670 }, { "epoch": 0.5427351494419337, "grad_norm": 0.09221713989973068, "learning_rate": 1.375228416230694e-05, "loss": 8.8487, "step": 108680 }, { "epoch": 0.5427850882668731, "grad_norm": 0.09657151997089386, "learning_rate": 1.375078224736539e-05, "loss": 8.8293, "step": 108690 }, { "epoch": 0.5428350270918125, "grad_norm": 0.09163092821836472, "learning_rate": 1.3749280332423841e-05, "loss": 8.8348, "step": 108700 }, { "epoch": 0.542884965916752, "grad_norm": 0.09311132878065109, "learning_rate": 1.3747778417482291e-05, "loss": 8.837, "step": 108710 }, { "epoch": 0.5429349047416915, "grad_norm": 0.08675689995288849, "learning_rate": 1.3746276502540741e-05, "loss": 8.8421, "step": 108720 }, { "epoch": 0.5429848435666309, "grad_norm": 0.08896356076002121, "learning_rate": 1.3744774587599188e-05, "loss": 8.8465, "step": 108730 }, { "epoch": 0.5430347823915703, "grad_norm": 0.09236080199480057, "learning_rate": 1.3743272672657638e-05, "loss": 8.8397, "step": 108740 }, { "epoch": 0.5430847212165097, "grad_norm": 0.09245762974023819, "learning_rate": 1.3741770757716088e-05, "loss": 8.835, "step": 108750 }, { "epoch": 0.5431346600414493, "grad_norm": 0.09627767652273178, "learning_rate": 1.3740268842774539e-05, "loss": 8.8153, "step": 108760 }, { "epoch": 0.5431845988663887, "grad_norm": 0.09583393484354019, "learning_rate": 1.3738766927832989e-05, "loss": 8.8408, "step": 108770 }, { "epoch": 0.5432345376913281, "grad_norm": 0.10037499666213989, "learning_rate": 1.3737265012891436e-05, "loss": 8.8385, "step": 108780 }, { "epoch": 0.5432844765162675, "grad_norm": 0.08847088366746902, "learning_rate": 1.3735763097949886e-05, "loss": 8.8244, "step": 108790 }, { "epoch": 0.543334415341207, "grad_norm": 0.0916975736618042, "learning_rate": 1.3734261183008336e-05, "loss": 8.8241, "step": 108800 }, { "epoch": 0.5433843541661465, "grad_norm": 0.08768840879201889, "learning_rate": 1.3732759268066786e-05, "loss": 8.84, "step": 108810 }, { "epoch": 0.5434342929910859, "grad_norm": 0.09283578395843506, "learning_rate": 1.3731257353125236e-05, "loss": 8.8298, "step": 108820 }, { "epoch": 0.5434842318160253, "grad_norm": 0.09757833927869797, "learning_rate": 1.3729755438183683e-05, "loss": 8.8251, "step": 108830 }, { "epoch": 0.5435341706409648, "grad_norm": 0.10008548200130463, "learning_rate": 1.3728253523242133e-05, "loss": 8.8276, "step": 108840 }, { "epoch": 0.5435841094659043, "grad_norm": 0.09164326637983322, "learning_rate": 1.3726751608300583e-05, "loss": 8.8318, "step": 108850 }, { "epoch": 0.5436340482908437, "grad_norm": 0.09030098468065262, "learning_rate": 1.3725249693359034e-05, "loss": 8.8277, "step": 108860 }, { "epoch": 0.5436839871157831, "grad_norm": 0.08942774683237076, "learning_rate": 1.3723747778417484e-05, "loss": 8.8314, "step": 108870 }, { "epoch": 0.5437339259407226, "grad_norm": 0.10168751329183578, "learning_rate": 1.3722245863475932e-05, "loss": 8.8215, "step": 108880 }, { "epoch": 0.5437838647656621, "grad_norm": 0.09753987193107605, "learning_rate": 1.372074394853438e-05, "loss": 8.8198, "step": 108890 }, { "epoch": 0.5438338035906015, "grad_norm": 0.10058535635471344, "learning_rate": 1.3719242033592831e-05, "loss": 8.8108, "step": 108900 }, { "epoch": 0.5438837424155409, "grad_norm": 0.09578339755535126, "learning_rate": 1.3717740118651281e-05, "loss": 8.8362, "step": 108910 }, { "epoch": 0.5439336812404804, "grad_norm": 0.08314099907875061, "learning_rate": 1.3716238203709731e-05, "loss": 8.8229, "step": 108920 }, { "epoch": 0.5439836200654199, "grad_norm": 0.08905912190675735, "learning_rate": 1.371473628876818e-05, "loss": 8.8382, "step": 108930 }, { "epoch": 0.5440335588903593, "grad_norm": 0.09270746260881424, "learning_rate": 1.3713234373826628e-05, "loss": 8.8224, "step": 108940 }, { "epoch": 0.5440834977152987, "grad_norm": 0.09719360619783401, "learning_rate": 1.3711732458885078e-05, "loss": 8.8304, "step": 108950 }, { "epoch": 0.5441334365402382, "grad_norm": 0.08986958116292953, "learning_rate": 1.3710230543943529e-05, "loss": 8.8256, "step": 108960 }, { "epoch": 0.5441833753651777, "grad_norm": 0.09314291179180145, "learning_rate": 1.3708728629001979e-05, "loss": 8.8262, "step": 108970 }, { "epoch": 0.5442333141901171, "grad_norm": 0.09784272313117981, "learning_rate": 1.3707226714060427e-05, "loss": 8.8172, "step": 108980 }, { "epoch": 0.5442832530150565, "grad_norm": 0.09231096506118774, "learning_rate": 1.3705724799118876e-05, "loss": 8.8399, "step": 108990 }, { "epoch": 0.544333191839996, "grad_norm": 0.09249245375394821, "learning_rate": 1.3704222884177326e-05, "loss": 8.8255, "step": 109000 }, { "epoch": 0.5443831306649355, "grad_norm": 0.09249752014875412, "learning_rate": 1.3702720969235776e-05, "loss": 8.8309, "step": 109010 }, { "epoch": 0.5444330694898749, "grad_norm": 0.09660474956035614, "learning_rate": 1.3701219054294226e-05, "loss": 8.8297, "step": 109020 }, { "epoch": 0.5444830083148143, "grad_norm": 0.09552586078643799, "learning_rate": 1.3699717139352675e-05, "loss": 8.8145, "step": 109030 }, { "epoch": 0.5445329471397538, "grad_norm": 0.09530143439769745, "learning_rate": 1.3698215224411125e-05, "loss": 8.82, "step": 109040 }, { "epoch": 0.5445828859646933, "grad_norm": 0.10076169669628143, "learning_rate": 1.3696713309469573e-05, "loss": 8.8208, "step": 109050 }, { "epoch": 0.5446328247896327, "grad_norm": 0.09382011741399765, "learning_rate": 1.3695211394528024e-05, "loss": 8.8229, "step": 109060 }, { "epoch": 0.5446827636145721, "grad_norm": 0.09154395759105682, "learning_rate": 1.3693709479586474e-05, "loss": 8.833, "step": 109070 }, { "epoch": 0.5447327024395116, "grad_norm": 0.0922236368060112, "learning_rate": 1.3692207564644922e-05, "loss": 8.8162, "step": 109080 }, { "epoch": 0.5447826412644511, "grad_norm": 0.09294591844081879, "learning_rate": 1.3690705649703372e-05, "loss": 8.8393, "step": 109090 }, { "epoch": 0.5448325800893905, "grad_norm": 0.09426495432853699, "learning_rate": 1.3689203734761821e-05, "loss": 8.8455, "step": 109100 }, { "epoch": 0.5448825189143299, "grad_norm": 0.09277299046516418, "learning_rate": 1.3687701819820271e-05, "loss": 8.8368, "step": 109110 }, { "epoch": 0.5449324577392693, "grad_norm": 0.09092520922422409, "learning_rate": 1.3686199904878721e-05, "loss": 8.8391, "step": 109120 }, { "epoch": 0.5449823965642089, "grad_norm": 0.0939490795135498, "learning_rate": 1.368469798993717e-05, "loss": 8.832, "step": 109130 }, { "epoch": 0.5450323353891483, "grad_norm": 0.09599693864583969, "learning_rate": 1.368319607499562e-05, "loss": 8.8155, "step": 109140 }, { "epoch": 0.5450822742140877, "grad_norm": 0.10206658393144608, "learning_rate": 1.3681694160054068e-05, "loss": 8.8281, "step": 109150 }, { "epoch": 0.5451322130390271, "grad_norm": 0.10047373175621033, "learning_rate": 1.3680192245112519e-05, "loss": 8.8277, "step": 109160 }, { "epoch": 0.5451821518639667, "grad_norm": 0.09348422288894653, "learning_rate": 1.3678690330170969e-05, "loss": 8.8309, "step": 109170 }, { "epoch": 0.5452320906889061, "grad_norm": 0.09665758162736893, "learning_rate": 1.3677188415229417e-05, "loss": 8.8489, "step": 109180 }, { "epoch": 0.5452820295138455, "grad_norm": 0.09331992268562317, "learning_rate": 1.3675686500287867e-05, "loss": 8.8213, "step": 109190 }, { "epoch": 0.545331968338785, "grad_norm": 0.08620268851518631, "learning_rate": 1.3674184585346318e-05, "loss": 8.8371, "step": 109200 }, { "epoch": 0.5453819071637245, "grad_norm": 0.09555631130933762, "learning_rate": 1.3672682670404766e-05, "loss": 8.819, "step": 109210 }, { "epoch": 0.5454318459886639, "grad_norm": 0.09607243537902832, "learning_rate": 1.3671180755463216e-05, "loss": 8.8082, "step": 109220 }, { "epoch": 0.5454817848136033, "grad_norm": 0.09805332869291306, "learning_rate": 1.3669678840521665e-05, "loss": 8.8269, "step": 109230 }, { "epoch": 0.5455317236385427, "grad_norm": 0.09287256002426147, "learning_rate": 1.3668176925580115e-05, "loss": 8.8131, "step": 109240 }, { "epoch": 0.5455816624634823, "grad_norm": 0.08924273401498795, "learning_rate": 1.3666675010638565e-05, "loss": 8.8197, "step": 109250 }, { "epoch": 0.5456316012884217, "grad_norm": 0.09110093861818314, "learning_rate": 1.3665173095697014e-05, "loss": 8.8382, "step": 109260 }, { "epoch": 0.5456815401133611, "grad_norm": 0.09054429084062576, "learning_rate": 1.3663671180755464e-05, "loss": 8.8259, "step": 109270 }, { "epoch": 0.5457314789383005, "grad_norm": 0.09228412806987762, "learning_rate": 1.3662169265813912e-05, "loss": 8.8154, "step": 109280 }, { "epoch": 0.5457814177632401, "grad_norm": 0.09101639688014984, "learning_rate": 1.3660667350872362e-05, "loss": 8.8172, "step": 109290 }, { "epoch": 0.5458313565881795, "grad_norm": 0.09964627027511597, "learning_rate": 1.3659165435930813e-05, "loss": 8.8302, "step": 109300 }, { "epoch": 0.5458812954131189, "grad_norm": 0.0952049046754837, "learning_rate": 1.3657663520989261e-05, "loss": 8.8477, "step": 109310 }, { "epoch": 0.5459312342380583, "grad_norm": 0.094510518014431, "learning_rate": 1.3656161606047711e-05, "loss": 8.8274, "step": 109320 }, { "epoch": 0.5459811730629979, "grad_norm": 0.09812169522047043, "learning_rate": 1.365465969110616e-05, "loss": 8.8186, "step": 109330 }, { "epoch": 0.5460311118879373, "grad_norm": 0.09070379287004471, "learning_rate": 1.365315777616461e-05, "loss": 8.8214, "step": 109340 }, { "epoch": 0.5460810507128767, "grad_norm": 0.09096910059452057, "learning_rate": 1.365165586122306e-05, "loss": 8.8309, "step": 109350 }, { "epoch": 0.5461309895378161, "grad_norm": 0.0929383784532547, "learning_rate": 1.365015394628151e-05, "loss": 8.8311, "step": 109360 }, { "epoch": 0.5461809283627557, "grad_norm": 0.09064075350761414, "learning_rate": 1.3648652031339959e-05, "loss": 8.8385, "step": 109370 }, { "epoch": 0.5462308671876951, "grad_norm": 0.09253614395856857, "learning_rate": 1.3647150116398407e-05, "loss": 8.8222, "step": 109380 }, { "epoch": 0.5462808060126345, "grad_norm": 0.09299398958683014, "learning_rate": 1.3645648201456857e-05, "loss": 8.8109, "step": 109390 }, { "epoch": 0.5463307448375739, "grad_norm": 0.09167538583278656, "learning_rate": 1.3644146286515308e-05, "loss": 8.8226, "step": 109400 }, { "epoch": 0.5463806836625135, "grad_norm": 0.09108640998601913, "learning_rate": 1.3642644371573758e-05, "loss": 8.823, "step": 109410 }, { "epoch": 0.5464306224874529, "grad_norm": 0.09583522379398346, "learning_rate": 1.3641142456632206e-05, "loss": 8.83, "step": 109420 }, { "epoch": 0.5464805613123923, "grad_norm": 0.08716997504234314, "learning_rate": 1.3639640541690655e-05, "loss": 8.8318, "step": 109430 }, { "epoch": 0.5465305001373317, "grad_norm": 0.08698141574859619, "learning_rate": 1.3638138626749105e-05, "loss": 8.8372, "step": 109440 }, { "epoch": 0.5465804389622713, "grad_norm": 0.08734197914600372, "learning_rate": 1.3636636711807555e-05, "loss": 8.8252, "step": 109450 }, { "epoch": 0.5466303777872107, "grad_norm": 0.08939076960086823, "learning_rate": 1.3635134796866005e-05, "loss": 8.8243, "step": 109460 }, { "epoch": 0.5466803166121501, "grad_norm": 0.09037525951862335, "learning_rate": 1.3633632881924454e-05, "loss": 8.8242, "step": 109470 }, { "epoch": 0.5467302554370895, "grad_norm": 0.09184879809617996, "learning_rate": 1.3632130966982902e-05, "loss": 8.8219, "step": 109480 }, { "epoch": 0.5467801942620291, "grad_norm": 0.09089411050081253, "learning_rate": 1.3630629052041352e-05, "loss": 8.8242, "step": 109490 }, { "epoch": 0.5468301330869685, "grad_norm": 0.09041132777929306, "learning_rate": 1.3629127137099803e-05, "loss": 8.8208, "step": 109500 }, { "epoch": 0.5468800719119079, "grad_norm": 0.08768140524625778, "learning_rate": 1.3627625222158253e-05, "loss": 8.8216, "step": 109510 }, { "epoch": 0.5469300107368473, "grad_norm": 0.08897078037261963, "learning_rate": 1.3626123307216703e-05, "loss": 8.8137, "step": 109520 }, { "epoch": 0.5469799495617869, "grad_norm": 0.0939832553267479, "learning_rate": 1.362462139227515e-05, "loss": 8.8204, "step": 109530 }, { "epoch": 0.5470298883867263, "grad_norm": 0.09440961480140686, "learning_rate": 1.36231194773336e-05, "loss": 8.8241, "step": 109540 }, { "epoch": 0.5470798272116657, "grad_norm": 0.08818075060844421, "learning_rate": 1.362161756239205e-05, "loss": 8.8393, "step": 109550 }, { "epoch": 0.5471297660366051, "grad_norm": 0.09029530733823776, "learning_rate": 1.36201156474505e-05, "loss": 8.8109, "step": 109560 }, { "epoch": 0.5471797048615447, "grad_norm": 0.09235231578350067, "learning_rate": 1.361861373250895e-05, "loss": 8.8183, "step": 109570 }, { "epoch": 0.5472296436864841, "grad_norm": 0.09568846970796585, "learning_rate": 1.3617111817567397e-05, "loss": 8.8189, "step": 109580 }, { "epoch": 0.5472795825114235, "grad_norm": 0.09308406710624695, "learning_rate": 1.3615609902625847e-05, "loss": 8.827, "step": 109590 }, { "epoch": 0.5473295213363629, "grad_norm": 0.09407919645309448, "learning_rate": 1.3614107987684298e-05, "loss": 8.8238, "step": 109600 }, { "epoch": 0.5473794601613025, "grad_norm": 0.09674926102161407, "learning_rate": 1.3612606072742748e-05, "loss": 8.8228, "step": 109610 }, { "epoch": 0.5474293989862419, "grad_norm": 0.09146232157945633, "learning_rate": 1.3611104157801198e-05, "loss": 8.8094, "step": 109620 }, { "epoch": 0.5474793378111813, "grad_norm": 0.09639618545770645, "learning_rate": 1.3609602242859645e-05, "loss": 8.8284, "step": 109630 }, { "epoch": 0.5475292766361207, "grad_norm": 0.09209806472063065, "learning_rate": 1.3608100327918095e-05, "loss": 8.8338, "step": 109640 }, { "epoch": 0.5475792154610603, "grad_norm": 0.09675581008195877, "learning_rate": 1.3606598412976545e-05, "loss": 8.8283, "step": 109650 }, { "epoch": 0.5476291542859997, "grad_norm": 0.09263081103563309, "learning_rate": 1.3605096498034995e-05, "loss": 8.8038, "step": 109660 }, { "epoch": 0.5476790931109391, "grad_norm": 0.09105950593948364, "learning_rate": 1.3603594583093446e-05, "loss": 8.8312, "step": 109670 }, { "epoch": 0.5477290319358785, "grad_norm": 0.09509497880935669, "learning_rate": 1.3602092668151894e-05, "loss": 8.831, "step": 109680 }, { "epoch": 0.547778970760818, "grad_norm": 0.09219329804182053, "learning_rate": 1.3600590753210343e-05, "loss": 8.8016, "step": 109690 }, { "epoch": 0.5478289095857575, "grad_norm": 0.09572474658489227, "learning_rate": 1.3599088838268793e-05, "loss": 8.8219, "step": 109700 }, { "epoch": 0.5478788484106969, "grad_norm": 0.10046906024217606, "learning_rate": 1.3597586923327243e-05, "loss": 8.8214, "step": 109710 }, { "epoch": 0.5479287872356363, "grad_norm": 0.09304700791835785, "learning_rate": 1.3596085008385693e-05, "loss": 8.8302, "step": 109720 }, { "epoch": 0.5479787260605758, "grad_norm": 0.09092891216278076, "learning_rate": 1.3594583093444142e-05, "loss": 8.8251, "step": 109730 }, { "epoch": 0.5480286648855153, "grad_norm": 0.09128131717443466, "learning_rate": 1.359308117850259e-05, "loss": 8.829, "step": 109740 }, { "epoch": 0.5480786037104547, "grad_norm": 0.08997046947479248, "learning_rate": 1.359157926356104e-05, "loss": 8.8308, "step": 109750 }, { "epoch": 0.5481285425353941, "grad_norm": 0.09499360620975494, "learning_rate": 1.359007734861949e-05, "loss": 8.8291, "step": 109760 }, { "epoch": 0.5481784813603335, "grad_norm": 0.09103532880544662, "learning_rate": 1.358857543367794e-05, "loss": 8.8168, "step": 109770 }, { "epoch": 0.5482284201852731, "grad_norm": 0.08995458483695984, "learning_rate": 1.3587073518736389e-05, "loss": 8.8261, "step": 109780 }, { "epoch": 0.5482783590102125, "grad_norm": 0.09766585379838943, "learning_rate": 1.3585571603794838e-05, "loss": 8.8222, "step": 109790 }, { "epoch": 0.5483282978351519, "grad_norm": 0.09343060851097107, "learning_rate": 1.3584069688853288e-05, "loss": 8.817, "step": 109800 }, { "epoch": 0.5483782366600913, "grad_norm": 0.09289675951004028, "learning_rate": 1.3582567773911738e-05, "loss": 8.8271, "step": 109810 }, { "epoch": 0.5484281754850309, "grad_norm": 0.0947795957326889, "learning_rate": 1.3581065858970188e-05, "loss": 8.8265, "step": 109820 }, { "epoch": 0.5484781143099703, "grad_norm": 0.09564196318387985, "learning_rate": 1.3579563944028637e-05, "loss": 8.823, "step": 109830 }, { "epoch": 0.5485280531349097, "grad_norm": 0.08977336436510086, "learning_rate": 1.3578062029087087e-05, "loss": 8.8181, "step": 109840 }, { "epoch": 0.5485779919598491, "grad_norm": 0.0958486795425415, "learning_rate": 1.3576560114145535e-05, "loss": 8.8321, "step": 109850 }, { "epoch": 0.5486279307847887, "grad_norm": 0.09179677069187164, "learning_rate": 1.3575058199203985e-05, "loss": 8.8342, "step": 109860 }, { "epoch": 0.5486778696097281, "grad_norm": 0.09745611250400543, "learning_rate": 1.3573556284262436e-05, "loss": 8.8287, "step": 109870 }, { "epoch": 0.5487278084346675, "grad_norm": 0.09767097979784012, "learning_rate": 1.3572054369320884e-05, "loss": 8.8168, "step": 109880 }, { "epoch": 0.5487777472596069, "grad_norm": 0.0993611216545105, "learning_rate": 1.3570552454379334e-05, "loss": 8.8124, "step": 109890 }, { "epoch": 0.5488276860845465, "grad_norm": 0.09202627837657928, "learning_rate": 1.3569050539437783e-05, "loss": 8.8306, "step": 109900 }, { "epoch": 0.5488776249094859, "grad_norm": 0.09640886634588242, "learning_rate": 1.3567548624496233e-05, "loss": 8.8248, "step": 109910 }, { "epoch": 0.5489275637344253, "grad_norm": 0.09196940064430237, "learning_rate": 1.3566046709554683e-05, "loss": 8.8267, "step": 109920 }, { "epoch": 0.5489775025593647, "grad_norm": 0.09446310251951218, "learning_rate": 1.3564544794613132e-05, "loss": 8.8257, "step": 109930 }, { "epoch": 0.5490274413843043, "grad_norm": 0.0930735394358635, "learning_rate": 1.3563042879671582e-05, "loss": 8.8324, "step": 109940 }, { "epoch": 0.5490773802092437, "grad_norm": 0.0945110097527504, "learning_rate": 1.356154096473003e-05, "loss": 8.8224, "step": 109950 }, { "epoch": 0.5491273190341831, "grad_norm": 0.09478355199098587, "learning_rate": 1.356003904978848e-05, "loss": 8.8194, "step": 109960 }, { "epoch": 0.5491772578591225, "grad_norm": 0.09091106802225113, "learning_rate": 1.355853713484693e-05, "loss": 8.8413, "step": 109970 }, { "epoch": 0.5492271966840621, "grad_norm": 0.0927327498793602, "learning_rate": 1.3557035219905379e-05, "loss": 8.8189, "step": 109980 }, { "epoch": 0.5492771355090015, "grad_norm": 0.08513586223125458, "learning_rate": 1.355553330496383e-05, "loss": 8.8279, "step": 109990 }, { "epoch": 0.5493270743339409, "grad_norm": 0.08400266617536545, "learning_rate": 1.355403139002228e-05, "loss": 8.8285, "step": 110000 }, { "epoch": 0.5493770131588803, "grad_norm": 0.09037226438522339, "learning_rate": 1.3552529475080728e-05, "loss": 8.8035, "step": 110010 }, { "epoch": 0.5494269519838199, "grad_norm": 0.09207148849964142, "learning_rate": 1.3551027560139178e-05, "loss": 8.8098, "step": 110020 }, { "epoch": 0.5494768908087593, "grad_norm": 0.09380757808685303, "learning_rate": 1.3549525645197627e-05, "loss": 8.8365, "step": 110030 }, { "epoch": 0.5495268296336987, "grad_norm": 0.10270896553993225, "learning_rate": 1.3548023730256077e-05, "loss": 8.8368, "step": 110040 }, { "epoch": 0.5495767684586381, "grad_norm": 0.09271609783172607, "learning_rate": 1.3546521815314527e-05, "loss": 8.8293, "step": 110050 }, { "epoch": 0.5496267072835777, "grad_norm": 0.0941663458943367, "learning_rate": 1.3545019900372975e-05, "loss": 8.8156, "step": 110060 }, { "epoch": 0.5496766461085171, "grad_norm": 0.09754271805286407, "learning_rate": 1.3543517985431426e-05, "loss": 8.8233, "step": 110070 }, { "epoch": 0.5497265849334565, "grad_norm": 0.09003855288028717, "learning_rate": 1.3542016070489874e-05, "loss": 8.8332, "step": 110080 }, { "epoch": 0.5497765237583959, "grad_norm": 0.08914513885974884, "learning_rate": 1.3540514155548324e-05, "loss": 8.8161, "step": 110090 }, { "epoch": 0.5498264625833355, "grad_norm": 0.09949744492769241, "learning_rate": 1.3539012240606774e-05, "loss": 8.8143, "step": 110100 }, { "epoch": 0.5498764014082749, "grad_norm": 0.0968717560172081, "learning_rate": 1.3537510325665223e-05, "loss": 8.8253, "step": 110110 }, { "epoch": 0.5499263402332143, "grad_norm": 0.09448512643575668, "learning_rate": 1.3536008410723673e-05, "loss": 8.8268, "step": 110120 }, { "epoch": 0.5499762790581537, "grad_norm": 0.09558204561471939, "learning_rate": 1.3534506495782123e-05, "loss": 8.8139, "step": 110130 }, { "epoch": 0.5500262178830932, "grad_norm": 0.09001177549362183, "learning_rate": 1.3533004580840572e-05, "loss": 8.8172, "step": 110140 }, { "epoch": 0.5500761567080327, "grad_norm": 0.09885483235120773, "learning_rate": 1.3531502665899022e-05, "loss": 8.8299, "step": 110150 }, { "epoch": 0.5501260955329721, "grad_norm": 0.09453386068344116, "learning_rate": 1.3530000750957472e-05, "loss": 8.8231, "step": 110160 }, { "epoch": 0.5501760343579115, "grad_norm": 0.09200888872146606, "learning_rate": 1.352849883601592e-05, "loss": 8.8221, "step": 110170 }, { "epoch": 0.550225973182851, "grad_norm": 0.09591957181692123, "learning_rate": 1.352699692107437e-05, "loss": 8.8092, "step": 110180 }, { "epoch": 0.5502759120077905, "grad_norm": 0.09270251542329788, "learning_rate": 1.352549500613282e-05, "loss": 8.824, "step": 110190 }, { "epoch": 0.5503258508327299, "grad_norm": 0.0944395586848259, "learning_rate": 1.352399309119127e-05, "loss": 8.8054, "step": 110200 }, { "epoch": 0.5503757896576693, "grad_norm": 0.09260264784097672, "learning_rate": 1.352249117624972e-05, "loss": 8.8058, "step": 110210 }, { "epoch": 0.5504257284826088, "grad_norm": 0.09659282863140106, "learning_rate": 1.3520989261308168e-05, "loss": 8.8116, "step": 110220 }, { "epoch": 0.5504756673075483, "grad_norm": 0.10288282483816147, "learning_rate": 1.3519487346366618e-05, "loss": 8.8174, "step": 110230 }, { "epoch": 0.5505256061324877, "grad_norm": 0.08334256708621979, "learning_rate": 1.3517985431425067e-05, "loss": 8.8262, "step": 110240 }, { "epoch": 0.5505755449574271, "grad_norm": 0.0915384441614151, "learning_rate": 1.3516483516483517e-05, "loss": 8.8317, "step": 110250 }, { "epoch": 0.5506254837823666, "grad_norm": 0.09057852625846863, "learning_rate": 1.3514981601541967e-05, "loss": 8.8207, "step": 110260 }, { "epoch": 0.5506754226073061, "grad_norm": 0.09614181518554688, "learning_rate": 1.3513479686600416e-05, "loss": 8.8095, "step": 110270 }, { "epoch": 0.5507253614322455, "grad_norm": 0.09297794103622437, "learning_rate": 1.3511977771658866e-05, "loss": 8.8024, "step": 110280 }, { "epoch": 0.5507753002571849, "grad_norm": 0.09804685413837433, "learning_rate": 1.3510475856717314e-05, "loss": 8.8088, "step": 110290 }, { "epoch": 0.5508252390821244, "grad_norm": 0.09318984299898148, "learning_rate": 1.3508973941775764e-05, "loss": 8.8085, "step": 110300 }, { "epoch": 0.5508751779070639, "grad_norm": 0.09332066029310226, "learning_rate": 1.3507472026834215e-05, "loss": 8.809, "step": 110310 }, { "epoch": 0.5509251167320033, "grad_norm": 0.0949983298778534, "learning_rate": 1.3505970111892665e-05, "loss": 8.8238, "step": 110320 }, { "epoch": 0.5509750555569427, "grad_norm": 0.09308378398418427, "learning_rate": 1.3504468196951113e-05, "loss": 8.844, "step": 110330 }, { "epoch": 0.5510249943818822, "grad_norm": 0.09120608866214752, "learning_rate": 1.3502966282009562e-05, "loss": 8.8099, "step": 110340 }, { "epoch": 0.5510749332068217, "grad_norm": 0.09323354065418243, "learning_rate": 1.3501464367068012e-05, "loss": 8.8136, "step": 110350 }, { "epoch": 0.5511248720317611, "grad_norm": 0.0914924144744873, "learning_rate": 1.3499962452126462e-05, "loss": 8.8114, "step": 110360 }, { "epoch": 0.5511748108567005, "grad_norm": 0.09144311398267746, "learning_rate": 1.3498460537184912e-05, "loss": 8.8016, "step": 110370 }, { "epoch": 0.55122474968164, "grad_norm": 0.0942022055387497, "learning_rate": 1.349695862224336e-05, "loss": 8.8147, "step": 110380 }, { "epoch": 0.5512746885065795, "grad_norm": 0.09916293621063232, "learning_rate": 1.349545670730181e-05, "loss": 8.8379, "step": 110390 }, { "epoch": 0.5513246273315189, "grad_norm": 0.09357352554798126, "learning_rate": 1.349395479236026e-05, "loss": 8.8308, "step": 110400 }, { "epoch": 0.5513745661564583, "grad_norm": 0.09357526153326035, "learning_rate": 1.349245287741871e-05, "loss": 8.8173, "step": 110410 }, { "epoch": 0.5514245049813978, "grad_norm": 0.08764225989580154, "learning_rate": 1.349095096247716e-05, "loss": 8.8153, "step": 110420 }, { "epoch": 0.5514744438063373, "grad_norm": 0.09387609362602234, "learning_rate": 1.3489449047535608e-05, "loss": 8.8175, "step": 110430 }, { "epoch": 0.5515243826312767, "grad_norm": 0.09247896075248718, "learning_rate": 1.3487947132594057e-05, "loss": 8.809, "step": 110440 }, { "epoch": 0.5515743214562161, "grad_norm": 0.09111214429140091, "learning_rate": 1.3486445217652507e-05, "loss": 8.8131, "step": 110450 }, { "epoch": 0.5516242602811556, "grad_norm": 0.09693354368209839, "learning_rate": 1.3484943302710957e-05, "loss": 8.8086, "step": 110460 }, { "epoch": 0.551674199106095, "grad_norm": 0.09754697233438492, "learning_rate": 1.3483441387769407e-05, "loss": 8.8201, "step": 110470 }, { "epoch": 0.5517241379310345, "grad_norm": 0.09200212359428406, "learning_rate": 1.3481939472827857e-05, "loss": 8.8175, "step": 110480 }, { "epoch": 0.5517740767559739, "grad_norm": 0.10172619670629501, "learning_rate": 1.3480437557886304e-05, "loss": 8.8139, "step": 110490 }, { "epoch": 0.5518240155809134, "grad_norm": 0.09638337045907974, "learning_rate": 1.3478935642944754e-05, "loss": 8.8247, "step": 110500 }, { "epoch": 0.5518739544058529, "grad_norm": 0.09530887752771378, "learning_rate": 1.3477433728003205e-05, "loss": 8.8224, "step": 110510 }, { "epoch": 0.5519238932307923, "grad_norm": 0.09654687345027924, "learning_rate": 1.3475931813061655e-05, "loss": 8.8239, "step": 110520 }, { "epoch": 0.5519738320557317, "grad_norm": 0.0979146957397461, "learning_rate": 1.3474429898120105e-05, "loss": 8.827, "step": 110530 }, { "epoch": 0.5520237708806712, "grad_norm": 0.09774132072925568, "learning_rate": 1.3472927983178552e-05, "loss": 8.7931, "step": 110540 }, { "epoch": 0.5520737097056106, "grad_norm": 0.09289306402206421, "learning_rate": 1.3471426068237002e-05, "loss": 8.8123, "step": 110550 }, { "epoch": 0.5521236485305501, "grad_norm": 0.08744509518146515, "learning_rate": 1.3469924153295452e-05, "loss": 8.8039, "step": 110560 }, { "epoch": 0.5521735873554895, "grad_norm": 0.08979026228189468, "learning_rate": 1.3468422238353902e-05, "loss": 8.8056, "step": 110570 }, { "epoch": 0.552223526180429, "grad_norm": 0.09543202817440033, "learning_rate": 1.3466920323412352e-05, "loss": 8.8093, "step": 110580 }, { "epoch": 0.5522734650053684, "grad_norm": 0.09224719554185867, "learning_rate": 1.34654184084708e-05, "loss": 8.8181, "step": 110590 }, { "epoch": 0.5523234038303079, "grad_norm": 0.0893092080950737, "learning_rate": 1.346391649352925e-05, "loss": 8.8243, "step": 110600 }, { "epoch": 0.5523733426552473, "grad_norm": 0.09298286586999893, "learning_rate": 1.34624145785877e-05, "loss": 8.8224, "step": 110610 }, { "epoch": 0.5524232814801868, "grad_norm": 0.09143733233213425, "learning_rate": 1.346091266364615e-05, "loss": 8.8141, "step": 110620 }, { "epoch": 0.5524732203051262, "grad_norm": 0.09493601322174072, "learning_rate": 1.34594107487046e-05, "loss": 8.8273, "step": 110630 }, { "epoch": 0.5525231591300657, "grad_norm": 0.09791026264429092, "learning_rate": 1.3457908833763048e-05, "loss": 8.801, "step": 110640 }, { "epoch": 0.5525730979550051, "grad_norm": 0.08950292319059372, "learning_rate": 1.3456406918821497e-05, "loss": 8.8103, "step": 110650 }, { "epoch": 0.5526230367799446, "grad_norm": 0.09202896058559418, "learning_rate": 1.3454905003879947e-05, "loss": 8.8157, "step": 110660 }, { "epoch": 0.552672975604884, "grad_norm": 0.09055034816265106, "learning_rate": 1.3453403088938397e-05, "loss": 8.8016, "step": 110670 }, { "epoch": 0.5527229144298235, "grad_norm": 0.09382042288780212, "learning_rate": 1.3451901173996847e-05, "loss": 8.8212, "step": 110680 }, { "epoch": 0.5527728532547629, "grad_norm": 0.09259592741727829, "learning_rate": 1.3450399259055296e-05, "loss": 8.799, "step": 110690 }, { "epoch": 0.5528227920797024, "grad_norm": 0.09515759348869324, "learning_rate": 1.3448897344113744e-05, "loss": 8.8096, "step": 110700 }, { "epoch": 0.5528727309046418, "grad_norm": 0.09235917776823044, "learning_rate": 1.3447395429172195e-05, "loss": 8.8192, "step": 110710 }, { "epoch": 0.5529226697295813, "grad_norm": 0.09815475344657898, "learning_rate": 1.3445893514230645e-05, "loss": 8.8107, "step": 110720 }, { "epoch": 0.5529726085545207, "grad_norm": 0.09027951210737228, "learning_rate": 1.3444391599289095e-05, "loss": 8.8183, "step": 110730 }, { "epoch": 0.5530225473794602, "grad_norm": 0.09175124019384384, "learning_rate": 1.3442889684347543e-05, "loss": 8.8093, "step": 110740 }, { "epoch": 0.5530724862043996, "grad_norm": 0.0899006649851799, "learning_rate": 1.3441387769405992e-05, "loss": 8.8136, "step": 110750 }, { "epoch": 0.5531224250293391, "grad_norm": 0.0890357568860054, "learning_rate": 1.3439885854464442e-05, "loss": 8.8141, "step": 110760 }, { "epoch": 0.5531723638542785, "grad_norm": 0.09818465262651443, "learning_rate": 1.3438383939522892e-05, "loss": 8.8089, "step": 110770 }, { "epoch": 0.5532223026792179, "grad_norm": 0.09356721490621567, "learning_rate": 1.3436882024581342e-05, "loss": 8.8082, "step": 110780 }, { "epoch": 0.5532722415041574, "grad_norm": 0.09822935611009598, "learning_rate": 1.3435380109639791e-05, "loss": 8.8022, "step": 110790 }, { "epoch": 0.5533221803290969, "grad_norm": 0.09416577219963074, "learning_rate": 1.3433878194698241e-05, "loss": 8.8152, "step": 110800 }, { "epoch": 0.5533721191540363, "grad_norm": 0.08785787969827652, "learning_rate": 1.343237627975669e-05, "loss": 8.8243, "step": 110810 }, { "epoch": 0.5534220579789757, "grad_norm": 0.09121017158031464, "learning_rate": 1.343087436481514e-05, "loss": 8.8282, "step": 110820 }, { "epoch": 0.5534719968039152, "grad_norm": 0.09230327606201172, "learning_rate": 1.342937244987359e-05, "loss": 8.807, "step": 110830 }, { "epoch": 0.5535219356288547, "grad_norm": 0.09492280334234238, "learning_rate": 1.3427870534932038e-05, "loss": 8.805, "step": 110840 }, { "epoch": 0.5535718744537941, "grad_norm": 0.09432464092969894, "learning_rate": 1.3426368619990489e-05, "loss": 8.8028, "step": 110850 }, { "epoch": 0.5536218132787335, "grad_norm": 0.08931145071983337, "learning_rate": 1.3424866705048937e-05, "loss": 8.8191, "step": 110860 }, { "epoch": 0.553671752103673, "grad_norm": 0.08781544119119644, "learning_rate": 1.3423364790107387e-05, "loss": 8.8121, "step": 110870 }, { "epoch": 0.5537216909286125, "grad_norm": 0.09263685345649719, "learning_rate": 1.3421862875165837e-05, "loss": 8.8321, "step": 110880 }, { "epoch": 0.5537716297535519, "grad_norm": 0.0927627757191658, "learning_rate": 1.3420360960224286e-05, "loss": 8.8092, "step": 110890 }, { "epoch": 0.5538215685784913, "grad_norm": 0.09033826738595963, "learning_rate": 1.3418859045282736e-05, "loss": 8.8183, "step": 110900 }, { "epoch": 0.5538715074034308, "grad_norm": 0.09372294694185257, "learning_rate": 1.3417357130341185e-05, "loss": 8.807, "step": 110910 }, { "epoch": 0.5539214462283703, "grad_norm": 0.09317562729120255, "learning_rate": 1.3415855215399635e-05, "loss": 8.8221, "step": 110920 }, { "epoch": 0.5539713850533097, "grad_norm": 0.09567903727293015, "learning_rate": 1.3414353300458085e-05, "loss": 8.8016, "step": 110930 }, { "epoch": 0.5540213238782491, "grad_norm": 0.09196696430444717, "learning_rate": 1.3412851385516533e-05, "loss": 8.7924, "step": 110940 }, { "epoch": 0.5540712627031886, "grad_norm": 0.09338729828596115, "learning_rate": 1.3411349470574984e-05, "loss": 8.8085, "step": 110950 }, { "epoch": 0.554121201528128, "grad_norm": 0.08997715264558792, "learning_rate": 1.3409847555633434e-05, "loss": 8.8152, "step": 110960 }, { "epoch": 0.5541711403530675, "grad_norm": 0.09378908574581146, "learning_rate": 1.3408345640691882e-05, "loss": 8.8156, "step": 110970 }, { "epoch": 0.5542210791780069, "grad_norm": 0.09587135910987854, "learning_rate": 1.3406843725750333e-05, "loss": 8.8153, "step": 110980 }, { "epoch": 0.5542710180029464, "grad_norm": 0.09000777453184128, "learning_rate": 1.3405341810808781e-05, "loss": 8.8142, "step": 110990 }, { "epoch": 0.5543209568278858, "grad_norm": 0.09232640266418457, "learning_rate": 1.3403839895867231e-05, "loss": 8.8225, "step": 111000 }, { "epoch": 0.5543708956528253, "grad_norm": 0.0918559655547142, "learning_rate": 1.3402337980925681e-05, "loss": 8.8173, "step": 111010 }, { "epoch": 0.5544208344777647, "grad_norm": 0.09692919254302979, "learning_rate": 1.340083606598413e-05, "loss": 8.8041, "step": 111020 }, { "epoch": 0.5544707733027042, "grad_norm": 0.09214792400598526, "learning_rate": 1.339933415104258e-05, "loss": 8.8139, "step": 111030 }, { "epoch": 0.5545207121276436, "grad_norm": 0.10509046912193298, "learning_rate": 1.3397832236101028e-05, "loss": 8.8006, "step": 111040 }, { "epoch": 0.5545706509525831, "grad_norm": 0.09465457499027252, "learning_rate": 1.3396330321159479e-05, "loss": 8.8227, "step": 111050 }, { "epoch": 0.5546205897775225, "grad_norm": 0.09613688290119171, "learning_rate": 1.3394828406217929e-05, "loss": 8.8222, "step": 111060 }, { "epoch": 0.554670528602462, "grad_norm": 0.09685207903385162, "learning_rate": 1.3393326491276377e-05, "loss": 8.805, "step": 111070 }, { "epoch": 0.5547204674274014, "grad_norm": 0.09501661360263824, "learning_rate": 1.3391824576334828e-05, "loss": 8.8135, "step": 111080 }, { "epoch": 0.5547704062523409, "grad_norm": 0.08681835234165192, "learning_rate": 1.3390322661393276e-05, "loss": 8.8151, "step": 111090 }, { "epoch": 0.5548203450772803, "grad_norm": 0.09274887293577194, "learning_rate": 1.3388820746451726e-05, "loss": 8.8216, "step": 111100 }, { "epoch": 0.5548702839022198, "grad_norm": 0.08929115533828735, "learning_rate": 1.3387318831510176e-05, "loss": 8.8071, "step": 111110 }, { "epoch": 0.5549202227271592, "grad_norm": 0.0890386626124382, "learning_rate": 1.3385816916568627e-05, "loss": 8.8029, "step": 111120 }, { "epoch": 0.5549701615520987, "grad_norm": 0.09131958335638046, "learning_rate": 1.3384315001627075e-05, "loss": 8.8071, "step": 111130 }, { "epoch": 0.5550201003770381, "grad_norm": 0.09491490572690964, "learning_rate": 1.3382813086685524e-05, "loss": 8.8114, "step": 111140 }, { "epoch": 0.5550700392019776, "grad_norm": 0.09056004881858826, "learning_rate": 1.3381311171743974e-05, "loss": 8.8142, "step": 111150 }, { "epoch": 0.555119978026917, "grad_norm": 0.09362094849348068, "learning_rate": 1.3379809256802424e-05, "loss": 8.8108, "step": 111160 }, { "epoch": 0.5551699168518565, "grad_norm": 0.08976966887712479, "learning_rate": 1.3378307341860874e-05, "loss": 8.8087, "step": 111170 }, { "epoch": 0.5552198556767959, "grad_norm": 0.0920504778623581, "learning_rate": 1.3376805426919323e-05, "loss": 8.8133, "step": 111180 }, { "epoch": 0.5552697945017354, "grad_norm": 0.09543319791555405, "learning_rate": 1.3375303511977771e-05, "loss": 8.8236, "step": 111190 }, { "epoch": 0.5553197333266748, "grad_norm": 0.08917569369077682, "learning_rate": 1.3373801597036221e-05, "loss": 8.8106, "step": 111200 }, { "epoch": 0.5553696721516143, "grad_norm": 0.0944250300526619, "learning_rate": 1.3372299682094671e-05, "loss": 8.8051, "step": 111210 }, { "epoch": 0.5554196109765537, "grad_norm": 0.09428086876869202, "learning_rate": 1.3370797767153122e-05, "loss": 8.8179, "step": 111220 }, { "epoch": 0.5554695498014932, "grad_norm": 0.09255554527044296, "learning_rate": 1.336929585221157e-05, "loss": 8.8145, "step": 111230 }, { "epoch": 0.5555194886264326, "grad_norm": 0.09141411632299423, "learning_rate": 1.3367793937270019e-05, "loss": 8.8118, "step": 111240 }, { "epoch": 0.555569427451372, "grad_norm": 0.0931018739938736, "learning_rate": 1.3366292022328469e-05, "loss": 8.8022, "step": 111250 }, { "epoch": 0.5556193662763115, "grad_norm": 0.09240280836820602, "learning_rate": 1.3364790107386919e-05, "loss": 8.8012, "step": 111260 }, { "epoch": 0.555669305101251, "grad_norm": 0.09618735313415527, "learning_rate": 1.3363288192445369e-05, "loss": 8.8128, "step": 111270 }, { "epoch": 0.5557192439261904, "grad_norm": 0.097829669713974, "learning_rate": 1.336178627750382e-05, "loss": 8.8086, "step": 111280 }, { "epoch": 0.5557691827511299, "grad_norm": 0.09861978143453598, "learning_rate": 1.3360284362562266e-05, "loss": 8.8114, "step": 111290 }, { "epoch": 0.5558191215760693, "grad_norm": 0.08871912211179733, "learning_rate": 1.3358782447620716e-05, "loss": 8.8007, "step": 111300 }, { "epoch": 0.5558690604010088, "grad_norm": 0.09760592132806778, "learning_rate": 1.3357280532679166e-05, "loss": 8.8187, "step": 111310 }, { "epoch": 0.5559189992259482, "grad_norm": 0.09599092602729797, "learning_rate": 1.3355778617737617e-05, "loss": 8.8205, "step": 111320 }, { "epoch": 0.5559689380508877, "grad_norm": 0.0927274227142334, "learning_rate": 1.3354276702796067e-05, "loss": 8.8211, "step": 111330 }, { "epoch": 0.5560188768758271, "grad_norm": 0.0984937772154808, "learning_rate": 1.3352774787854514e-05, "loss": 8.8142, "step": 111340 }, { "epoch": 0.5560688157007666, "grad_norm": 0.09656939655542374, "learning_rate": 1.3351272872912964e-05, "loss": 8.8249, "step": 111350 }, { "epoch": 0.556118754525706, "grad_norm": 0.0973270907998085, "learning_rate": 1.3349770957971414e-05, "loss": 8.8217, "step": 111360 }, { "epoch": 0.5561686933506454, "grad_norm": 0.09051787108182907, "learning_rate": 1.3348269043029864e-05, "loss": 8.791, "step": 111370 }, { "epoch": 0.5562186321755849, "grad_norm": 0.09165889024734497, "learning_rate": 1.3346767128088314e-05, "loss": 8.8026, "step": 111380 }, { "epoch": 0.5562685710005244, "grad_norm": 0.09260290116071701, "learning_rate": 1.3345265213146761e-05, "loss": 8.8246, "step": 111390 }, { "epoch": 0.5563185098254638, "grad_norm": 0.08657737821340561, "learning_rate": 1.3343763298205211e-05, "loss": 8.8278, "step": 111400 }, { "epoch": 0.5563684486504032, "grad_norm": 0.09201980382204056, "learning_rate": 1.3342261383263661e-05, "loss": 8.8152, "step": 111410 }, { "epoch": 0.5564183874753427, "grad_norm": 0.09139979630708694, "learning_rate": 1.3340759468322112e-05, "loss": 8.8084, "step": 111420 }, { "epoch": 0.5564683263002822, "grad_norm": 0.08889581263065338, "learning_rate": 1.3339257553380562e-05, "loss": 8.8078, "step": 111430 }, { "epoch": 0.5565182651252216, "grad_norm": 0.09374164789915085, "learning_rate": 1.3337755638439009e-05, "loss": 8.8154, "step": 111440 }, { "epoch": 0.556568203950161, "grad_norm": 0.08838196843862534, "learning_rate": 1.3336253723497459e-05, "loss": 8.8182, "step": 111450 }, { "epoch": 0.5566181427751005, "grad_norm": 0.09526786208152771, "learning_rate": 1.3334751808555909e-05, "loss": 8.8142, "step": 111460 }, { "epoch": 0.55666808160004, "grad_norm": 0.09561596810817719, "learning_rate": 1.3333249893614359e-05, "loss": 8.809, "step": 111470 }, { "epoch": 0.5567180204249794, "grad_norm": 0.09850651025772095, "learning_rate": 1.333174797867281e-05, "loss": 8.8064, "step": 111480 }, { "epoch": 0.5567679592499188, "grad_norm": 0.09541216492652893, "learning_rate": 1.3330246063731258e-05, "loss": 8.8111, "step": 111490 }, { "epoch": 0.5568178980748583, "grad_norm": 0.08966288715600967, "learning_rate": 1.3328744148789706e-05, "loss": 8.8197, "step": 111500 }, { "epoch": 0.5568678368997978, "grad_norm": 0.09588926285505295, "learning_rate": 1.3327242233848156e-05, "loss": 8.81, "step": 111510 }, { "epoch": 0.5569177757247372, "grad_norm": 0.09180673956871033, "learning_rate": 1.3325740318906607e-05, "loss": 8.7976, "step": 111520 }, { "epoch": 0.5569677145496766, "grad_norm": 0.09179525822401047, "learning_rate": 1.3324238403965057e-05, "loss": 8.8198, "step": 111530 }, { "epoch": 0.5570176533746161, "grad_norm": 0.09658701717853546, "learning_rate": 1.3322736489023505e-05, "loss": 8.8084, "step": 111540 }, { "epoch": 0.5570675921995556, "grad_norm": 0.09038843214511871, "learning_rate": 1.3321234574081954e-05, "loss": 8.8118, "step": 111550 }, { "epoch": 0.557117531024495, "grad_norm": 0.09037397056818008, "learning_rate": 1.3319732659140404e-05, "loss": 8.8033, "step": 111560 }, { "epoch": 0.5571674698494344, "grad_norm": 0.09661097079515457, "learning_rate": 1.3318230744198854e-05, "loss": 8.8129, "step": 111570 }, { "epoch": 0.5572174086743739, "grad_norm": 0.09827481955289841, "learning_rate": 1.3316728829257304e-05, "loss": 8.8086, "step": 111580 }, { "epoch": 0.5572673474993134, "grad_norm": 0.09280990809202194, "learning_rate": 1.3315226914315753e-05, "loss": 8.822, "step": 111590 }, { "epoch": 0.5573172863242528, "grad_norm": 0.09101619571447372, "learning_rate": 1.3313724999374201e-05, "loss": 8.8055, "step": 111600 }, { "epoch": 0.5573672251491922, "grad_norm": 0.09069830179214478, "learning_rate": 1.3312223084432651e-05, "loss": 8.8176, "step": 111610 }, { "epoch": 0.5574171639741317, "grad_norm": 0.08932122588157654, "learning_rate": 1.3310721169491102e-05, "loss": 8.8185, "step": 111620 }, { "epoch": 0.5574671027990712, "grad_norm": 0.08982568234205246, "learning_rate": 1.3309219254549552e-05, "loss": 8.8209, "step": 111630 }, { "epoch": 0.5575170416240106, "grad_norm": 0.09528286010026932, "learning_rate": 1.3307717339608e-05, "loss": 8.8101, "step": 111640 }, { "epoch": 0.55756698044895, "grad_norm": 0.09759854525327682, "learning_rate": 1.330621542466645e-05, "loss": 8.7972, "step": 111650 }, { "epoch": 0.5576169192738895, "grad_norm": 0.09867121279239655, "learning_rate": 1.3304713509724899e-05, "loss": 8.817, "step": 111660 }, { "epoch": 0.557666858098829, "grad_norm": 0.09287247806787491, "learning_rate": 1.3303211594783349e-05, "loss": 8.8053, "step": 111670 }, { "epoch": 0.5577167969237684, "grad_norm": 0.09172237664461136, "learning_rate": 1.33017096798418e-05, "loss": 8.8155, "step": 111680 }, { "epoch": 0.5577667357487078, "grad_norm": 0.08916451781988144, "learning_rate": 1.3300207764900248e-05, "loss": 8.8164, "step": 111690 }, { "epoch": 0.5578166745736473, "grad_norm": 0.08944099396467209, "learning_rate": 1.3298705849958698e-05, "loss": 8.8193, "step": 111700 }, { "epoch": 0.5578666133985868, "grad_norm": 0.09073956310749054, "learning_rate": 1.3297203935017146e-05, "loss": 8.8127, "step": 111710 }, { "epoch": 0.5579165522235262, "grad_norm": 0.09659486263990402, "learning_rate": 1.3295702020075597e-05, "loss": 8.7983, "step": 111720 }, { "epoch": 0.5579664910484656, "grad_norm": 0.09616279602050781, "learning_rate": 1.3294200105134047e-05, "loss": 8.8022, "step": 111730 }, { "epoch": 0.558016429873405, "grad_norm": 0.09376160800457001, "learning_rate": 1.3292698190192495e-05, "loss": 8.8009, "step": 111740 }, { "epoch": 0.5580663686983445, "grad_norm": 0.10133592039346695, "learning_rate": 1.3291196275250945e-05, "loss": 8.8166, "step": 111750 }, { "epoch": 0.558116307523284, "grad_norm": 0.09426382184028625, "learning_rate": 1.3289694360309394e-05, "loss": 8.8035, "step": 111760 }, { "epoch": 0.5581662463482234, "grad_norm": 0.09278400242328644, "learning_rate": 1.3288192445367844e-05, "loss": 8.8152, "step": 111770 }, { "epoch": 0.5582161851731628, "grad_norm": 0.09196425974369049, "learning_rate": 1.3286690530426294e-05, "loss": 8.8105, "step": 111780 }, { "epoch": 0.5582661239981023, "grad_norm": 0.09005366265773773, "learning_rate": 1.3285188615484743e-05, "loss": 8.8056, "step": 111790 }, { "epoch": 0.5583160628230418, "grad_norm": 0.09192261844873428, "learning_rate": 1.3283686700543193e-05, "loss": 8.8059, "step": 111800 }, { "epoch": 0.5583660016479812, "grad_norm": 0.09564708918333054, "learning_rate": 1.3282184785601643e-05, "loss": 8.8168, "step": 111810 }, { "epoch": 0.5584159404729206, "grad_norm": 0.09438828378915787, "learning_rate": 1.3280682870660092e-05, "loss": 8.8086, "step": 111820 }, { "epoch": 0.5584658792978601, "grad_norm": 0.0901927575469017, "learning_rate": 1.3279180955718542e-05, "loss": 8.8193, "step": 111830 }, { "epoch": 0.5585158181227996, "grad_norm": 0.0903163030743599, "learning_rate": 1.327767904077699e-05, "loss": 8.8052, "step": 111840 }, { "epoch": 0.558565756947739, "grad_norm": 0.08931218832731247, "learning_rate": 1.327617712583544e-05, "loss": 8.8079, "step": 111850 }, { "epoch": 0.5586156957726784, "grad_norm": 0.0938708558678627, "learning_rate": 1.327467521089389e-05, "loss": 8.7983, "step": 111860 }, { "epoch": 0.5586656345976179, "grad_norm": 0.09172417223453522, "learning_rate": 1.3273173295952339e-05, "loss": 8.8037, "step": 111870 }, { "epoch": 0.5587155734225574, "grad_norm": 0.0920783281326294, "learning_rate": 1.327167138101079e-05, "loss": 8.7911, "step": 111880 }, { "epoch": 0.5587655122474968, "grad_norm": 0.09476281702518463, "learning_rate": 1.3270169466069238e-05, "loss": 8.7965, "step": 111890 }, { "epoch": 0.5588154510724362, "grad_norm": 0.09168004244565964, "learning_rate": 1.3268667551127688e-05, "loss": 8.8016, "step": 111900 }, { "epoch": 0.5588653898973757, "grad_norm": 0.09263158589601517, "learning_rate": 1.3267165636186138e-05, "loss": 8.8066, "step": 111910 }, { "epoch": 0.5589153287223152, "grad_norm": 0.09056047350168228, "learning_rate": 1.3265663721244587e-05, "loss": 8.8106, "step": 111920 }, { "epoch": 0.5589652675472546, "grad_norm": 0.09256269037723541, "learning_rate": 1.3264161806303037e-05, "loss": 8.8149, "step": 111930 }, { "epoch": 0.559015206372194, "grad_norm": 0.09468802064657211, "learning_rate": 1.3262659891361485e-05, "loss": 8.7959, "step": 111940 }, { "epoch": 0.5590651451971335, "grad_norm": 0.09123507142066956, "learning_rate": 1.3261157976419935e-05, "loss": 8.7775, "step": 111950 }, { "epoch": 0.559115084022073, "grad_norm": 0.08695657551288605, "learning_rate": 1.3259656061478386e-05, "loss": 8.8059, "step": 111960 }, { "epoch": 0.5591650228470124, "grad_norm": 0.09323490411043167, "learning_rate": 1.3258154146536836e-05, "loss": 8.8008, "step": 111970 }, { "epoch": 0.5592149616719518, "grad_norm": 0.09762334078550339, "learning_rate": 1.3256652231595284e-05, "loss": 8.8023, "step": 111980 }, { "epoch": 0.5592649004968913, "grad_norm": 0.0962090715765953, "learning_rate": 1.3255150316653733e-05, "loss": 8.8002, "step": 111990 }, { "epoch": 0.5593148393218308, "grad_norm": 0.09336007386445999, "learning_rate": 1.3253648401712183e-05, "loss": 8.8044, "step": 112000 }, { "epoch": 0.5593647781467702, "grad_norm": 0.09324468672275543, "learning_rate": 1.3252146486770633e-05, "loss": 8.7984, "step": 112010 }, { "epoch": 0.5594147169717096, "grad_norm": 0.09045629948377609, "learning_rate": 1.3250644571829083e-05, "loss": 8.7981, "step": 112020 }, { "epoch": 0.5594646557966491, "grad_norm": 0.09312763065099716, "learning_rate": 1.3249142656887532e-05, "loss": 8.7998, "step": 112030 }, { "epoch": 0.5595145946215886, "grad_norm": 0.09190090000629425, "learning_rate": 1.324764074194598e-05, "loss": 8.8192, "step": 112040 }, { "epoch": 0.559564533446528, "grad_norm": 0.09083940088748932, "learning_rate": 1.324613882700443e-05, "loss": 8.8084, "step": 112050 }, { "epoch": 0.5596144722714674, "grad_norm": 0.09351202100515366, "learning_rate": 1.324463691206288e-05, "loss": 8.8105, "step": 112060 }, { "epoch": 0.5596644110964069, "grad_norm": 0.09421859681606293, "learning_rate": 1.324313499712133e-05, "loss": 8.7967, "step": 112070 }, { "epoch": 0.5597143499213464, "grad_norm": 0.09057319909334183, "learning_rate": 1.324163308217978e-05, "loss": 8.8044, "step": 112080 }, { "epoch": 0.5597642887462858, "grad_norm": 0.09181645512580872, "learning_rate": 1.3240131167238228e-05, "loss": 8.798, "step": 112090 }, { "epoch": 0.5598142275712252, "grad_norm": 0.08992860466241837, "learning_rate": 1.3238629252296678e-05, "loss": 8.7979, "step": 112100 }, { "epoch": 0.5598641663961647, "grad_norm": 0.09091092646121979, "learning_rate": 1.3237127337355128e-05, "loss": 8.8105, "step": 112110 }, { "epoch": 0.5599141052211042, "grad_norm": 0.0932513177394867, "learning_rate": 1.3235625422413578e-05, "loss": 8.7957, "step": 112120 }, { "epoch": 0.5599640440460436, "grad_norm": 0.08964350819587708, "learning_rate": 1.3234123507472028e-05, "loss": 8.8212, "step": 112130 }, { "epoch": 0.560013982870983, "grad_norm": 0.09178796410560608, "learning_rate": 1.3232621592530475e-05, "loss": 8.7955, "step": 112140 }, { "epoch": 0.5600639216959225, "grad_norm": 0.09413108229637146, "learning_rate": 1.3231119677588925e-05, "loss": 8.799, "step": 112150 }, { "epoch": 0.560113860520862, "grad_norm": 0.09713994711637497, "learning_rate": 1.3229617762647376e-05, "loss": 8.8062, "step": 112160 }, { "epoch": 0.5601637993458014, "grad_norm": 0.09629582613706589, "learning_rate": 1.3228115847705826e-05, "loss": 8.8064, "step": 112170 }, { "epoch": 0.5602137381707408, "grad_norm": 0.09238281100988388, "learning_rate": 1.3226613932764276e-05, "loss": 8.8076, "step": 112180 }, { "epoch": 0.5602636769956802, "grad_norm": 0.0891227275133133, "learning_rate": 1.3225112017822723e-05, "loss": 8.8078, "step": 112190 }, { "epoch": 0.5603136158206198, "grad_norm": 0.09361886978149414, "learning_rate": 1.3223610102881173e-05, "loss": 8.8078, "step": 112200 }, { "epoch": 0.5603635546455592, "grad_norm": 0.09347978234291077, "learning_rate": 1.3222108187939623e-05, "loss": 8.7925, "step": 112210 }, { "epoch": 0.5604134934704986, "grad_norm": 0.09022147953510284, "learning_rate": 1.3220606272998073e-05, "loss": 8.7998, "step": 112220 }, { "epoch": 0.560463432295438, "grad_norm": 0.09901180863380432, "learning_rate": 1.3219104358056523e-05, "loss": 8.7882, "step": 112230 }, { "epoch": 0.5605133711203776, "grad_norm": 0.09106682986021042, "learning_rate": 1.3217602443114972e-05, "loss": 8.7916, "step": 112240 }, { "epoch": 0.560563309945317, "grad_norm": 0.09734653681516647, "learning_rate": 1.321610052817342e-05, "loss": 8.7859, "step": 112250 }, { "epoch": 0.5606132487702564, "grad_norm": 0.09442363679409027, "learning_rate": 1.321459861323187e-05, "loss": 8.7861, "step": 112260 }, { "epoch": 0.5606631875951958, "grad_norm": 0.09389246255159378, "learning_rate": 1.321309669829032e-05, "loss": 8.8063, "step": 112270 }, { "epoch": 0.5607131264201354, "grad_norm": 0.09472633898258209, "learning_rate": 1.3211594783348771e-05, "loss": 8.7963, "step": 112280 }, { "epoch": 0.5607630652450748, "grad_norm": 0.0962495282292366, "learning_rate": 1.321009286840722e-05, "loss": 8.8155, "step": 112290 }, { "epoch": 0.5608130040700142, "grad_norm": 0.09074840694665909, "learning_rate": 1.3208590953465668e-05, "loss": 8.8103, "step": 112300 }, { "epoch": 0.5608629428949536, "grad_norm": 0.09516070038080215, "learning_rate": 1.3207089038524118e-05, "loss": 8.7933, "step": 112310 }, { "epoch": 0.5609128817198932, "grad_norm": 0.09915341436862946, "learning_rate": 1.3205587123582568e-05, "loss": 8.8079, "step": 112320 }, { "epoch": 0.5609628205448326, "grad_norm": 0.09369552135467529, "learning_rate": 1.3204085208641018e-05, "loss": 8.8061, "step": 112330 }, { "epoch": 0.561012759369772, "grad_norm": 0.09473207592964172, "learning_rate": 1.3202583293699467e-05, "loss": 8.8014, "step": 112340 }, { "epoch": 0.5610626981947114, "grad_norm": 0.09303644299507141, "learning_rate": 1.3201081378757915e-05, "loss": 8.8019, "step": 112350 }, { "epoch": 0.561112637019651, "grad_norm": 0.09537196904420853, "learning_rate": 1.3199579463816366e-05, "loss": 8.8123, "step": 112360 }, { "epoch": 0.5611625758445904, "grad_norm": 0.08959188312292099, "learning_rate": 1.3198077548874816e-05, "loss": 8.7895, "step": 112370 }, { "epoch": 0.5612125146695298, "grad_norm": 0.0941653922200203, "learning_rate": 1.3196575633933266e-05, "loss": 8.7977, "step": 112380 }, { "epoch": 0.5612624534944692, "grad_norm": 0.09285327792167664, "learning_rate": 1.3195073718991716e-05, "loss": 8.8058, "step": 112390 }, { "epoch": 0.5613123923194088, "grad_norm": 0.09470658004283905, "learning_rate": 1.3193571804050163e-05, "loss": 8.7981, "step": 112400 }, { "epoch": 0.5613623311443482, "grad_norm": 0.0920797735452652, "learning_rate": 1.3192069889108613e-05, "loss": 8.8114, "step": 112410 }, { "epoch": 0.5614122699692876, "grad_norm": 0.09267135709524155, "learning_rate": 1.3190567974167063e-05, "loss": 8.8038, "step": 112420 }, { "epoch": 0.561462208794227, "grad_norm": 0.0882105603814125, "learning_rate": 1.3189066059225513e-05, "loss": 8.804, "step": 112430 }, { "epoch": 0.5615121476191666, "grad_norm": 0.09088388085365295, "learning_rate": 1.3187564144283964e-05, "loss": 8.7979, "step": 112440 }, { "epoch": 0.561562086444106, "grad_norm": 0.09451723098754883, "learning_rate": 1.3186062229342412e-05, "loss": 8.8055, "step": 112450 }, { "epoch": 0.5616120252690454, "grad_norm": 0.08706700801849365, "learning_rate": 1.318456031440086e-05, "loss": 8.8008, "step": 112460 }, { "epoch": 0.5616619640939848, "grad_norm": 0.09282489866018295, "learning_rate": 1.318305839945931e-05, "loss": 8.7917, "step": 112470 }, { "epoch": 0.5617119029189244, "grad_norm": 0.08970408886671066, "learning_rate": 1.3181556484517761e-05, "loss": 8.8134, "step": 112480 }, { "epoch": 0.5617618417438638, "grad_norm": 0.09117847681045532, "learning_rate": 1.3180054569576211e-05, "loss": 8.8164, "step": 112490 }, { "epoch": 0.5618117805688032, "grad_norm": 0.0940316915512085, "learning_rate": 1.317855265463466e-05, "loss": 8.81, "step": 112500 }, { "epoch": 0.5618617193937426, "grad_norm": 0.08806363493204117, "learning_rate": 1.3177050739693108e-05, "loss": 8.8093, "step": 112510 }, { "epoch": 0.5619116582186822, "grad_norm": 0.0929834246635437, "learning_rate": 1.3175548824751558e-05, "loss": 8.79, "step": 112520 }, { "epoch": 0.5619615970436216, "grad_norm": 0.09379177540540695, "learning_rate": 1.3174046909810009e-05, "loss": 8.7881, "step": 112530 }, { "epoch": 0.562011535868561, "grad_norm": 0.0958777591586113, "learning_rate": 1.3172544994868459e-05, "loss": 8.7917, "step": 112540 }, { "epoch": 0.5620614746935004, "grad_norm": 0.09718500822782516, "learning_rate": 1.3171043079926907e-05, "loss": 8.807, "step": 112550 }, { "epoch": 0.56211141351844, "grad_norm": 0.09176292270421982, "learning_rate": 1.3169541164985356e-05, "loss": 8.8189, "step": 112560 }, { "epoch": 0.5621613523433794, "grad_norm": 0.09247379750013351, "learning_rate": 1.3168039250043806e-05, "loss": 8.798, "step": 112570 }, { "epoch": 0.5622112911683188, "grad_norm": 0.10087962448596954, "learning_rate": 1.3166537335102256e-05, "loss": 8.7897, "step": 112580 }, { "epoch": 0.5622612299932582, "grad_norm": 0.08978106826543808, "learning_rate": 1.3165035420160706e-05, "loss": 8.801, "step": 112590 }, { "epoch": 0.5623111688181978, "grad_norm": 0.09266199916601181, "learning_rate": 1.3163533505219155e-05, "loss": 8.7918, "step": 112600 }, { "epoch": 0.5623611076431372, "grad_norm": 0.087232805788517, "learning_rate": 1.3162031590277605e-05, "loss": 8.8059, "step": 112610 }, { "epoch": 0.5624110464680766, "grad_norm": 0.09095864742994308, "learning_rate": 1.3160529675336053e-05, "loss": 8.8065, "step": 112620 }, { "epoch": 0.562460985293016, "grad_norm": 0.09693886339664459, "learning_rate": 1.3159027760394504e-05, "loss": 8.7969, "step": 112630 }, { "epoch": 0.5625109241179556, "grad_norm": 0.09412913024425507, "learning_rate": 1.3157525845452954e-05, "loss": 8.8179, "step": 112640 }, { "epoch": 0.562560862942895, "grad_norm": 0.0964449867606163, "learning_rate": 1.3156023930511402e-05, "loss": 8.7957, "step": 112650 }, { "epoch": 0.5626108017678344, "grad_norm": 0.09539590030908585, "learning_rate": 1.3154522015569852e-05, "loss": 8.7885, "step": 112660 }, { "epoch": 0.5626607405927738, "grad_norm": 0.0972166359424591, "learning_rate": 1.31530201006283e-05, "loss": 8.8137, "step": 112670 }, { "epoch": 0.5627106794177134, "grad_norm": 0.092460498213768, "learning_rate": 1.3151518185686751e-05, "loss": 8.8034, "step": 112680 }, { "epoch": 0.5627606182426528, "grad_norm": 0.0895271971821785, "learning_rate": 1.3150016270745201e-05, "loss": 8.8099, "step": 112690 }, { "epoch": 0.5628105570675922, "grad_norm": 0.09291508048772812, "learning_rate": 1.314851435580365e-05, "loss": 8.802, "step": 112700 }, { "epoch": 0.5628604958925316, "grad_norm": 0.08741452544927597, "learning_rate": 1.31470124408621e-05, "loss": 8.7987, "step": 112710 }, { "epoch": 0.5629104347174712, "grad_norm": 0.09074393659830093, "learning_rate": 1.3145510525920548e-05, "loss": 8.7936, "step": 112720 }, { "epoch": 0.5629603735424106, "grad_norm": 0.09287057816982269, "learning_rate": 1.3144008610978999e-05, "loss": 8.7991, "step": 112730 }, { "epoch": 0.56301031236735, "grad_norm": 0.09430692344903946, "learning_rate": 1.3142506696037449e-05, "loss": 8.8014, "step": 112740 }, { "epoch": 0.5630602511922894, "grad_norm": 0.09498278051614761, "learning_rate": 1.3141004781095897e-05, "loss": 8.8221, "step": 112750 }, { "epoch": 0.5631101900172288, "grad_norm": 0.09470874071121216, "learning_rate": 1.3139502866154347e-05, "loss": 8.7972, "step": 112760 }, { "epoch": 0.5631601288421684, "grad_norm": 0.09391231089830399, "learning_rate": 1.3138000951212798e-05, "loss": 8.781, "step": 112770 }, { "epoch": 0.5632100676671078, "grad_norm": 0.09245941042900085, "learning_rate": 1.3136499036271246e-05, "loss": 8.8051, "step": 112780 }, { "epoch": 0.5632600064920472, "grad_norm": 0.09180227667093277, "learning_rate": 1.3134997121329696e-05, "loss": 8.7889, "step": 112790 }, { "epoch": 0.5633099453169866, "grad_norm": 0.10003501176834106, "learning_rate": 1.3133495206388145e-05, "loss": 8.7907, "step": 112800 }, { "epoch": 0.5633598841419262, "grad_norm": 0.09212306886911392, "learning_rate": 1.3131993291446595e-05, "loss": 8.7978, "step": 112810 }, { "epoch": 0.5634098229668656, "grad_norm": 0.0921301320195198, "learning_rate": 1.3130491376505045e-05, "loss": 8.797, "step": 112820 }, { "epoch": 0.563459761791805, "grad_norm": 0.09204480797052383, "learning_rate": 1.3128989461563494e-05, "loss": 8.7892, "step": 112830 }, { "epoch": 0.5635097006167444, "grad_norm": 0.09469878673553467, "learning_rate": 1.3127487546621944e-05, "loss": 8.8042, "step": 112840 }, { "epoch": 0.563559639441684, "grad_norm": 0.09218306094408035, "learning_rate": 1.3125985631680392e-05, "loss": 8.8014, "step": 112850 }, { "epoch": 0.5636095782666234, "grad_norm": 0.09181991964578629, "learning_rate": 1.3124483716738842e-05, "loss": 8.7919, "step": 112860 }, { "epoch": 0.5636595170915628, "grad_norm": 0.09692985564470291, "learning_rate": 1.3122981801797293e-05, "loss": 8.8035, "step": 112870 }, { "epoch": 0.5637094559165022, "grad_norm": 0.09414488822221756, "learning_rate": 1.3121479886855741e-05, "loss": 8.7791, "step": 112880 }, { "epoch": 0.5637593947414418, "grad_norm": 0.09031105786561966, "learning_rate": 1.3119977971914191e-05, "loss": 8.7972, "step": 112890 }, { "epoch": 0.5638093335663812, "grad_norm": 0.08820466697216034, "learning_rate": 1.311847605697264e-05, "loss": 8.8065, "step": 112900 }, { "epoch": 0.5638592723913206, "grad_norm": 0.09765250235795975, "learning_rate": 1.311697414203109e-05, "loss": 8.8143, "step": 112910 }, { "epoch": 0.56390921121626, "grad_norm": 0.09003455191850662, "learning_rate": 1.311547222708954e-05, "loss": 8.7868, "step": 112920 }, { "epoch": 0.5639591500411996, "grad_norm": 0.09266629070043564, "learning_rate": 1.311397031214799e-05, "loss": 8.8078, "step": 112930 }, { "epoch": 0.564009088866139, "grad_norm": 0.08923901617527008, "learning_rate": 1.3112468397206439e-05, "loss": 8.8024, "step": 112940 }, { "epoch": 0.5640590276910784, "grad_norm": 0.09165368974208832, "learning_rate": 1.3110966482264887e-05, "loss": 8.8027, "step": 112950 }, { "epoch": 0.5641089665160178, "grad_norm": 0.09243005514144897, "learning_rate": 1.3109464567323337e-05, "loss": 8.8092, "step": 112960 }, { "epoch": 0.5641589053409574, "grad_norm": 0.08696895092725754, "learning_rate": 1.3107962652381788e-05, "loss": 8.8019, "step": 112970 }, { "epoch": 0.5642088441658968, "grad_norm": 0.0982133075594902, "learning_rate": 1.3106460737440238e-05, "loss": 8.7784, "step": 112980 }, { "epoch": 0.5642587829908362, "grad_norm": 0.09230280667543411, "learning_rate": 1.3104958822498686e-05, "loss": 8.8033, "step": 112990 }, { "epoch": 0.5643087218157756, "grad_norm": 0.09832031279802322, "learning_rate": 1.3103456907557135e-05, "loss": 8.8019, "step": 113000 }, { "epoch": 0.5643586606407152, "grad_norm": 0.09023145586252213, "learning_rate": 1.3101954992615585e-05, "loss": 8.8122, "step": 113010 }, { "epoch": 0.5644085994656546, "grad_norm": 0.09258860349655151, "learning_rate": 1.3100453077674035e-05, "loss": 8.8005, "step": 113020 }, { "epoch": 0.564458538290594, "grad_norm": 0.09020715951919556, "learning_rate": 1.3098951162732485e-05, "loss": 8.8047, "step": 113030 }, { "epoch": 0.5645084771155334, "grad_norm": 0.09244858473539352, "learning_rate": 1.3097449247790934e-05, "loss": 8.7822, "step": 113040 }, { "epoch": 0.564558415940473, "grad_norm": 0.09436847269535065, "learning_rate": 1.3095947332849382e-05, "loss": 8.8044, "step": 113050 }, { "epoch": 0.5646083547654124, "grad_norm": 0.0903688296675682, "learning_rate": 1.3094445417907832e-05, "loss": 8.7983, "step": 113060 }, { "epoch": 0.5646582935903518, "grad_norm": 0.08826575428247452, "learning_rate": 1.3092943502966283e-05, "loss": 8.7907, "step": 113070 }, { "epoch": 0.5647082324152912, "grad_norm": 0.09253015369176865, "learning_rate": 1.3091441588024733e-05, "loss": 8.7995, "step": 113080 }, { "epoch": 0.5647581712402308, "grad_norm": 0.08847915381193161, "learning_rate": 1.3089939673083183e-05, "loss": 8.7927, "step": 113090 }, { "epoch": 0.5648081100651702, "grad_norm": 0.09462785720825195, "learning_rate": 1.308843775814163e-05, "loss": 8.7956, "step": 113100 }, { "epoch": 0.5648580488901096, "grad_norm": 0.09631918370723724, "learning_rate": 1.308693584320008e-05, "loss": 8.8033, "step": 113110 }, { "epoch": 0.564907987715049, "grad_norm": 0.09395527094602585, "learning_rate": 1.308543392825853e-05, "loss": 8.7929, "step": 113120 }, { "epoch": 0.5649579265399886, "grad_norm": 0.09398861974477768, "learning_rate": 1.308393201331698e-05, "loss": 8.7886, "step": 113130 }, { "epoch": 0.565007865364928, "grad_norm": 0.09166686236858368, "learning_rate": 1.308243009837543e-05, "loss": 8.7877, "step": 113140 }, { "epoch": 0.5650578041898674, "grad_norm": 0.09073825925588608, "learning_rate": 1.3080928183433877e-05, "loss": 8.8058, "step": 113150 }, { "epoch": 0.5651077430148068, "grad_norm": 0.09182006120681763, "learning_rate": 1.3079426268492327e-05, "loss": 8.7976, "step": 113160 }, { "epoch": 0.5651576818397464, "grad_norm": 0.09310693293809891, "learning_rate": 1.3077924353550778e-05, "loss": 8.7859, "step": 113170 }, { "epoch": 0.5652076206646858, "grad_norm": 0.09456516802310944, "learning_rate": 1.3076422438609228e-05, "loss": 8.8033, "step": 113180 }, { "epoch": 0.5652575594896252, "grad_norm": 0.08986683934926987, "learning_rate": 1.3074920523667678e-05, "loss": 8.808, "step": 113190 }, { "epoch": 0.5653074983145646, "grad_norm": 0.09818089008331299, "learning_rate": 1.3073418608726125e-05, "loss": 8.7929, "step": 113200 }, { "epoch": 0.5653574371395041, "grad_norm": 0.09592483192682266, "learning_rate": 1.3071916693784575e-05, "loss": 8.8035, "step": 113210 }, { "epoch": 0.5654073759644436, "grad_norm": 0.09427493065595627, "learning_rate": 1.3070414778843025e-05, "loss": 8.7978, "step": 113220 }, { "epoch": 0.565457314789383, "grad_norm": 0.09667732566595078, "learning_rate": 1.3068912863901475e-05, "loss": 8.7764, "step": 113230 }, { "epoch": 0.5655072536143224, "grad_norm": 0.10168339312076569, "learning_rate": 1.3067410948959925e-05, "loss": 8.7989, "step": 113240 }, { "epoch": 0.565557192439262, "grad_norm": 0.09616504609584808, "learning_rate": 1.3065909034018374e-05, "loss": 8.8068, "step": 113250 }, { "epoch": 0.5656071312642014, "grad_norm": 0.09338529407978058, "learning_rate": 1.3064407119076822e-05, "loss": 8.7902, "step": 113260 }, { "epoch": 0.5656570700891408, "grad_norm": 0.09293079376220703, "learning_rate": 1.3062905204135273e-05, "loss": 8.7938, "step": 113270 }, { "epoch": 0.5657070089140802, "grad_norm": 0.09134235978126526, "learning_rate": 1.3061403289193723e-05, "loss": 8.7961, "step": 113280 }, { "epoch": 0.5657569477390197, "grad_norm": 0.0925818383693695, "learning_rate": 1.3059901374252173e-05, "loss": 8.7874, "step": 113290 }, { "epoch": 0.5658068865639592, "grad_norm": 0.09122247248888016, "learning_rate": 1.3058399459310621e-05, "loss": 8.807, "step": 113300 }, { "epoch": 0.5658568253888986, "grad_norm": 0.08886589109897614, "learning_rate": 1.305689754436907e-05, "loss": 8.792, "step": 113310 }, { "epoch": 0.565906764213838, "grad_norm": 0.09335944801568985, "learning_rate": 1.305539562942752e-05, "loss": 8.7786, "step": 113320 }, { "epoch": 0.5659567030387775, "grad_norm": 0.088760145008564, "learning_rate": 1.305389371448597e-05, "loss": 8.791, "step": 113330 }, { "epoch": 0.566006641863717, "grad_norm": 0.09438331425189972, "learning_rate": 1.305239179954442e-05, "loss": 8.7911, "step": 113340 }, { "epoch": 0.5660565806886564, "grad_norm": 0.09462893754243851, "learning_rate": 1.3050889884602869e-05, "loss": 8.7829, "step": 113350 }, { "epoch": 0.5661065195135958, "grad_norm": 0.09259051084518433, "learning_rate": 1.3049387969661317e-05, "loss": 8.8002, "step": 113360 }, { "epoch": 0.5661564583385353, "grad_norm": 0.09230745583772659, "learning_rate": 1.3047886054719768e-05, "loss": 8.7846, "step": 113370 }, { "epoch": 0.5662063971634748, "grad_norm": 0.09083818644285202, "learning_rate": 1.3046384139778218e-05, "loss": 8.7832, "step": 113380 }, { "epoch": 0.5662563359884142, "grad_norm": 0.09257438033819199, "learning_rate": 1.3044882224836668e-05, "loss": 8.7789, "step": 113390 }, { "epoch": 0.5663062748133536, "grad_norm": 0.09438567608594894, "learning_rate": 1.3043380309895116e-05, "loss": 8.7963, "step": 113400 }, { "epoch": 0.5663562136382931, "grad_norm": 0.09249507635831833, "learning_rate": 1.3041878394953567e-05, "loss": 8.7973, "step": 113410 }, { "epoch": 0.5664061524632326, "grad_norm": 0.09157245606184006, "learning_rate": 1.3040376480012015e-05, "loss": 8.7952, "step": 113420 }, { "epoch": 0.566456091288172, "grad_norm": 0.09889290481805801, "learning_rate": 1.3038874565070465e-05, "loss": 8.8033, "step": 113430 }, { "epoch": 0.5665060301131114, "grad_norm": 0.09277231991291046, "learning_rate": 1.3037372650128915e-05, "loss": 8.8005, "step": 113440 }, { "epoch": 0.5665559689380509, "grad_norm": 0.09674065560102463, "learning_rate": 1.3035870735187364e-05, "loss": 8.7933, "step": 113450 }, { "epoch": 0.5666059077629904, "grad_norm": 0.09464483708143234, "learning_rate": 1.3034368820245814e-05, "loss": 8.7983, "step": 113460 }, { "epoch": 0.5666558465879298, "grad_norm": 0.09485206753015518, "learning_rate": 1.3032866905304263e-05, "loss": 8.797, "step": 113470 }, { "epoch": 0.5667057854128692, "grad_norm": 0.08628489077091217, "learning_rate": 1.3031364990362713e-05, "loss": 8.8084, "step": 113480 }, { "epoch": 0.5667557242378087, "grad_norm": 0.09606333822011948, "learning_rate": 1.3029863075421163e-05, "loss": 8.7985, "step": 113490 }, { "epoch": 0.5668056630627482, "grad_norm": 0.09314040094614029, "learning_rate": 1.3028361160479611e-05, "loss": 8.8068, "step": 113500 }, { "epoch": 0.5668556018876876, "grad_norm": 0.09132982790470123, "learning_rate": 1.3026859245538062e-05, "loss": 8.7969, "step": 113510 }, { "epoch": 0.566905540712627, "grad_norm": 0.09420787543058395, "learning_rate": 1.302535733059651e-05, "loss": 8.7799, "step": 113520 }, { "epoch": 0.5669554795375665, "grad_norm": 0.09258543699979782, "learning_rate": 1.302385541565496e-05, "loss": 8.8007, "step": 113530 }, { "epoch": 0.567005418362506, "grad_norm": 0.09835248440504074, "learning_rate": 1.302235350071341e-05, "loss": 8.8051, "step": 113540 }, { "epoch": 0.5670553571874454, "grad_norm": 0.093469999730587, "learning_rate": 1.3020851585771859e-05, "loss": 8.81, "step": 113550 }, { "epoch": 0.5671052960123848, "grad_norm": 0.09576337039470673, "learning_rate": 1.3019349670830309e-05, "loss": 8.7956, "step": 113560 }, { "epoch": 0.5671552348373243, "grad_norm": 0.09052767604589462, "learning_rate": 1.301784775588876e-05, "loss": 8.7867, "step": 113570 }, { "epoch": 0.5672051736622638, "grad_norm": 0.09173665940761566, "learning_rate": 1.3016345840947208e-05, "loss": 8.7889, "step": 113580 }, { "epoch": 0.5672551124872032, "grad_norm": 0.09585422277450562, "learning_rate": 1.3014843926005658e-05, "loss": 8.799, "step": 113590 }, { "epoch": 0.5673050513121426, "grad_norm": 0.09513553977012634, "learning_rate": 1.3013342011064106e-05, "loss": 8.767, "step": 113600 }, { "epoch": 0.5673549901370821, "grad_norm": 0.08924262225627899, "learning_rate": 1.3011840096122557e-05, "loss": 8.8002, "step": 113610 }, { "epoch": 0.5674049289620215, "grad_norm": 0.0911087617278099, "learning_rate": 1.3010338181181007e-05, "loss": 8.794, "step": 113620 }, { "epoch": 0.567454867786961, "grad_norm": 0.09030284732580185, "learning_rate": 1.3008836266239455e-05, "loss": 8.8027, "step": 113630 }, { "epoch": 0.5675048066119004, "grad_norm": 0.09411875158548355, "learning_rate": 1.3007334351297905e-05, "loss": 8.7871, "step": 113640 }, { "epoch": 0.5675547454368399, "grad_norm": 0.09722502529621124, "learning_rate": 1.3005832436356354e-05, "loss": 8.7956, "step": 113650 }, { "epoch": 0.5676046842617793, "grad_norm": 0.09281644970178604, "learning_rate": 1.3004330521414804e-05, "loss": 8.787, "step": 113660 }, { "epoch": 0.5676546230867188, "grad_norm": 0.0939922034740448, "learning_rate": 1.3002828606473254e-05, "loss": 8.7992, "step": 113670 }, { "epoch": 0.5677045619116582, "grad_norm": 0.08864696323871613, "learning_rate": 1.3001326691531703e-05, "loss": 8.7967, "step": 113680 }, { "epoch": 0.5677545007365977, "grad_norm": 0.09028666466474533, "learning_rate": 1.2999824776590153e-05, "loss": 8.792, "step": 113690 }, { "epoch": 0.5678044395615371, "grad_norm": 0.09309808909893036, "learning_rate": 1.2998322861648601e-05, "loss": 8.7893, "step": 113700 }, { "epoch": 0.5678543783864766, "grad_norm": 0.08978434652090073, "learning_rate": 1.2996820946707052e-05, "loss": 8.7902, "step": 113710 }, { "epoch": 0.567904317211416, "grad_norm": 0.09246104210615158, "learning_rate": 1.2995319031765502e-05, "loss": 8.7838, "step": 113720 }, { "epoch": 0.5679542560363554, "grad_norm": 0.0925283282995224, "learning_rate": 1.2993817116823952e-05, "loss": 8.7888, "step": 113730 }, { "epoch": 0.5680041948612949, "grad_norm": 0.09267081320285797, "learning_rate": 1.29923152018824e-05, "loss": 8.7942, "step": 113740 }, { "epoch": 0.5680541336862344, "grad_norm": 0.09131681174039841, "learning_rate": 1.2990813286940849e-05, "loss": 8.7979, "step": 113750 }, { "epoch": 0.5681040725111738, "grad_norm": 0.09731698036193848, "learning_rate": 1.2989311371999299e-05, "loss": 8.7936, "step": 113760 }, { "epoch": 0.5681540113361132, "grad_norm": 0.09286783635616302, "learning_rate": 1.298780945705775e-05, "loss": 8.8072, "step": 113770 }, { "epoch": 0.5682039501610527, "grad_norm": 0.08999673277139664, "learning_rate": 1.29863075421162e-05, "loss": 8.7972, "step": 113780 }, { "epoch": 0.5682538889859922, "grad_norm": 0.09515384584665298, "learning_rate": 1.2984805627174648e-05, "loss": 8.8005, "step": 113790 }, { "epoch": 0.5683038278109316, "grad_norm": 0.09427673369646072, "learning_rate": 1.2983303712233096e-05, "loss": 8.7889, "step": 113800 }, { "epoch": 0.568353766635871, "grad_norm": 0.09467284381389618, "learning_rate": 1.2981801797291547e-05, "loss": 8.8057, "step": 113810 }, { "epoch": 0.5684037054608105, "grad_norm": 0.09350381791591644, "learning_rate": 1.2980299882349997e-05, "loss": 8.7858, "step": 113820 }, { "epoch": 0.56845364428575, "grad_norm": 0.08991306275129318, "learning_rate": 1.2978797967408447e-05, "loss": 8.7852, "step": 113830 }, { "epoch": 0.5685035831106894, "grad_norm": 0.08979171514511108, "learning_rate": 1.2977296052466895e-05, "loss": 8.7917, "step": 113840 }, { "epoch": 0.5685535219356288, "grad_norm": 0.09261246025562286, "learning_rate": 1.2975794137525344e-05, "loss": 8.8051, "step": 113850 }, { "epoch": 0.5686034607605683, "grad_norm": 0.09397750347852707, "learning_rate": 1.2974292222583794e-05, "loss": 8.7922, "step": 113860 }, { "epoch": 0.5686533995855078, "grad_norm": 0.08991680294275284, "learning_rate": 1.2972790307642244e-05, "loss": 8.8009, "step": 113870 }, { "epoch": 0.5687033384104472, "grad_norm": 0.08592606335878372, "learning_rate": 1.2971288392700694e-05, "loss": 8.7829, "step": 113880 }, { "epoch": 0.5687532772353866, "grad_norm": 0.08787564933300018, "learning_rate": 1.2969786477759145e-05, "loss": 8.7925, "step": 113890 }, { "epoch": 0.5688032160603261, "grad_norm": 0.09271685034036636, "learning_rate": 1.2968284562817591e-05, "loss": 8.7928, "step": 113900 }, { "epoch": 0.5688531548852656, "grad_norm": 0.09563078731298447, "learning_rate": 1.2966782647876042e-05, "loss": 8.7901, "step": 113910 }, { "epoch": 0.568903093710205, "grad_norm": 0.08803986757993698, "learning_rate": 1.2965280732934492e-05, "loss": 8.7871, "step": 113920 }, { "epoch": 0.5689530325351444, "grad_norm": 0.09317762404680252, "learning_rate": 1.2963778817992942e-05, "loss": 8.7962, "step": 113930 }, { "epoch": 0.5690029713600839, "grad_norm": 0.09467476606369019, "learning_rate": 1.2962276903051392e-05, "loss": 8.8057, "step": 113940 }, { "epoch": 0.5690529101850234, "grad_norm": 0.0917622447013855, "learning_rate": 1.2960774988109839e-05, "loss": 8.7854, "step": 113950 }, { "epoch": 0.5691028490099628, "grad_norm": 0.09286537766456604, "learning_rate": 1.2959273073168289e-05, "loss": 8.7928, "step": 113960 }, { "epoch": 0.5691527878349022, "grad_norm": 0.09457229822874069, "learning_rate": 1.295777115822674e-05, "loss": 8.8018, "step": 113970 }, { "epoch": 0.5692027266598417, "grad_norm": 0.09735032916069031, "learning_rate": 1.295626924328519e-05, "loss": 8.7764, "step": 113980 }, { "epoch": 0.5692526654847812, "grad_norm": 0.09656371176242828, "learning_rate": 1.295476732834364e-05, "loss": 8.7861, "step": 113990 }, { "epoch": 0.5693026043097206, "grad_norm": 0.0926496610045433, "learning_rate": 1.2953265413402086e-05, "loss": 8.7779, "step": 114000 }, { "epoch": 0.56935254313466, "grad_norm": 0.09257472306489944, "learning_rate": 1.2951763498460537e-05, "loss": 8.7978, "step": 114010 }, { "epoch": 0.5694024819595995, "grad_norm": 0.09308171272277832, "learning_rate": 1.2950261583518987e-05, "loss": 8.7984, "step": 114020 }, { "epoch": 0.569452420784539, "grad_norm": 0.09056945145130157, "learning_rate": 1.2948759668577437e-05, "loss": 8.8057, "step": 114030 }, { "epoch": 0.5695023596094784, "grad_norm": 0.09501245617866516, "learning_rate": 1.2947257753635887e-05, "loss": 8.7859, "step": 114040 }, { "epoch": 0.5695522984344178, "grad_norm": 0.08686729520559311, "learning_rate": 1.2945755838694336e-05, "loss": 8.7834, "step": 114050 }, { "epoch": 0.5696022372593573, "grad_norm": 0.08709965646266937, "learning_rate": 1.2944253923752784e-05, "loss": 8.7863, "step": 114060 }, { "epoch": 0.5696521760842967, "grad_norm": 0.09584898501634598, "learning_rate": 1.2942752008811234e-05, "loss": 8.7893, "step": 114070 }, { "epoch": 0.5697021149092362, "grad_norm": 0.09185926616191864, "learning_rate": 1.2941250093869685e-05, "loss": 8.7821, "step": 114080 }, { "epoch": 0.5697520537341756, "grad_norm": 0.09461195766925812, "learning_rate": 1.2939748178928135e-05, "loss": 8.7804, "step": 114090 }, { "epoch": 0.5698019925591151, "grad_norm": 0.09803737699985504, "learning_rate": 1.2938246263986583e-05, "loss": 8.7888, "step": 114100 }, { "epoch": 0.5698519313840545, "grad_norm": 0.08924354612827301, "learning_rate": 1.2936744349045032e-05, "loss": 8.7978, "step": 114110 }, { "epoch": 0.569901870208994, "grad_norm": 0.0952063798904419, "learning_rate": 1.2935242434103482e-05, "loss": 8.8019, "step": 114120 }, { "epoch": 0.5699518090339334, "grad_norm": 0.09857337176799774, "learning_rate": 1.2933740519161932e-05, "loss": 8.7956, "step": 114130 }, { "epoch": 0.5700017478588729, "grad_norm": 0.09166187793016434, "learning_rate": 1.2932238604220382e-05, "loss": 8.7832, "step": 114140 }, { "epoch": 0.5700516866838123, "grad_norm": 0.09211006760597229, "learning_rate": 1.293073668927883e-05, "loss": 8.7998, "step": 114150 }, { "epoch": 0.5701016255087518, "grad_norm": 0.09411497414112091, "learning_rate": 1.292923477433728e-05, "loss": 8.7912, "step": 114160 }, { "epoch": 0.5701515643336912, "grad_norm": 0.09173117578029633, "learning_rate": 1.292773285939573e-05, "loss": 8.8004, "step": 114170 }, { "epoch": 0.5702015031586307, "grad_norm": 0.09168483316898346, "learning_rate": 1.292623094445418e-05, "loss": 8.7954, "step": 114180 }, { "epoch": 0.5702514419835701, "grad_norm": 0.09110462665557861, "learning_rate": 1.292472902951263e-05, "loss": 8.7839, "step": 114190 }, { "epoch": 0.5703013808085096, "grad_norm": 0.0920116975903511, "learning_rate": 1.2923227114571078e-05, "loss": 8.7785, "step": 114200 }, { "epoch": 0.570351319633449, "grad_norm": 0.08953049778938293, "learning_rate": 1.2921725199629528e-05, "loss": 8.7941, "step": 114210 }, { "epoch": 0.5704012584583885, "grad_norm": 0.09635299444198608, "learning_rate": 1.2920223284687977e-05, "loss": 8.7874, "step": 114220 }, { "epoch": 0.5704511972833279, "grad_norm": 0.09404581040143967, "learning_rate": 1.2918721369746427e-05, "loss": 8.7777, "step": 114230 }, { "epoch": 0.5705011361082674, "grad_norm": 0.08653539419174194, "learning_rate": 1.2917219454804877e-05, "loss": 8.7868, "step": 114240 }, { "epoch": 0.5705510749332068, "grad_norm": 0.08841050416231155, "learning_rate": 1.2915717539863326e-05, "loss": 8.7813, "step": 114250 }, { "epoch": 0.5706010137581463, "grad_norm": 0.0912218913435936, "learning_rate": 1.2914215624921776e-05, "loss": 8.8012, "step": 114260 }, { "epoch": 0.5706509525830857, "grad_norm": 0.09305412322282791, "learning_rate": 1.2912713709980224e-05, "loss": 8.8028, "step": 114270 }, { "epoch": 0.5707008914080252, "grad_norm": 0.09391267597675323, "learning_rate": 1.2911211795038675e-05, "loss": 8.7863, "step": 114280 }, { "epoch": 0.5707508302329646, "grad_norm": 0.09395688027143478, "learning_rate": 1.2909709880097125e-05, "loss": 8.796, "step": 114290 }, { "epoch": 0.5708007690579041, "grad_norm": 0.09076563268899918, "learning_rate": 1.2908207965155573e-05, "loss": 8.7863, "step": 114300 }, { "epoch": 0.5708507078828435, "grad_norm": 0.09484805911779404, "learning_rate": 1.2906706050214023e-05, "loss": 8.7788, "step": 114310 }, { "epoch": 0.570900646707783, "grad_norm": 0.0906333476305008, "learning_rate": 1.2905204135272472e-05, "loss": 8.7787, "step": 114320 }, { "epoch": 0.5709505855327224, "grad_norm": 0.0936955064535141, "learning_rate": 1.2903702220330922e-05, "loss": 8.7984, "step": 114330 }, { "epoch": 0.5710005243576619, "grad_norm": 0.09082095324993134, "learning_rate": 1.2902200305389372e-05, "loss": 8.7958, "step": 114340 }, { "epoch": 0.5710504631826013, "grad_norm": 0.096087247133255, "learning_rate": 1.290069839044782e-05, "loss": 8.7895, "step": 114350 }, { "epoch": 0.5711004020075408, "grad_norm": 0.09378685802221298, "learning_rate": 1.2899196475506271e-05, "loss": 8.7897, "step": 114360 }, { "epoch": 0.5711503408324802, "grad_norm": 0.09574133902788162, "learning_rate": 1.289769456056472e-05, "loss": 8.7893, "step": 114370 }, { "epoch": 0.5712002796574197, "grad_norm": 0.09573321789503098, "learning_rate": 1.289619264562317e-05, "loss": 8.7822, "step": 114380 }, { "epoch": 0.5712502184823591, "grad_norm": 0.0985926166176796, "learning_rate": 1.289469073068162e-05, "loss": 8.7745, "step": 114390 }, { "epoch": 0.5713001573072986, "grad_norm": 0.09592541307210922, "learning_rate": 1.2893188815740068e-05, "loss": 8.7653, "step": 114400 }, { "epoch": 0.571350096132238, "grad_norm": 0.09706228971481323, "learning_rate": 1.2891686900798518e-05, "loss": 8.787, "step": 114410 }, { "epoch": 0.5714000349571775, "grad_norm": 0.09211396425962448, "learning_rate": 1.2890184985856969e-05, "loss": 8.7994, "step": 114420 }, { "epoch": 0.5714499737821169, "grad_norm": 0.08897708356380463, "learning_rate": 1.2888683070915417e-05, "loss": 8.7914, "step": 114430 }, { "epoch": 0.5714999126070563, "grad_norm": 0.09851206839084625, "learning_rate": 1.2887181155973867e-05, "loss": 8.7865, "step": 114440 }, { "epoch": 0.5715498514319958, "grad_norm": 0.0946231335401535, "learning_rate": 1.2885679241032316e-05, "loss": 8.7873, "step": 114450 }, { "epoch": 0.5715997902569353, "grad_norm": 0.096876360476017, "learning_rate": 1.2884177326090766e-05, "loss": 8.7855, "step": 114460 }, { "epoch": 0.5716497290818747, "grad_norm": 0.09491303563117981, "learning_rate": 1.2882675411149216e-05, "loss": 8.806, "step": 114470 }, { "epoch": 0.5716996679068141, "grad_norm": 0.08952588587999344, "learning_rate": 1.2881173496207665e-05, "loss": 8.7896, "step": 114480 }, { "epoch": 0.5717496067317536, "grad_norm": 0.08914480358362198, "learning_rate": 1.2879671581266115e-05, "loss": 8.7765, "step": 114490 }, { "epoch": 0.5717995455566931, "grad_norm": 0.09196481853723526, "learning_rate": 1.2878169666324565e-05, "loss": 8.7767, "step": 114500 }, { "epoch": 0.5718494843816325, "grad_norm": 0.08663848787546158, "learning_rate": 1.2876667751383013e-05, "loss": 8.7991, "step": 114510 }, { "epoch": 0.5718994232065719, "grad_norm": 0.08811726421117783, "learning_rate": 1.2875165836441464e-05, "loss": 8.786, "step": 114520 }, { "epoch": 0.5719493620315114, "grad_norm": 0.09745927155017853, "learning_rate": 1.2873663921499912e-05, "loss": 8.7802, "step": 114530 }, { "epoch": 0.5719993008564509, "grad_norm": 0.09472652524709702, "learning_rate": 1.2872162006558362e-05, "loss": 8.7909, "step": 114540 }, { "epoch": 0.5720492396813903, "grad_norm": 0.10268702358007431, "learning_rate": 1.2870660091616812e-05, "loss": 8.7917, "step": 114550 }, { "epoch": 0.5720991785063297, "grad_norm": 0.09564602375030518, "learning_rate": 1.2869158176675261e-05, "loss": 8.7963, "step": 114560 }, { "epoch": 0.5721491173312692, "grad_norm": 0.08768244832754135, "learning_rate": 1.2867656261733711e-05, "loss": 8.7989, "step": 114570 }, { "epoch": 0.5721990561562087, "grad_norm": 0.09054630994796753, "learning_rate": 1.2866154346792161e-05, "loss": 8.7797, "step": 114580 }, { "epoch": 0.5722489949811481, "grad_norm": 0.09199676662683487, "learning_rate": 1.286465243185061e-05, "loss": 8.7763, "step": 114590 }, { "epoch": 0.5722989338060875, "grad_norm": 0.09266217797994614, "learning_rate": 1.286315051690906e-05, "loss": 8.793, "step": 114600 }, { "epoch": 0.572348872631027, "grad_norm": 0.09249533712863922, "learning_rate": 1.2861648601967508e-05, "loss": 8.7705, "step": 114610 }, { "epoch": 0.5723988114559665, "grad_norm": 0.0948820635676384, "learning_rate": 1.2860146687025959e-05, "loss": 8.7812, "step": 114620 }, { "epoch": 0.5724487502809059, "grad_norm": 0.08931874483823776, "learning_rate": 1.2858644772084409e-05, "loss": 8.782, "step": 114630 }, { "epoch": 0.5724986891058453, "grad_norm": 0.09403346478939056, "learning_rate": 1.2857142857142857e-05, "loss": 8.7976, "step": 114640 }, { "epoch": 0.5725486279307848, "grad_norm": 0.0919768437743187, "learning_rate": 1.2855640942201307e-05, "loss": 8.7873, "step": 114650 }, { "epoch": 0.5725985667557243, "grad_norm": 0.09091559052467346, "learning_rate": 1.2854139027259756e-05, "loss": 8.8047, "step": 114660 }, { "epoch": 0.5726485055806637, "grad_norm": 0.09633230417966843, "learning_rate": 1.2852637112318206e-05, "loss": 8.7847, "step": 114670 }, { "epoch": 0.5726984444056031, "grad_norm": 0.0949721410870552, "learning_rate": 1.2851135197376656e-05, "loss": 8.8071, "step": 114680 }, { "epoch": 0.5727483832305426, "grad_norm": 0.09185748547315598, "learning_rate": 1.2849633282435105e-05, "loss": 8.7908, "step": 114690 }, { "epoch": 0.5727983220554821, "grad_norm": 0.0928587019443512, "learning_rate": 1.2848131367493555e-05, "loss": 8.787, "step": 114700 }, { "epoch": 0.5728482608804215, "grad_norm": 0.09270650893449783, "learning_rate": 1.2846629452552003e-05, "loss": 8.7901, "step": 114710 }, { "epoch": 0.5728981997053609, "grad_norm": 0.09454198181629181, "learning_rate": 1.2845127537610454e-05, "loss": 8.7726, "step": 114720 }, { "epoch": 0.5729481385303004, "grad_norm": 0.09954354166984558, "learning_rate": 1.2843625622668904e-05, "loss": 8.7879, "step": 114730 }, { "epoch": 0.5729980773552398, "grad_norm": 0.09377159178256989, "learning_rate": 1.2842123707727354e-05, "loss": 8.7744, "step": 114740 }, { "epoch": 0.5730480161801793, "grad_norm": 0.08639419823884964, "learning_rate": 1.2840621792785802e-05, "loss": 8.7816, "step": 114750 }, { "epoch": 0.5730979550051187, "grad_norm": 0.0929587110877037, "learning_rate": 1.2839119877844251e-05, "loss": 8.7803, "step": 114760 }, { "epoch": 0.5731478938300582, "grad_norm": 0.09413690865039825, "learning_rate": 1.2837617962902701e-05, "loss": 8.7931, "step": 114770 }, { "epoch": 0.5731978326549976, "grad_norm": 0.09625568985939026, "learning_rate": 1.2836116047961151e-05, "loss": 8.774, "step": 114780 }, { "epoch": 0.5732477714799371, "grad_norm": 0.09371865540742874, "learning_rate": 1.2834614133019601e-05, "loss": 8.797, "step": 114790 }, { "epoch": 0.5732977103048765, "grad_norm": 0.09570909291505814, "learning_rate": 1.283311221807805e-05, "loss": 8.7847, "step": 114800 }, { "epoch": 0.573347649129816, "grad_norm": 0.09836754947900772, "learning_rate": 1.2831610303136498e-05, "loss": 8.7969, "step": 114810 }, { "epoch": 0.5733975879547554, "grad_norm": 0.08818056434392929, "learning_rate": 1.2830108388194949e-05, "loss": 8.7888, "step": 114820 }, { "epoch": 0.5734475267796949, "grad_norm": 0.09481636434793472, "learning_rate": 1.2828606473253399e-05, "loss": 8.7942, "step": 114830 }, { "epoch": 0.5734974656046343, "grad_norm": 0.09127473831176758, "learning_rate": 1.2827104558311849e-05, "loss": 8.7794, "step": 114840 }, { "epoch": 0.5735474044295737, "grad_norm": 0.08786597847938538, "learning_rate": 1.2825602643370297e-05, "loss": 8.7874, "step": 114850 }, { "epoch": 0.5735973432545132, "grad_norm": 0.10287149995565414, "learning_rate": 1.2824100728428746e-05, "loss": 8.7911, "step": 114860 }, { "epoch": 0.5736472820794527, "grad_norm": 0.09321471303701401, "learning_rate": 1.2822598813487196e-05, "loss": 8.7907, "step": 114870 }, { "epoch": 0.5736972209043921, "grad_norm": 0.08879370242357254, "learning_rate": 1.2821096898545646e-05, "loss": 8.7877, "step": 114880 }, { "epoch": 0.5737471597293315, "grad_norm": 0.0937252789735794, "learning_rate": 1.2819594983604096e-05, "loss": 8.7674, "step": 114890 }, { "epoch": 0.573797098554271, "grad_norm": 0.09370169043540955, "learning_rate": 1.2818093068662547e-05, "loss": 8.7896, "step": 114900 }, { "epoch": 0.5738470373792105, "grad_norm": 0.09470123052597046, "learning_rate": 1.2816591153720993e-05, "loss": 8.7953, "step": 114910 }, { "epoch": 0.5738969762041499, "grad_norm": 0.09248079359531403, "learning_rate": 1.2815089238779444e-05, "loss": 8.79, "step": 114920 }, { "epoch": 0.5739469150290893, "grad_norm": 0.09325503557920456, "learning_rate": 1.2813587323837894e-05, "loss": 8.7838, "step": 114930 }, { "epoch": 0.5739968538540288, "grad_norm": 0.0931696742773056, "learning_rate": 1.2812085408896344e-05, "loss": 8.7742, "step": 114940 }, { "epoch": 0.5740467926789683, "grad_norm": 0.09374930709600449, "learning_rate": 1.2810583493954794e-05, "loss": 8.7925, "step": 114950 }, { "epoch": 0.5740967315039077, "grad_norm": 0.08893626183271408, "learning_rate": 1.2809081579013241e-05, "loss": 8.7882, "step": 114960 }, { "epoch": 0.5741466703288471, "grad_norm": 0.08865047246217728, "learning_rate": 1.2807579664071691e-05, "loss": 8.7788, "step": 114970 }, { "epoch": 0.5741966091537866, "grad_norm": 0.09421852231025696, "learning_rate": 1.2806077749130141e-05, "loss": 8.7768, "step": 114980 }, { "epoch": 0.5742465479787261, "grad_norm": 0.08916622400283813, "learning_rate": 1.2804575834188591e-05, "loss": 8.785, "step": 114990 }, { "epoch": 0.5742964868036655, "grad_norm": 0.08971542119979858, "learning_rate": 1.2803073919247042e-05, "loss": 8.7801, "step": 115000 }, { "epoch": 0.5743464256286049, "grad_norm": 0.09803227335214615, "learning_rate": 1.2801572004305488e-05, "loss": 8.7843, "step": 115010 }, { "epoch": 0.5743963644535444, "grad_norm": 0.0927937850356102, "learning_rate": 1.2800070089363939e-05, "loss": 8.7938, "step": 115020 }, { "epoch": 0.5744463032784839, "grad_norm": 0.09266925603151321, "learning_rate": 1.2798568174422389e-05, "loss": 8.7783, "step": 115030 }, { "epoch": 0.5744962421034233, "grad_norm": 0.09375444054603577, "learning_rate": 1.2797066259480839e-05, "loss": 8.7916, "step": 115040 }, { "epoch": 0.5745461809283627, "grad_norm": 0.09235719591379166, "learning_rate": 1.2795564344539289e-05, "loss": 8.79, "step": 115050 }, { "epoch": 0.5745961197533022, "grad_norm": 0.09506914019584656, "learning_rate": 1.2794062429597738e-05, "loss": 8.7758, "step": 115060 }, { "epoch": 0.5746460585782417, "grad_norm": 0.09061712771654129, "learning_rate": 1.2792560514656186e-05, "loss": 8.785, "step": 115070 }, { "epoch": 0.5746959974031811, "grad_norm": 0.09398726373910904, "learning_rate": 1.2791058599714636e-05, "loss": 8.7835, "step": 115080 }, { "epoch": 0.5747459362281205, "grad_norm": 0.09454075247049332, "learning_rate": 1.2789556684773086e-05, "loss": 8.7984, "step": 115090 }, { "epoch": 0.57479587505306, "grad_norm": 0.09011261910200119, "learning_rate": 1.2788054769831537e-05, "loss": 8.7811, "step": 115100 }, { "epoch": 0.5748458138779995, "grad_norm": 0.09368349611759186, "learning_rate": 1.2786552854889985e-05, "loss": 8.7701, "step": 115110 }, { "epoch": 0.5748957527029389, "grad_norm": 0.09060084819793701, "learning_rate": 1.2785050939948434e-05, "loss": 8.78, "step": 115120 }, { "epoch": 0.5749456915278783, "grad_norm": 0.09300053119659424, "learning_rate": 1.2783549025006884e-05, "loss": 8.7671, "step": 115130 }, { "epoch": 0.5749956303528178, "grad_norm": 0.09098996222019196, "learning_rate": 1.2782047110065334e-05, "loss": 8.7889, "step": 115140 }, { "epoch": 0.5750455691777573, "grad_norm": 0.09513608366250992, "learning_rate": 1.2780545195123784e-05, "loss": 8.7721, "step": 115150 }, { "epoch": 0.5750955080026967, "grad_norm": 0.09404340386390686, "learning_rate": 1.2779043280182233e-05, "loss": 8.7911, "step": 115160 }, { "epoch": 0.5751454468276361, "grad_norm": 0.0925978347659111, "learning_rate": 1.2777541365240681e-05, "loss": 8.784, "step": 115170 }, { "epoch": 0.5751953856525756, "grad_norm": 0.09595629572868347, "learning_rate": 1.2776039450299131e-05, "loss": 8.7783, "step": 115180 }, { "epoch": 0.5752453244775151, "grad_norm": 0.09918168187141418, "learning_rate": 1.2774537535357581e-05, "loss": 8.7753, "step": 115190 }, { "epoch": 0.5752952633024545, "grad_norm": 0.09095029532909393, "learning_rate": 1.2773035620416032e-05, "loss": 8.7743, "step": 115200 }, { "epoch": 0.5753452021273939, "grad_norm": 0.09520848840475082, "learning_rate": 1.277153370547448e-05, "loss": 8.7882, "step": 115210 }, { "epoch": 0.5753951409523334, "grad_norm": 0.0901302695274353, "learning_rate": 1.277003179053293e-05, "loss": 8.7796, "step": 115220 }, { "epoch": 0.5754450797772729, "grad_norm": 0.08923668414354324, "learning_rate": 1.2768529875591379e-05, "loss": 8.7896, "step": 115230 }, { "epoch": 0.5754950186022123, "grad_norm": 0.09207792580127716, "learning_rate": 1.2767027960649829e-05, "loss": 8.7796, "step": 115240 }, { "epoch": 0.5755449574271517, "grad_norm": 0.09478235244750977, "learning_rate": 1.2765526045708279e-05, "loss": 8.7792, "step": 115250 }, { "epoch": 0.5755948962520911, "grad_norm": 0.09395542740821838, "learning_rate": 1.2764024130766728e-05, "loss": 8.791, "step": 115260 }, { "epoch": 0.5756448350770307, "grad_norm": 0.09286878257989883, "learning_rate": 1.2762522215825178e-05, "loss": 8.7711, "step": 115270 }, { "epoch": 0.5756947739019701, "grad_norm": 0.09151309728622437, "learning_rate": 1.2761020300883626e-05, "loss": 8.785, "step": 115280 }, { "epoch": 0.5757447127269095, "grad_norm": 0.09942897409200668, "learning_rate": 1.2759518385942076e-05, "loss": 8.7799, "step": 115290 }, { "epoch": 0.575794651551849, "grad_norm": 0.09082534909248352, "learning_rate": 1.2758016471000527e-05, "loss": 8.7904, "step": 115300 }, { "epoch": 0.5758445903767885, "grad_norm": 0.09576486796140671, "learning_rate": 1.2756514556058975e-05, "loss": 8.7816, "step": 115310 }, { "epoch": 0.5758945292017279, "grad_norm": 0.09835550934076309, "learning_rate": 1.2755012641117425e-05, "loss": 8.7852, "step": 115320 }, { "epoch": 0.5759444680266673, "grad_norm": 0.09621113538742065, "learning_rate": 1.2753510726175874e-05, "loss": 8.7841, "step": 115330 }, { "epoch": 0.5759944068516067, "grad_norm": 0.09935037046670914, "learning_rate": 1.2752008811234324e-05, "loss": 8.7874, "step": 115340 }, { "epoch": 0.5760443456765463, "grad_norm": 0.09516027569770813, "learning_rate": 1.2750506896292774e-05, "loss": 8.7866, "step": 115350 }, { "epoch": 0.5760942845014857, "grad_norm": 0.09592787176370621, "learning_rate": 1.2749004981351223e-05, "loss": 8.7759, "step": 115360 }, { "epoch": 0.5761442233264251, "grad_norm": 0.09324520081281662, "learning_rate": 1.2747503066409673e-05, "loss": 8.7756, "step": 115370 }, { "epoch": 0.5761941621513645, "grad_norm": 0.09475477784872055, "learning_rate": 1.2746001151468123e-05, "loss": 8.7751, "step": 115380 }, { "epoch": 0.5762441009763041, "grad_norm": 0.09500320255756378, "learning_rate": 1.2744499236526571e-05, "loss": 8.7876, "step": 115390 }, { "epoch": 0.5762940398012435, "grad_norm": 0.09369548410177231, "learning_rate": 1.2742997321585022e-05, "loss": 8.7883, "step": 115400 }, { "epoch": 0.5763439786261829, "grad_norm": 0.0879288762807846, "learning_rate": 1.274149540664347e-05, "loss": 8.7739, "step": 115410 }, { "epoch": 0.5763939174511223, "grad_norm": 0.09155362099409103, "learning_rate": 1.273999349170192e-05, "loss": 8.7865, "step": 115420 }, { "epoch": 0.5764438562760619, "grad_norm": 0.09589493274688721, "learning_rate": 1.273849157676037e-05, "loss": 8.7779, "step": 115430 }, { "epoch": 0.5764937951010013, "grad_norm": 0.09291618317365646, "learning_rate": 1.2736989661818819e-05, "loss": 8.7973, "step": 115440 }, { "epoch": 0.5765437339259407, "grad_norm": 0.08438339829444885, "learning_rate": 1.273548774687727e-05, "loss": 8.779, "step": 115450 }, { "epoch": 0.5765936727508801, "grad_norm": 0.09597896039485931, "learning_rate": 1.2733985831935718e-05, "loss": 8.7717, "step": 115460 }, { "epoch": 0.5766436115758197, "grad_norm": 0.09120340645313263, "learning_rate": 1.2732483916994168e-05, "loss": 8.7793, "step": 115470 }, { "epoch": 0.5766935504007591, "grad_norm": 0.09236761182546616, "learning_rate": 1.2730982002052618e-05, "loss": 8.7749, "step": 115480 }, { "epoch": 0.5767434892256985, "grad_norm": 0.09631557762622833, "learning_rate": 1.2729480087111066e-05, "loss": 8.7847, "step": 115490 }, { "epoch": 0.5767934280506379, "grad_norm": 0.09939119219779968, "learning_rate": 1.2727978172169517e-05, "loss": 8.7732, "step": 115500 }, { "epoch": 0.5768433668755775, "grad_norm": 0.08828873187303543, "learning_rate": 1.2726476257227965e-05, "loss": 8.7684, "step": 115510 }, { "epoch": 0.5768933057005169, "grad_norm": 0.09669400006532669, "learning_rate": 1.2724974342286415e-05, "loss": 8.782, "step": 115520 }, { "epoch": 0.5769432445254563, "grad_norm": 0.08688271045684814, "learning_rate": 1.2723472427344866e-05, "loss": 8.7748, "step": 115530 }, { "epoch": 0.5769931833503957, "grad_norm": 0.09424971789121628, "learning_rate": 1.2721970512403316e-05, "loss": 8.7726, "step": 115540 }, { "epoch": 0.5770431221753353, "grad_norm": 0.09156131744384766, "learning_rate": 1.2720468597461764e-05, "loss": 8.7737, "step": 115550 }, { "epoch": 0.5770930610002747, "grad_norm": 0.09321281313896179, "learning_rate": 1.2718966682520213e-05, "loss": 8.7779, "step": 115560 }, { "epoch": 0.5771429998252141, "grad_norm": 0.09447400271892548, "learning_rate": 1.2717464767578663e-05, "loss": 8.7736, "step": 115570 }, { "epoch": 0.5771929386501535, "grad_norm": 0.09802529215812683, "learning_rate": 1.2715962852637113e-05, "loss": 8.7908, "step": 115580 }, { "epoch": 0.5772428774750931, "grad_norm": 0.09059146791696548, "learning_rate": 1.2714460937695563e-05, "loss": 8.7868, "step": 115590 }, { "epoch": 0.5772928163000325, "grad_norm": 0.09354998171329498, "learning_rate": 1.2712959022754012e-05, "loss": 8.7936, "step": 115600 }, { "epoch": 0.5773427551249719, "grad_norm": 0.09096765518188477, "learning_rate": 1.271145710781246e-05, "loss": 8.7762, "step": 115610 }, { "epoch": 0.5773926939499113, "grad_norm": 0.09621896594762802, "learning_rate": 1.270995519287091e-05, "loss": 8.7725, "step": 115620 }, { "epoch": 0.5774426327748509, "grad_norm": 0.09030432254076004, "learning_rate": 1.270845327792936e-05, "loss": 8.7869, "step": 115630 }, { "epoch": 0.5774925715997903, "grad_norm": 0.0910486951470375, "learning_rate": 1.270695136298781e-05, "loss": 8.7925, "step": 115640 }, { "epoch": 0.5775425104247297, "grad_norm": 0.09086786955595016, "learning_rate": 1.270544944804626e-05, "loss": 8.7813, "step": 115650 }, { "epoch": 0.5775924492496691, "grad_norm": 0.08619526028633118, "learning_rate": 1.2703947533104708e-05, "loss": 8.7701, "step": 115660 }, { "epoch": 0.5776423880746087, "grad_norm": 0.09702730923891068, "learning_rate": 1.2702445618163158e-05, "loss": 8.78, "step": 115670 }, { "epoch": 0.5776923268995481, "grad_norm": 0.0912908986210823, "learning_rate": 1.2700943703221608e-05, "loss": 8.7743, "step": 115680 }, { "epoch": 0.5777422657244875, "grad_norm": 0.09520822018384933, "learning_rate": 1.2699441788280058e-05, "loss": 8.7819, "step": 115690 }, { "epoch": 0.5777922045494269, "grad_norm": 0.0918937474489212, "learning_rate": 1.2697939873338508e-05, "loss": 8.7656, "step": 115700 }, { "epoch": 0.5778421433743663, "grad_norm": 0.09397950768470764, "learning_rate": 1.2696437958396955e-05, "loss": 8.7783, "step": 115710 }, { "epoch": 0.5778920821993059, "grad_norm": 0.09476295858621597, "learning_rate": 1.2694936043455405e-05, "loss": 8.7867, "step": 115720 }, { "epoch": 0.5779420210242453, "grad_norm": 0.09248529374599457, "learning_rate": 1.2693434128513856e-05, "loss": 8.7845, "step": 115730 }, { "epoch": 0.5779919598491847, "grad_norm": 0.0914454385638237, "learning_rate": 1.2691932213572306e-05, "loss": 8.7718, "step": 115740 }, { "epoch": 0.5780418986741241, "grad_norm": 0.09722574055194855, "learning_rate": 1.2690430298630756e-05, "loss": 8.7653, "step": 115750 }, { "epoch": 0.5780918374990637, "grad_norm": 0.09682019799947739, "learning_rate": 1.2688928383689203e-05, "loss": 8.7623, "step": 115760 }, { "epoch": 0.5781417763240031, "grad_norm": 0.08837232738733292, "learning_rate": 1.2687426468747653e-05, "loss": 8.78, "step": 115770 }, { "epoch": 0.5781917151489425, "grad_norm": 0.0944056510925293, "learning_rate": 1.2685924553806103e-05, "loss": 8.7849, "step": 115780 }, { "epoch": 0.5782416539738819, "grad_norm": 0.0868343710899353, "learning_rate": 1.2684422638864553e-05, "loss": 8.7678, "step": 115790 }, { "epoch": 0.5782915927988215, "grad_norm": 0.09442039579153061, "learning_rate": 1.2682920723923003e-05, "loss": 8.7778, "step": 115800 }, { "epoch": 0.5783415316237609, "grad_norm": 0.08963579684495926, "learning_rate": 1.268141880898145e-05, "loss": 8.7754, "step": 115810 }, { "epoch": 0.5783914704487003, "grad_norm": 0.09150784462690353, "learning_rate": 1.26799168940399e-05, "loss": 8.7636, "step": 115820 }, { "epoch": 0.5784414092736397, "grad_norm": 0.09078077971935272, "learning_rate": 1.267841497909835e-05, "loss": 8.7658, "step": 115830 }, { "epoch": 0.5784913480985793, "grad_norm": 0.09242097288370132, "learning_rate": 1.26769130641568e-05, "loss": 8.7743, "step": 115840 }, { "epoch": 0.5785412869235187, "grad_norm": 0.09405604004859924, "learning_rate": 1.2675411149215251e-05, "loss": 8.7668, "step": 115850 }, { "epoch": 0.5785912257484581, "grad_norm": 0.09389903396368027, "learning_rate": 1.26739092342737e-05, "loss": 8.7732, "step": 115860 }, { "epoch": 0.5786411645733975, "grad_norm": 0.08914590626955032, "learning_rate": 1.2672407319332148e-05, "loss": 8.7808, "step": 115870 }, { "epoch": 0.5786911033983371, "grad_norm": 0.09185650944709778, "learning_rate": 1.2670905404390598e-05, "loss": 8.779, "step": 115880 }, { "epoch": 0.5787410422232765, "grad_norm": 0.091721311211586, "learning_rate": 1.2669403489449048e-05, "loss": 8.778, "step": 115890 }, { "epoch": 0.5787909810482159, "grad_norm": 0.08753747493028641, "learning_rate": 1.2667901574507498e-05, "loss": 8.7874, "step": 115900 }, { "epoch": 0.5788409198731553, "grad_norm": 0.09685467183589935, "learning_rate": 1.2666399659565947e-05, "loss": 8.7786, "step": 115910 }, { "epoch": 0.5788908586980949, "grad_norm": 0.09479455649852753, "learning_rate": 1.2664897744624395e-05, "loss": 8.7859, "step": 115920 }, { "epoch": 0.5789407975230343, "grad_norm": 0.09902866929769516, "learning_rate": 1.2663395829682846e-05, "loss": 8.7763, "step": 115930 }, { "epoch": 0.5789907363479737, "grad_norm": 0.09214528650045395, "learning_rate": 1.2661893914741296e-05, "loss": 8.7802, "step": 115940 }, { "epoch": 0.5790406751729131, "grad_norm": 0.09406938403844833, "learning_rate": 1.2660391999799746e-05, "loss": 8.7633, "step": 115950 }, { "epoch": 0.5790906139978527, "grad_norm": 0.0909062996506691, "learning_rate": 1.2658890084858194e-05, "loss": 8.7809, "step": 115960 }, { "epoch": 0.5791405528227921, "grad_norm": 0.08931753784418106, "learning_rate": 1.2657388169916643e-05, "loss": 8.781, "step": 115970 }, { "epoch": 0.5791904916477315, "grad_norm": 0.100962333381176, "learning_rate": 1.2655886254975093e-05, "loss": 8.7796, "step": 115980 }, { "epoch": 0.5792404304726709, "grad_norm": 0.09826654195785522, "learning_rate": 1.2654384340033543e-05, "loss": 8.7885, "step": 115990 }, { "epoch": 0.5792903692976105, "grad_norm": 0.09005758911371231, "learning_rate": 1.2652882425091993e-05, "loss": 8.7879, "step": 116000 }, { "epoch": 0.5793403081225499, "grad_norm": 0.09193582087755203, "learning_rate": 1.2651380510150442e-05, "loss": 8.7698, "step": 116010 }, { "epoch": 0.5793902469474893, "grad_norm": 0.09053417295217514, "learning_rate": 1.2649878595208892e-05, "loss": 8.7836, "step": 116020 }, { "epoch": 0.5794401857724287, "grad_norm": 0.09887261688709259, "learning_rate": 1.264837668026734e-05, "loss": 8.7667, "step": 116030 }, { "epoch": 0.5794901245973683, "grad_norm": 0.09462026506662369, "learning_rate": 1.264687476532579e-05, "loss": 8.7674, "step": 116040 }, { "epoch": 0.5795400634223077, "grad_norm": 0.0957246646285057, "learning_rate": 1.2645372850384241e-05, "loss": 8.7798, "step": 116050 }, { "epoch": 0.5795900022472471, "grad_norm": 0.10332443565130234, "learning_rate": 1.264387093544269e-05, "loss": 8.7828, "step": 116060 }, { "epoch": 0.5796399410721865, "grad_norm": 0.08929960429668427, "learning_rate": 1.264236902050114e-05, "loss": 8.7821, "step": 116070 }, { "epoch": 0.5796898798971261, "grad_norm": 0.09699279814958572, "learning_rate": 1.2640867105559588e-05, "loss": 8.7763, "step": 116080 }, { "epoch": 0.5797398187220655, "grad_norm": 0.0911206379532814, "learning_rate": 1.2639365190618038e-05, "loss": 8.7705, "step": 116090 }, { "epoch": 0.5797897575470049, "grad_norm": 0.09146827459335327, "learning_rate": 1.2637863275676488e-05, "loss": 8.7882, "step": 116100 }, { "epoch": 0.5798396963719443, "grad_norm": 0.09372538328170776, "learning_rate": 1.2636361360734937e-05, "loss": 8.7599, "step": 116110 }, { "epoch": 0.5798896351968839, "grad_norm": 0.09762945026159286, "learning_rate": 1.2634859445793387e-05, "loss": 8.7786, "step": 116120 }, { "epoch": 0.5799395740218233, "grad_norm": 0.09360945969820023, "learning_rate": 1.2633357530851836e-05, "loss": 8.7724, "step": 116130 }, { "epoch": 0.5799895128467627, "grad_norm": 0.09288088977336884, "learning_rate": 1.2631855615910286e-05, "loss": 8.7698, "step": 116140 }, { "epoch": 0.5800394516717021, "grad_norm": 0.09539160132408142, "learning_rate": 1.2630353700968736e-05, "loss": 8.7872, "step": 116150 }, { "epoch": 0.5800893904966417, "grad_norm": 0.08673470467329025, "learning_rate": 1.2628851786027184e-05, "loss": 8.7901, "step": 116160 }, { "epoch": 0.5801393293215811, "grad_norm": 0.09566935151815414, "learning_rate": 1.2627349871085635e-05, "loss": 8.7816, "step": 116170 }, { "epoch": 0.5801892681465205, "grad_norm": 0.09150587022304535, "learning_rate": 1.2625847956144085e-05, "loss": 8.7651, "step": 116180 }, { "epoch": 0.5802392069714599, "grad_norm": 0.08590695261955261, "learning_rate": 1.2624346041202533e-05, "loss": 8.7845, "step": 116190 }, { "epoch": 0.5802891457963995, "grad_norm": 0.09772205352783203, "learning_rate": 1.2622844126260983e-05, "loss": 8.7651, "step": 116200 }, { "epoch": 0.5803390846213389, "grad_norm": 0.09127950668334961, "learning_rate": 1.2621342211319432e-05, "loss": 8.7905, "step": 116210 }, { "epoch": 0.5803890234462783, "grad_norm": 0.09437844902276993, "learning_rate": 1.2619840296377882e-05, "loss": 8.7782, "step": 116220 }, { "epoch": 0.5804389622712177, "grad_norm": 0.08986880630254745, "learning_rate": 1.2618338381436332e-05, "loss": 8.7814, "step": 116230 }, { "epoch": 0.5804889010961573, "grad_norm": 0.09137626737356186, "learning_rate": 1.261683646649478e-05, "loss": 8.7681, "step": 116240 }, { "epoch": 0.5805388399210967, "grad_norm": 0.08857541531324387, "learning_rate": 1.2615334551553231e-05, "loss": 8.776, "step": 116250 }, { "epoch": 0.5805887787460361, "grad_norm": 0.0896126851439476, "learning_rate": 1.261383263661168e-05, "loss": 8.7803, "step": 116260 }, { "epoch": 0.5806387175709755, "grad_norm": 0.08985196799039841, "learning_rate": 1.261233072167013e-05, "loss": 8.7829, "step": 116270 }, { "epoch": 0.580688656395915, "grad_norm": 0.09273276478052139, "learning_rate": 1.261082880672858e-05, "loss": 8.7826, "step": 116280 }, { "epoch": 0.5807385952208545, "grad_norm": 0.08667043596506119, "learning_rate": 1.2609326891787028e-05, "loss": 8.7681, "step": 116290 }, { "epoch": 0.5807885340457939, "grad_norm": 0.09057953208684921, "learning_rate": 1.2607824976845478e-05, "loss": 8.7694, "step": 116300 }, { "epoch": 0.5808384728707333, "grad_norm": 0.09501492232084274, "learning_rate": 1.2606323061903927e-05, "loss": 8.7712, "step": 116310 }, { "epoch": 0.5808884116956728, "grad_norm": 0.08906516432762146, "learning_rate": 1.2604821146962377e-05, "loss": 8.788, "step": 116320 }, { "epoch": 0.5809383505206123, "grad_norm": 0.09163739532232285, "learning_rate": 1.2603319232020827e-05, "loss": 8.7738, "step": 116330 }, { "epoch": 0.5809882893455517, "grad_norm": 0.08950494229793549, "learning_rate": 1.2601817317079277e-05, "loss": 8.7851, "step": 116340 }, { "epoch": 0.5810382281704911, "grad_norm": 0.09091368317604065, "learning_rate": 1.2600315402137726e-05, "loss": 8.7618, "step": 116350 }, { "epoch": 0.5810881669954306, "grad_norm": 0.09785959124565125, "learning_rate": 1.2598813487196174e-05, "loss": 8.7833, "step": 116360 }, { "epoch": 0.5811381058203701, "grad_norm": 0.09358986467123032, "learning_rate": 1.2597311572254625e-05, "loss": 8.7742, "step": 116370 }, { "epoch": 0.5811880446453095, "grad_norm": 0.09452328830957413, "learning_rate": 1.2595809657313075e-05, "loss": 8.7679, "step": 116380 }, { "epoch": 0.5812379834702489, "grad_norm": 0.09855862706899643, "learning_rate": 1.2594307742371525e-05, "loss": 8.7724, "step": 116390 }, { "epoch": 0.5812879222951884, "grad_norm": 0.09987233579158783, "learning_rate": 1.2592805827429973e-05, "loss": 8.7737, "step": 116400 }, { "epoch": 0.5813378611201279, "grad_norm": 0.09587401151657104, "learning_rate": 1.2591303912488422e-05, "loss": 8.7721, "step": 116410 }, { "epoch": 0.5813877999450673, "grad_norm": 0.08716830611228943, "learning_rate": 1.2589801997546872e-05, "loss": 8.7768, "step": 116420 }, { "epoch": 0.5814377387700067, "grad_norm": 0.09757670015096664, "learning_rate": 1.2588300082605322e-05, "loss": 8.7556, "step": 116430 }, { "epoch": 0.5814876775949462, "grad_norm": 0.09444671124219894, "learning_rate": 1.2586798167663772e-05, "loss": 8.763, "step": 116440 }, { "epoch": 0.5815376164198857, "grad_norm": 0.091017946600914, "learning_rate": 1.2585296252722221e-05, "loss": 8.7753, "step": 116450 }, { "epoch": 0.5815875552448251, "grad_norm": 0.09647946804761887, "learning_rate": 1.258379433778067e-05, "loss": 8.7778, "step": 116460 }, { "epoch": 0.5816374940697645, "grad_norm": 0.09835417568683624, "learning_rate": 1.258229242283912e-05, "loss": 8.7821, "step": 116470 }, { "epoch": 0.581687432894704, "grad_norm": 0.09176066517829895, "learning_rate": 1.258079050789757e-05, "loss": 8.7742, "step": 116480 }, { "epoch": 0.5817373717196435, "grad_norm": 0.09349832683801651, "learning_rate": 1.257928859295602e-05, "loss": 8.7629, "step": 116490 }, { "epoch": 0.5817873105445829, "grad_norm": 0.0905727818608284, "learning_rate": 1.257778667801447e-05, "loss": 8.7742, "step": 116500 }, { "epoch": 0.5818372493695223, "grad_norm": 0.09379737824201584, "learning_rate": 1.2576284763072917e-05, "loss": 8.7643, "step": 116510 }, { "epoch": 0.5818871881944618, "grad_norm": 0.09791654348373413, "learning_rate": 1.2574782848131367e-05, "loss": 8.7811, "step": 116520 }, { "epoch": 0.5819371270194013, "grad_norm": 0.09698036313056946, "learning_rate": 1.2573280933189817e-05, "loss": 8.7858, "step": 116530 }, { "epoch": 0.5819870658443407, "grad_norm": 0.09158887714147568, "learning_rate": 1.2571779018248267e-05, "loss": 8.7815, "step": 116540 }, { "epoch": 0.5820370046692801, "grad_norm": 0.08619420230388641, "learning_rate": 1.2570277103306718e-05, "loss": 8.7721, "step": 116550 }, { "epoch": 0.5820869434942196, "grad_norm": 0.09012347459793091, "learning_rate": 1.2568775188365164e-05, "loss": 8.7651, "step": 116560 }, { "epoch": 0.582136882319159, "grad_norm": 0.0954323559999466, "learning_rate": 1.2567273273423615e-05, "loss": 8.7664, "step": 116570 }, { "epoch": 0.5821868211440985, "grad_norm": 0.09117588400840759, "learning_rate": 1.2565771358482065e-05, "loss": 8.7764, "step": 116580 }, { "epoch": 0.5822367599690379, "grad_norm": 0.09088102728128433, "learning_rate": 1.2564269443540515e-05, "loss": 8.7799, "step": 116590 }, { "epoch": 0.5822866987939774, "grad_norm": 0.09504615515470505, "learning_rate": 1.2562767528598965e-05, "loss": 8.7671, "step": 116600 }, { "epoch": 0.5823366376189169, "grad_norm": 0.09126445651054382, "learning_rate": 1.2561265613657414e-05, "loss": 8.7732, "step": 116610 }, { "epoch": 0.5823865764438563, "grad_norm": 0.0928245261311531, "learning_rate": 1.2559763698715862e-05, "loss": 8.7712, "step": 116620 }, { "epoch": 0.5824365152687957, "grad_norm": 0.09340432286262512, "learning_rate": 1.2558261783774312e-05, "loss": 8.7847, "step": 116630 }, { "epoch": 0.5824864540937352, "grad_norm": 0.09035646170377731, "learning_rate": 1.2556759868832762e-05, "loss": 8.7678, "step": 116640 }, { "epoch": 0.5825363929186747, "grad_norm": 0.09360393136739731, "learning_rate": 1.2555257953891213e-05, "loss": 8.7652, "step": 116650 }, { "epoch": 0.5825863317436141, "grad_norm": 0.09272781759500504, "learning_rate": 1.2553756038949661e-05, "loss": 8.7708, "step": 116660 }, { "epoch": 0.5826362705685535, "grad_norm": 0.09432695060968399, "learning_rate": 1.255225412400811e-05, "loss": 8.7648, "step": 116670 }, { "epoch": 0.582686209393493, "grad_norm": 0.10040241479873657, "learning_rate": 1.255075220906656e-05, "loss": 8.7673, "step": 116680 }, { "epoch": 0.5827361482184324, "grad_norm": 0.09128203243017197, "learning_rate": 1.254925029412501e-05, "loss": 8.7639, "step": 116690 }, { "epoch": 0.5827860870433719, "grad_norm": 0.08589877933263779, "learning_rate": 1.254774837918346e-05, "loss": 8.7859, "step": 116700 }, { "epoch": 0.5828360258683113, "grad_norm": 0.09354274719953537, "learning_rate": 1.2546246464241909e-05, "loss": 8.7649, "step": 116710 }, { "epoch": 0.5828859646932507, "grad_norm": 0.09442020207643509, "learning_rate": 1.2544744549300357e-05, "loss": 8.7609, "step": 116720 }, { "epoch": 0.5829359035181902, "grad_norm": 0.0930701419711113, "learning_rate": 1.2543242634358807e-05, "loss": 8.7645, "step": 116730 }, { "epoch": 0.5829858423431297, "grad_norm": 0.09396909922361374, "learning_rate": 1.2541740719417257e-05, "loss": 8.7725, "step": 116740 }, { "epoch": 0.5830357811680691, "grad_norm": 0.09508174657821655, "learning_rate": 1.2540238804475708e-05, "loss": 8.7659, "step": 116750 }, { "epoch": 0.5830857199930085, "grad_norm": 0.09655360132455826, "learning_rate": 1.2538736889534158e-05, "loss": 8.7918, "step": 116760 }, { "epoch": 0.583135658817948, "grad_norm": 0.09458701312541962, "learning_rate": 1.2537234974592605e-05, "loss": 8.7626, "step": 116770 }, { "epoch": 0.5831855976428875, "grad_norm": 0.09564093500375748, "learning_rate": 1.2535733059651055e-05, "loss": 8.77, "step": 116780 }, { "epoch": 0.5832355364678269, "grad_norm": 0.09138777107000351, "learning_rate": 1.2534231144709505e-05, "loss": 8.7656, "step": 116790 }, { "epoch": 0.5832854752927663, "grad_norm": 0.09551417827606201, "learning_rate": 1.2532729229767955e-05, "loss": 8.755, "step": 116800 }, { "epoch": 0.5833354141177058, "grad_norm": 0.09259354323148727, "learning_rate": 1.2531227314826405e-05, "loss": 8.7714, "step": 116810 }, { "epoch": 0.5833853529426453, "grad_norm": 0.09137726575136185, "learning_rate": 1.2529725399884854e-05, "loss": 8.7712, "step": 116820 }, { "epoch": 0.5834352917675847, "grad_norm": 0.08896147459745407, "learning_rate": 1.2528223484943302e-05, "loss": 8.7618, "step": 116830 }, { "epoch": 0.5834852305925241, "grad_norm": 0.09602434188127518, "learning_rate": 1.2526721570001752e-05, "loss": 8.7643, "step": 116840 }, { "epoch": 0.5835351694174636, "grad_norm": 0.0915607139468193, "learning_rate": 1.2525219655060203e-05, "loss": 8.7753, "step": 116850 }, { "epoch": 0.5835851082424031, "grad_norm": 0.09016492962837219, "learning_rate": 1.2523717740118653e-05, "loss": 8.7609, "step": 116860 }, { "epoch": 0.5836350470673425, "grad_norm": 0.08857151120901108, "learning_rate": 1.2522215825177101e-05, "loss": 8.7689, "step": 116870 }, { "epoch": 0.5836849858922819, "grad_norm": 0.09395987540483475, "learning_rate": 1.252071391023555e-05, "loss": 8.765, "step": 116880 }, { "epoch": 0.5837349247172214, "grad_norm": 0.09694436937570572, "learning_rate": 1.2519211995294e-05, "loss": 8.7747, "step": 116890 }, { "epoch": 0.5837848635421609, "grad_norm": 0.0934179425239563, "learning_rate": 1.251771008035245e-05, "loss": 8.7531, "step": 116900 }, { "epoch": 0.5838348023671003, "grad_norm": 0.08705223351716995, "learning_rate": 1.25162081654109e-05, "loss": 8.7724, "step": 116910 }, { "epoch": 0.5838847411920397, "grad_norm": 0.09162668138742447, "learning_rate": 1.2514706250469349e-05, "loss": 8.7788, "step": 116920 }, { "epoch": 0.5839346800169792, "grad_norm": 0.08836338669061661, "learning_rate": 1.2513204335527797e-05, "loss": 8.7742, "step": 116930 }, { "epoch": 0.5839846188419187, "grad_norm": 0.08714136481285095, "learning_rate": 1.2511702420586247e-05, "loss": 8.7651, "step": 116940 }, { "epoch": 0.5840345576668581, "grad_norm": 0.09328541159629822, "learning_rate": 1.2510200505644698e-05, "loss": 8.7717, "step": 116950 }, { "epoch": 0.5840844964917975, "grad_norm": 0.09145410358905792, "learning_rate": 1.2508698590703148e-05, "loss": 8.77, "step": 116960 }, { "epoch": 0.584134435316737, "grad_norm": 0.08732394874095917, "learning_rate": 1.2507196675761596e-05, "loss": 8.7813, "step": 116970 }, { "epoch": 0.5841843741416765, "grad_norm": 0.09267169237136841, "learning_rate": 1.2505694760820046e-05, "loss": 8.7717, "step": 116980 }, { "epoch": 0.5842343129666159, "grad_norm": 0.08930670469999313, "learning_rate": 1.2504192845878495e-05, "loss": 8.7648, "step": 116990 }, { "epoch": 0.5842842517915553, "grad_norm": 0.09315798431634903, "learning_rate": 1.2502690930936945e-05, "loss": 8.7702, "step": 117000 }, { "epoch": 0.5843341906164948, "grad_norm": 0.0956578180193901, "learning_rate": 1.2501189015995395e-05, "loss": 8.7584, "step": 117010 }, { "epoch": 0.5843841294414343, "grad_norm": 0.0876522958278656, "learning_rate": 1.2499687101053844e-05, "loss": 8.7719, "step": 117020 }, { "epoch": 0.5844340682663737, "grad_norm": 0.09839426726102829, "learning_rate": 1.2498185186112294e-05, "loss": 8.7733, "step": 117030 }, { "epoch": 0.5844840070913131, "grad_norm": 0.09518906474113464, "learning_rate": 1.2496683271170742e-05, "loss": 8.7664, "step": 117040 }, { "epoch": 0.5845339459162526, "grad_norm": 0.09161265939474106, "learning_rate": 1.2495181356229193e-05, "loss": 8.7651, "step": 117050 }, { "epoch": 0.584583884741192, "grad_norm": 0.08892537653446198, "learning_rate": 1.2493679441287643e-05, "loss": 8.7663, "step": 117060 }, { "epoch": 0.5846338235661315, "grad_norm": 0.09384747594594955, "learning_rate": 1.2492177526346091e-05, "loss": 8.7786, "step": 117070 }, { "epoch": 0.5846837623910709, "grad_norm": 0.09078042209148407, "learning_rate": 1.2490675611404542e-05, "loss": 8.7735, "step": 117080 }, { "epoch": 0.5847337012160104, "grad_norm": 0.09043184667825699, "learning_rate": 1.248917369646299e-05, "loss": 8.7555, "step": 117090 }, { "epoch": 0.5847836400409498, "grad_norm": 0.09139489382505417, "learning_rate": 1.248767178152144e-05, "loss": 8.7699, "step": 117100 }, { "epoch": 0.5848335788658893, "grad_norm": 0.09362711757421494, "learning_rate": 1.248616986657989e-05, "loss": 8.7641, "step": 117110 }, { "epoch": 0.5848835176908287, "grad_norm": 0.09047945588827133, "learning_rate": 1.2484667951638339e-05, "loss": 8.766, "step": 117120 }, { "epoch": 0.5849334565157682, "grad_norm": 0.09149231016635895, "learning_rate": 1.2483166036696789e-05, "loss": 8.7654, "step": 117130 }, { "epoch": 0.5849833953407076, "grad_norm": 0.09510745108127594, "learning_rate": 1.2481664121755237e-05, "loss": 8.7704, "step": 117140 }, { "epoch": 0.5850333341656471, "grad_norm": 0.09530346840620041, "learning_rate": 1.2480162206813688e-05, "loss": 8.777, "step": 117150 }, { "epoch": 0.5850832729905865, "grad_norm": 0.09704668074846268, "learning_rate": 1.2478660291872138e-05, "loss": 8.7655, "step": 117160 }, { "epoch": 0.585133211815526, "grad_norm": 0.095854252576828, "learning_rate": 1.2477158376930586e-05, "loss": 8.7692, "step": 117170 }, { "epoch": 0.5851831506404654, "grad_norm": 0.09275175631046295, "learning_rate": 1.2475656461989037e-05, "loss": 8.7775, "step": 117180 }, { "epoch": 0.5852330894654049, "grad_norm": 0.09659481048583984, "learning_rate": 1.2474154547047487e-05, "loss": 8.7822, "step": 117190 }, { "epoch": 0.5852830282903443, "grad_norm": 0.08653581887483597, "learning_rate": 1.2472652632105935e-05, "loss": 8.774, "step": 117200 }, { "epoch": 0.5853329671152838, "grad_norm": 0.0911843553185463, "learning_rate": 1.2471150717164385e-05, "loss": 8.7828, "step": 117210 }, { "epoch": 0.5853829059402232, "grad_norm": 0.09394507855176926, "learning_rate": 1.2469648802222834e-05, "loss": 8.7798, "step": 117220 }, { "epoch": 0.5854328447651627, "grad_norm": 0.0936194658279419, "learning_rate": 1.2468146887281284e-05, "loss": 8.7758, "step": 117230 }, { "epoch": 0.5854827835901021, "grad_norm": 0.09510088711977005, "learning_rate": 1.2466644972339734e-05, "loss": 8.7771, "step": 117240 }, { "epoch": 0.5855327224150416, "grad_norm": 0.09112706780433655, "learning_rate": 1.2465143057398183e-05, "loss": 8.7511, "step": 117250 }, { "epoch": 0.585582661239981, "grad_norm": 0.08992750197649002, "learning_rate": 1.2463641142456633e-05, "loss": 8.766, "step": 117260 }, { "epoch": 0.5856326000649205, "grad_norm": 0.09287506341934204, "learning_rate": 1.2462139227515081e-05, "loss": 8.7652, "step": 117270 }, { "epoch": 0.5856825388898599, "grad_norm": 0.09044741094112396, "learning_rate": 1.2460637312573532e-05, "loss": 8.7573, "step": 117280 }, { "epoch": 0.5857324777147994, "grad_norm": 0.09210161119699478, "learning_rate": 1.2459135397631982e-05, "loss": 8.7738, "step": 117290 }, { "epoch": 0.5857824165397388, "grad_norm": 0.08944103866815567, "learning_rate": 1.245763348269043e-05, "loss": 8.766, "step": 117300 }, { "epoch": 0.5858323553646783, "grad_norm": 0.09100234508514404, "learning_rate": 1.245613156774888e-05, "loss": 8.78, "step": 117310 }, { "epoch": 0.5858822941896177, "grad_norm": 0.09310677647590637, "learning_rate": 1.2454629652807329e-05, "loss": 8.7623, "step": 117320 }, { "epoch": 0.5859322330145572, "grad_norm": 0.09582335501909256, "learning_rate": 1.2453127737865779e-05, "loss": 8.7645, "step": 117330 }, { "epoch": 0.5859821718394966, "grad_norm": 0.09254664927721024, "learning_rate": 1.245162582292423e-05, "loss": 8.7731, "step": 117340 }, { "epoch": 0.5860321106644361, "grad_norm": 0.09486523270606995, "learning_rate": 1.245012390798268e-05, "loss": 8.7579, "step": 117350 }, { "epoch": 0.5860820494893755, "grad_norm": 0.101737841963768, "learning_rate": 1.2448621993041128e-05, "loss": 8.7666, "step": 117360 }, { "epoch": 0.586131988314315, "grad_norm": 0.08502329140901566, "learning_rate": 1.2447120078099576e-05, "loss": 8.7802, "step": 117370 }, { "epoch": 0.5861819271392544, "grad_norm": 0.09242632240056992, "learning_rate": 1.2445618163158027e-05, "loss": 8.7565, "step": 117380 }, { "epoch": 0.5862318659641939, "grad_norm": 0.08945038169622421, "learning_rate": 1.2444116248216477e-05, "loss": 8.7817, "step": 117390 }, { "epoch": 0.5862818047891333, "grad_norm": 0.09282662719488144, "learning_rate": 1.2442614333274927e-05, "loss": 8.7616, "step": 117400 }, { "epoch": 0.5863317436140728, "grad_norm": 0.08877874165773392, "learning_rate": 1.2441112418333375e-05, "loss": 8.7676, "step": 117410 }, { "epoch": 0.5863816824390122, "grad_norm": 0.09669318050146103, "learning_rate": 1.2439610503391824e-05, "loss": 8.7787, "step": 117420 }, { "epoch": 0.5864316212639517, "grad_norm": 0.08970874547958374, "learning_rate": 1.2438108588450274e-05, "loss": 8.7688, "step": 117430 }, { "epoch": 0.5864815600888911, "grad_norm": 0.09568735212087631, "learning_rate": 1.2436606673508724e-05, "loss": 8.7597, "step": 117440 }, { "epoch": 0.5865314989138306, "grad_norm": 0.09088582545518875, "learning_rate": 1.2435104758567174e-05, "loss": 8.7621, "step": 117450 }, { "epoch": 0.58658143773877, "grad_norm": 0.08822001516819, "learning_rate": 1.2433602843625623e-05, "loss": 8.7695, "step": 117460 }, { "epoch": 0.5866313765637095, "grad_norm": 0.09296117722988129, "learning_rate": 1.2432100928684071e-05, "loss": 8.7744, "step": 117470 }, { "epoch": 0.5866813153886489, "grad_norm": 0.09976507723331451, "learning_rate": 1.2430599013742522e-05, "loss": 8.7743, "step": 117480 }, { "epoch": 0.5867312542135884, "grad_norm": 0.09362848848104477, "learning_rate": 1.2429097098800972e-05, "loss": 8.7735, "step": 117490 }, { "epoch": 0.5867811930385278, "grad_norm": 0.08755030483007431, "learning_rate": 1.2427595183859422e-05, "loss": 8.772, "step": 117500 }, { "epoch": 0.5868311318634672, "grad_norm": 0.09280279278755188, "learning_rate": 1.2426093268917872e-05, "loss": 8.7612, "step": 117510 }, { "epoch": 0.5868810706884067, "grad_norm": 0.08822941035032272, "learning_rate": 1.2424591353976319e-05, "loss": 8.7682, "step": 117520 }, { "epoch": 0.5869310095133462, "grad_norm": 0.09698329120874405, "learning_rate": 1.2423089439034769e-05, "loss": 8.753, "step": 117530 }, { "epoch": 0.5869809483382856, "grad_norm": 0.09743697941303253, "learning_rate": 1.242158752409322e-05, "loss": 8.7608, "step": 117540 }, { "epoch": 0.587030887163225, "grad_norm": 0.09233416616916656, "learning_rate": 1.242008560915167e-05, "loss": 8.7681, "step": 117550 }, { "epoch": 0.5870808259881645, "grad_norm": 0.09132979065179825, "learning_rate": 1.241858369421012e-05, "loss": 8.7779, "step": 117560 }, { "epoch": 0.587130764813104, "grad_norm": 0.08776623755693436, "learning_rate": 1.2417081779268566e-05, "loss": 8.7762, "step": 117570 }, { "epoch": 0.5871807036380434, "grad_norm": 0.09042856097221375, "learning_rate": 1.2415579864327017e-05, "loss": 8.7604, "step": 117580 }, { "epoch": 0.5872306424629828, "grad_norm": 0.09197208285331726, "learning_rate": 1.2414077949385467e-05, "loss": 8.7629, "step": 117590 }, { "epoch": 0.5872805812879223, "grad_norm": 0.09105636179447174, "learning_rate": 1.2412576034443917e-05, "loss": 8.7535, "step": 117600 }, { "epoch": 0.5873305201128618, "grad_norm": 0.0907004103064537, "learning_rate": 1.2411074119502367e-05, "loss": 8.7604, "step": 117610 }, { "epoch": 0.5873804589378012, "grad_norm": 0.09452099353075027, "learning_rate": 1.2409572204560814e-05, "loss": 8.757, "step": 117620 }, { "epoch": 0.5874303977627406, "grad_norm": 0.09557855874300003, "learning_rate": 1.2408070289619264e-05, "loss": 8.7649, "step": 117630 }, { "epoch": 0.5874803365876801, "grad_norm": 0.09412212669849396, "learning_rate": 1.2406568374677714e-05, "loss": 8.7602, "step": 117640 }, { "epoch": 0.5875302754126196, "grad_norm": 0.09182430058717728, "learning_rate": 1.2405066459736164e-05, "loss": 8.7597, "step": 117650 }, { "epoch": 0.587580214237559, "grad_norm": 0.08901126682758331, "learning_rate": 1.2403564544794615e-05, "loss": 8.7648, "step": 117660 }, { "epoch": 0.5876301530624984, "grad_norm": 0.09120889008045197, "learning_rate": 1.2402062629853063e-05, "loss": 8.752, "step": 117670 }, { "epoch": 0.5876800918874379, "grad_norm": 0.09408021718263626, "learning_rate": 1.2400560714911512e-05, "loss": 8.7826, "step": 117680 }, { "epoch": 0.5877300307123773, "grad_norm": 0.0881025642156601, "learning_rate": 1.2399058799969962e-05, "loss": 8.7617, "step": 117690 }, { "epoch": 0.5877799695373168, "grad_norm": 0.09191694110631943, "learning_rate": 1.2397556885028412e-05, "loss": 8.7643, "step": 117700 }, { "epoch": 0.5878299083622562, "grad_norm": 0.08953646570444107, "learning_rate": 1.2396054970086862e-05, "loss": 8.7694, "step": 117710 }, { "epoch": 0.5878798471871957, "grad_norm": 0.08859028667211533, "learning_rate": 1.239455305514531e-05, "loss": 8.773, "step": 117720 }, { "epoch": 0.5879297860121351, "grad_norm": 0.09517339617013931, "learning_rate": 1.2393051140203759e-05, "loss": 8.7629, "step": 117730 }, { "epoch": 0.5879797248370746, "grad_norm": 0.09675818681716919, "learning_rate": 1.239154922526221e-05, "loss": 8.7473, "step": 117740 }, { "epoch": 0.588029663662014, "grad_norm": 0.09151841700077057, "learning_rate": 1.239004731032066e-05, "loss": 8.7576, "step": 117750 }, { "epoch": 0.5880796024869535, "grad_norm": 0.09673187136650085, "learning_rate": 1.238854539537911e-05, "loss": 8.7646, "step": 117760 }, { "epoch": 0.5881295413118929, "grad_norm": 0.0907597616314888, "learning_rate": 1.2387043480437558e-05, "loss": 8.7586, "step": 117770 }, { "epoch": 0.5881794801368324, "grad_norm": 0.09877236932516098, "learning_rate": 1.2385541565496007e-05, "loss": 8.759, "step": 117780 }, { "epoch": 0.5882294189617718, "grad_norm": 0.09087306261062622, "learning_rate": 1.2384039650554457e-05, "loss": 8.7683, "step": 117790 }, { "epoch": 0.5882793577867113, "grad_norm": 0.09113863110542297, "learning_rate": 1.2382537735612907e-05, "loss": 8.7498, "step": 117800 }, { "epoch": 0.5883292966116507, "grad_norm": 0.09708728641271591, "learning_rate": 1.2381035820671357e-05, "loss": 8.7612, "step": 117810 }, { "epoch": 0.5883792354365902, "grad_norm": 0.09207798540592194, "learning_rate": 1.2379533905729806e-05, "loss": 8.771, "step": 117820 }, { "epoch": 0.5884291742615296, "grad_norm": 0.09187018126249313, "learning_rate": 1.2378031990788256e-05, "loss": 8.7732, "step": 117830 }, { "epoch": 0.588479113086469, "grad_norm": 0.09126574546098709, "learning_rate": 1.2376530075846704e-05, "loss": 8.7423, "step": 117840 }, { "epoch": 0.5885290519114085, "grad_norm": 0.09289278835058212, "learning_rate": 1.2375028160905154e-05, "loss": 8.7448, "step": 117850 }, { "epoch": 0.588578990736348, "grad_norm": 0.09014803171157837, "learning_rate": 1.2373526245963605e-05, "loss": 8.7721, "step": 117860 }, { "epoch": 0.5886289295612874, "grad_norm": 0.09293773025274277, "learning_rate": 1.2372024331022053e-05, "loss": 8.7588, "step": 117870 }, { "epoch": 0.5886788683862269, "grad_norm": 0.08857384324073792, "learning_rate": 1.2370522416080503e-05, "loss": 8.7656, "step": 117880 }, { "epoch": 0.5887288072111663, "grad_norm": 0.09308899194002151, "learning_rate": 1.2369020501138952e-05, "loss": 8.7587, "step": 117890 }, { "epoch": 0.5887787460361058, "grad_norm": 0.09509921073913574, "learning_rate": 1.2367518586197402e-05, "loss": 8.7589, "step": 117900 }, { "epoch": 0.5888286848610452, "grad_norm": 0.090652696788311, "learning_rate": 1.2366016671255852e-05, "loss": 8.7635, "step": 117910 }, { "epoch": 0.5888786236859846, "grad_norm": 0.08929238468408585, "learning_rate": 1.23645147563143e-05, "loss": 8.7458, "step": 117920 }, { "epoch": 0.5889285625109241, "grad_norm": 0.09252835810184479, "learning_rate": 1.236301284137275e-05, "loss": 8.7504, "step": 117930 }, { "epoch": 0.5889785013358636, "grad_norm": 0.09552726149559021, "learning_rate": 1.23615109264312e-05, "loss": 8.7493, "step": 117940 }, { "epoch": 0.589028440160803, "grad_norm": 0.08871292322874069, "learning_rate": 1.236000901148965e-05, "loss": 8.7727, "step": 117950 }, { "epoch": 0.5890783789857424, "grad_norm": 0.09849249571561813, "learning_rate": 1.23585070965481e-05, "loss": 8.7701, "step": 117960 }, { "epoch": 0.5891283178106819, "grad_norm": 0.09064419567584991, "learning_rate": 1.2357005181606548e-05, "loss": 8.7677, "step": 117970 }, { "epoch": 0.5891782566356214, "grad_norm": 0.096795454621315, "learning_rate": 1.2355503266664998e-05, "loss": 8.7601, "step": 117980 }, { "epoch": 0.5892281954605608, "grad_norm": 0.09287415444850922, "learning_rate": 1.2354001351723448e-05, "loss": 8.7754, "step": 117990 }, { "epoch": 0.5892781342855002, "grad_norm": 0.09310322254896164, "learning_rate": 1.2352499436781897e-05, "loss": 8.7775, "step": 118000 }, { "epoch": 0.5893280731104397, "grad_norm": 0.09387247264385223, "learning_rate": 1.2350997521840347e-05, "loss": 8.7668, "step": 118010 }, { "epoch": 0.5893780119353792, "grad_norm": 0.09476472437381744, "learning_rate": 1.2349495606898796e-05, "loss": 8.7517, "step": 118020 }, { "epoch": 0.5894279507603186, "grad_norm": 0.09105034172534943, "learning_rate": 1.2347993691957246e-05, "loss": 8.7663, "step": 118030 }, { "epoch": 0.589477889585258, "grad_norm": 0.09070300310850143, "learning_rate": 1.2346491777015696e-05, "loss": 8.7565, "step": 118040 }, { "epoch": 0.5895278284101975, "grad_norm": 0.09422162175178528, "learning_rate": 1.2344989862074144e-05, "loss": 8.7575, "step": 118050 }, { "epoch": 0.589577767235137, "grad_norm": 0.08815252780914307, "learning_rate": 1.2343487947132595e-05, "loss": 8.7475, "step": 118060 }, { "epoch": 0.5896277060600764, "grad_norm": 0.08904334157705307, "learning_rate": 1.2341986032191043e-05, "loss": 8.7518, "step": 118070 }, { "epoch": 0.5896776448850158, "grad_norm": 0.09929101914167404, "learning_rate": 1.2340484117249493e-05, "loss": 8.7426, "step": 118080 }, { "epoch": 0.5897275837099553, "grad_norm": 0.09539781510829926, "learning_rate": 1.2338982202307943e-05, "loss": 8.7484, "step": 118090 }, { "epoch": 0.5897775225348948, "grad_norm": 0.09773596376180649, "learning_rate": 1.2337480287366392e-05, "loss": 8.7371, "step": 118100 }, { "epoch": 0.5898274613598342, "grad_norm": 0.09675204008817673, "learning_rate": 1.2335978372424842e-05, "loss": 8.7507, "step": 118110 }, { "epoch": 0.5898774001847736, "grad_norm": 0.09443136304616928, "learning_rate": 1.233447645748329e-05, "loss": 8.7688, "step": 118120 }, { "epoch": 0.5899273390097131, "grad_norm": 0.09123744815587997, "learning_rate": 1.233297454254174e-05, "loss": 8.7677, "step": 118130 }, { "epoch": 0.5899772778346526, "grad_norm": 0.09106560051441193, "learning_rate": 1.2331472627600191e-05, "loss": 8.7569, "step": 118140 }, { "epoch": 0.590027216659592, "grad_norm": 0.08924289792776108, "learning_rate": 1.2329970712658641e-05, "loss": 8.7577, "step": 118150 }, { "epoch": 0.5900771554845314, "grad_norm": 0.08827322721481323, "learning_rate": 1.232846879771709e-05, "loss": 8.7717, "step": 118160 }, { "epoch": 0.5901270943094709, "grad_norm": 0.08779691904783249, "learning_rate": 1.2326966882775538e-05, "loss": 8.7642, "step": 118170 }, { "epoch": 0.5901770331344104, "grad_norm": 0.09139509499073029, "learning_rate": 1.2325464967833988e-05, "loss": 8.777, "step": 118180 }, { "epoch": 0.5902269719593498, "grad_norm": 0.0951077789068222, "learning_rate": 1.2323963052892438e-05, "loss": 8.7487, "step": 118190 }, { "epoch": 0.5902769107842892, "grad_norm": 0.09444812685251236, "learning_rate": 1.2322461137950889e-05, "loss": 8.752, "step": 118200 }, { "epoch": 0.5903268496092287, "grad_norm": 0.0971173644065857, "learning_rate": 1.2320959223009337e-05, "loss": 8.7693, "step": 118210 }, { "epoch": 0.5903767884341682, "grad_norm": 0.08963154256343842, "learning_rate": 1.2319457308067786e-05, "loss": 8.7772, "step": 118220 }, { "epoch": 0.5904267272591076, "grad_norm": 0.09564877301454544, "learning_rate": 1.2317955393126236e-05, "loss": 8.771, "step": 118230 }, { "epoch": 0.590476666084047, "grad_norm": 0.09680573642253876, "learning_rate": 1.2316453478184686e-05, "loss": 8.7641, "step": 118240 }, { "epoch": 0.5905266049089865, "grad_norm": 0.09979761391878128, "learning_rate": 1.2314951563243136e-05, "loss": 8.7556, "step": 118250 }, { "epoch": 0.590576543733926, "grad_norm": 0.0942818745970726, "learning_rate": 1.2313449648301585e-05, "loss": 8.7516, "step": 118260 }, { "epoch": 0.5906264825588654, "grad_norm": 0.0858750268816948, "learning_rate": 1.2311947733360033e-05, "loss": 8.7616, "step": 118270 }, { "epoch": 0.5906764213838048, "grad_norm": 0.09308760613203049, "learning_rate": 1.2310445818418483e-05, "loss": 8.7686, "step": 118280 }, { "epoch": 0.5907263602087443, "grad_norm": 0.09751647710800171, "learning_rate": 1.2308943903476933e-05, "loss": 8.7575, "step": 118290 }, { "epoch": 0.5907762990336838, "grad_norm": 0.09501097351312637, "learning_rate": 1.2307441988535384e-05, "loss": 8.7436, "step": 118300 }, { "epoch": 0.5908262378586232, "grad_norm": 0.09237045794725418, "learning_rate": 1.2305940073593834e-05, "loss": 8.7537, "step": 118310 }, { "epoch": 0.5908761766835626, "grad_norm": 0.08817274123430252, "learning_rate": 1.230443815865228e-05, "loss": 8.7618, "step": 118320 }, { "epoch": 0.590926115508502, "grad_norm": 0.08590534329414368, "learning_rate": 1.230293624371073e-05, "loss": 8.7611, "step": 118330 }, { "epoch": 0.5909760543334416, "grad_norm": 0.09068264812231064, "learning_rate": 1.2301434328769181e-05, "loss": 8.7521, "step": 118340 }, { "epoch": 0.591025993158381, "grad_norm": 0.08929526805877686, "learning_rate": 1.2299932413827631e-05, "loss": 8.7686, "step": 118350 }, { "epoch": 0.5910759319833204, "grad_norm": 0.09027723968029022, "learning_rate": 1.2298430498886081e-05, "loss": 8.7556, "step": 118360 }, { "epoch": 0.5911258708082598, "grad_norm": 0.08829135447740555, "learning_rate": 1.2296928583944528e-05, "loss": 8.7631, "step": 118370 }, { "epoch": 0.5911758096331994, "grad_norm": 0.08966977894306183, "learning_rate": 1.2295426669002978e-05, "loss": 8.7739, "step": 118380 }, { "epoch": 0.5912257484581388, "grad_norm": 0.09342916309833527, "learning_rate": 1.2293924754061428e-05, "loss": 8.76, "step": 118390 }, { "epoch": 0.5912756872830782, "grad_norm": 0.09105119109153748, "learning_rate": 1.2292422839119879e-05, "loss": 8.7592, "step": 118400 }, { "epoch": 0.5913256261080176, "grad_norm": 0.09159651398658752, "learning_rate": 1.2290920924178329e-05, "loss": 8.7641, "step": 118410 }, { "epoch": 0.5913755649329572, "grad_norm": 0.08399590849876404, "learning_rate": 1.2289419009236776e-05, "loss": 8.7847, "step": 118420 }, { "epoch": 0.5914255037578966, "grad_norm": 0.08947955816984177, "learning_rate": 1.2287917094295226e-05, "loss": 8.7599, "step": 118430 }, { "epoch": 0.591475442582836, "grad_norm": 0.08784180134534836, "learning_rate": 1.2286415179353676e-05, "loss": 8.7619, "step": 118440 }, { "epoch": 0.5915253814077754, "grad_norm": 0.09851372241973877, "learning_rate": 1.2284913264412126e-05, "loss": 8.7478, "step": 118450 }, { "epoch": 0.591575320232715, "grad_norm": 0.09141961485147476, "learning_rate": 1.2283411349470576e-05, "loss": 8.7752, "step": 118460 }, { "epoch": 0.5916252590576544, "grad_norm": 0.09081381559371948, "learning_rate": 1.2281909434529025e-05, "loss": 8.7681, "step": 118470 }, { "epoch": 0.5916751978825938, "grad_norm": 0.0934201031923294, "learning_rate": 1.2280407519587473e-05, "loss": 8.7523, "step": 118480 }, { "epoch": 0.5917251367075332, "grad_norm": 0.09097079187631607, "learning_rate": 1.2278905604645923e-05, "loss": 8.7642, "step": 118490 }, { "epoch": 0.5917750755324728, "grad_norm": 0.09448742121458054, "learning_rate": 1.2277403689704374e-05, "loss": 8.7639, "step": 118500 }, { "epoch": 0.5918250143574122, "grad_norm": 0.09550192207098007, "learning_rate": 1.2275901774762824e-05, "loss": 8.7621, "step": 118510 }, { "epoch": 0.5918749531823516, "grad_norm": 0.0902007445693016, "learning_rate": 1.2274399859821272e-05, "loss": 8.7534, "step": 118520 }, { "epoch": 0.591924892007291, "grad_norm": 0.09537496417760849, "learning_rate": 1.227289794487972e-05, "loss": 8.7706, "step": 118530 }, { "epoch": 0.5919748308322306, "grad_norm": 0.09085094183683395, "learning_rate": 1.2271396029938171e-05, "loss": 8.755, "step": 118540 }, { "epoch": 0.59202476965717, "grad_norm": 0.09145759046077728, "learning_rate": 1.2269894114996621e-05, "loss": 8.7584, "step": 118550 }, { "epoch": 0.5920747084821094, "grad_norm": 0.08843749761581421, "learning_rate": 1.2268392200055071e-05, "loss": 8.7606, "step": 118560 }, { "epoch": 0.5921246473070488, "grad_norm": 0.08855432271957397, "learning_rate": 1.226689028511352e-05, "loss": 8.7522, "step": 118570 }, { "epoch": 0.5921745861319884, "grad_norm": 0.09247451275587082, "learning_rate": 1.2265388370171968e-05, "loss": 8.756, "step": 118580 }, { "epoch": 0.5922245249569278, "grad_norm": 0.09341256320476532, "learning_rate": 1.2263886455230418e-05, "loss": 8.7578, "step": 118590 }, { "epoch": 0.5922744637818672, "grad_norm": 0.08816874027252197, "learning_rate": 1.2262384540288869e-05, "loss": 8.7486, "step": 118600 }, { "epoch": 0.5923244026068066, "grad_norm": 0.08888726681470871, "learning_rate": 1.2260882625347319e-05, "loss": 8.7589, "step": 118610 }, { "epoch": 0.5923743414317462, "grad_norm": 0.08801273256540298, "learning_rate": 1.2259380710405767e-05, "loss": 8.7634, "step": 118620 }, { "epoch": 0.5924242802566856, "grad_norm": 0.09357626736164093, "learning_rate": 1.2257878795464218e-05, "loss": 8.7542, "step": 118630 }, { "epoch": 0.592474219081625, "grad_norm": 0.09778086096048355, "learning_rate": 1.2256376880522666e-05, "loss": 8.7537, "step": 118640 }, { "epoch": 0.5925241579065644, "grad_norm": 0.09161866456270218, "learning_rate": 1.2254874965581116e-05, "loss": 8.7466, "step": 118650 }, { "epoch": 0.592574096731504, "grad_norm": 0.09025179594755173, "learning_rate": 1.2253373050639566e-05, "loss": 8.7628, "step": 118660 }, { "epoch": 0.5926240355564434, "grad_norm": 0.09360655397176743, "learning_rate": 1.2251871135698015e-05, "loss": 8.7526, "step": 118670 }, { "epoch": 0.5926739743813828, "grad_norm": 0.09320328384637833, "learning_rate": 1.2250369220756465e-05, "loss": 8.7654, "step": 118680 }, { "epoch": 0.5927239132063222, "grad_norm": 0.08667218685150146, "learning_rate": 1.2248867305814913e-05, "loss": 8.763, "step": 118690 }, { "epoch": 0.5927738520312616, "grad_norm": 0.08786769956350327, "learning_rate": 1.2247365390873364e-05, "loss": 8.7849, "step": 118700 }, { "epoch": 0.5928237908562012, "grad_norm": 0.09707433730363846, "learning_rate": 1.2245863475931814e-05, "loss": 8.75, "step": 118710 }, { "epoch": 0.5928737296811406, "grad_norm": 0.09028863161802292, "learning_rate": 1.2244361560990262e-05, "loss": 8.7534, "step": 118720 }, { "epoch": 0.59292366850608, "grad_norm": 0.08839409798383713, "learning_rate": 1.2242859646048713e-05, "loss": 8.7801, "step": 118730 }, { "epoch": 0.5929736073310194, "grad_norm": 0.09288914501667023, "learning_rate": 1.2241357731107161e-05, "loss": 8.7674, "step": 118740 }, { "epoch": 0.593023546155959, "grad_norm": 0.08941594511270523, "learning_rate": 1.2239855816165611e-05, "loss": 8.7506, "step": 118750 }, { "epoch": 0.5930734849808984, "grad_norm": 0.08502016216516495, "learning_rate": 1.2238353901224061e-05, "loss": 8.7585, "step": 118760 }, { "epoch": 0.5931234238058378, "grad_norm": 0.09299997240304947, "learning_rate": 1.223685198628251e-05, "loss": 8.7511, "step": 118770 }, { "epoch": 0.5931733626307772, "grad_norm": 0.09595504403114319, "learning_rate": 1.223535007134096e-05, "loss": 8.7624, "step": 118780 }, { "epoch": 0.5932233014557168, "grad_norm": 0.09487087279558182, "learning_rate": 1.223384815639941e-05, "loss": 8.7711, "step": 118790 }, { "epoch": 0.5932732402806562, "grad_norm": 0.09738994389772415, "learning_rate": 1.2232346241457859e-05, "loss": 8.7638, "step": 118800 }, { "epoch": 0.5933231791055956, "grad_norm": 0.09411431103944778, "learning_rate": 1.2230844326516309e-05, "loss": 8.7554, "step": 118810 }, { "epoch": 0.593373117930535, "grad_norm": 0.09381046146154404, "learning_rate": 1.2229342411574757e-05, "loss": 8.7498, "step": 118820 }, { "epoch": 0.5934230567554746, "grad_norm": 0.08876951038837433, "learning_rate": 1.2227840496633208e-05, "loss": 8.7571, "step": 118830 }, { "epoch": 0.593472995580414, "grad_norm": 0.08969464153051376, "learning_rate": 1.2226338581691658e-05, "loss": 8.7615, "step": 118840 }, { "epoch": 0.5935229344053534, "grad_norm": 0.09235970675945282, "learning_rate": 1.2224836666750106e-05, "loss": 8.7604, "step": 118850 }, { "epoch": 0.5935728732302928, "grad_norm": 0.09277419745922089, "learning_rate": 1.2223334751808556e-05, "loss": 8.7511, "step": 118860 }, { "epoch": 0.5936228120552324, "grad_norm": 0.08957847207784653, "learning_rate": 1.2221832836867007e-05, "loss": 8.7614, "step": 118870 }, { "epoch": 0.5936727508801718, "grad_norm": 0.09086732566356659, "learning_rate": 1.2220330921925455e-05, "loss": 8.7601, "step": 118880 }, { "epoch": 0.5937226897051112, "grad_norm": 0.09175397455692291, "learning_rate": 1.2218829006983905e-05, "loss": 8.7584, "step": 118890 }, { "epoch": 0.5937726285300506, "grad_norm": 0.09422596544027328, "learning_rate": 1.2217327092042354e-05, "loss": 8.7508, "step": 118900 }, { "epoch": 0.5938225673549902, "grad_norm": 0.09395137429237366, "learning_rate": 1.2215825177100804e-05, "loss": 8.753, "step": 118910 }, { "epoch": 0.5938725061799296, "grad_norm": 0.08856262266635895, "learning_rate": 1.2214323262159254e-05, "loss": 8.7564, "step": 118920 }, { "epoch": 0.593922445004869, "grad_norm": 0.08992553502321243, "learning_rate": 1.2212821347217703e-05, "loss": 8.7466, "step": 118930 }, { "epoch": 0.5939723838298084, "grad_norm": 0.094720758497715, "learning_rate": 1.2211319432276153e-05, "loss": 8.7473, "step": 118940 }, { "epoch": 0.594022322654748, "grad_norm": 0.09277505427598953, "learning_rate": 1.2209817517334603e-05, "loss": 8.7496, "step": 118950 }, { "epoch": 0.5940722614796874, "grad_norm": 0.09039611369371414, "learning_rate": 1.2208315602393051e-05, "loss": 8.746, "step": 118960 }, { "epoch": 0.5941222003046268, "grad_norm": 0.09134664386510849, "learning_rate": 1.2206813687451502e-05, "loss": 8.7584, "step": 118970 }, { "epoch": 0.5941721391295662, "grad_norm": 0.09162914752960205, "learning_rate": 1.220531177250995e-05, "loss": 8.754, "step": 118980 }, { "epoch": 0.5942220779545058, "grad_norm": 0.0885622650384903, "learning_rate": 1.22038098575684e-05, "loss": 8.7476, "step": 118990 }, { "epoch": 0.5942720167794452, "grad_norm": 0.09717550873756409, "learning_rate": 1.220230794262685e-05, "loss": 8.759, "step": 119000 }, { "epoch": 0.5943219556043846, "grad_norm": 0.0922437235713005, "learning_rate": 1.2200806027685299e-05, "loss": 8.7528, "step": 119010 }, { "epoch": 0.594371894429324, "grad_norm": 0.08450714498758316, "learning_rate": 1.2199304112743749e-05, "loss": 8.7721, "step": 119020 }, { "epoch": 0.5944218332542636, "grad_norm": 0.09239663183689117, "learning_rate": 1.2197802197802198e-05, "loss": 8.7442, "step": 119030 }, { "epoch": 0.594471772079203, "grad_norm": 0.09686051309108734, "learning_rate": 1.2196300282860648e-05, "loss": 8.7684, "step": 119040 }, { "epoch": 0.5945217109041424, "grad_norm": 0.0940653383731842, "learning_rate": 1.2194798367919098e-05, "loss": 8.7579, "step": 119050 }, { "epoch": 0.5945716497290818, "grad_norm": 0.08960103243589401, "learning_rate": 1.2193296452977546e-05, "loss": 8.7545, "step": 119060 }, { "epoch": 0.5946215885540214, "grad_norm": 0.09707477688789368, "learning_rate": 1.2191794538035997e-05, "loss": 8.7575, "step": 119070 }, { "epoch": 0.5946715273789608, "grad_norm": 0.09536729753017426, "learning_rate": 1.2190292623094445e-05, "loss": 8.7516, "step": 119080 }, { "epoch": 0.5947214662039002, "grad_norm": 0.09189159423112869, "learning_rate": 1.2188790708152895e-05, "loss": 8.7709, "step": 119090 }, { "epoch": 0.5947714050288396, "grad_norm": 0.09311476349830627, "learning_rate": 1.2187288793211345e-05, "loss": 8.7534, "step": 119100 }, { "epoch": 0.5948213438537792, "grad_norm": 0.09297338128089905, "learning_rate": 1.2185786878269796e-05, "loss": 8.7442, "step": 119110 }, { "epoch": 0.5948712826787186, "grad_norm": 0.09508251398801804, "learning_rate": 1.2184284963328244e-05, "loss": 8.7543, "step": 119120 }, { "epoch": 0.594921221503658, "grad_norm": 0.09149755537509918, "learning_rate": 1.2182783048386693e-05, "loss": 8.741, "step": 119130 }, { "epoch": 0.5949711603285974, "grad_norm": 0.09345537424087524, "learning_rate": 1.2181281133445143e-05, "loss": 8.7353, "step": 119140 }, { "epoch": 0.595021099153537, "grad_norm": 0.08903185278177261, "learning_rate": 1.2179779218503593e-05, "loss": 8.7532, "step": 119150 }, { "epoch": 0.5950710379784764, "grad_norm": 0.08982034772634506, "learning_rate": 1.2178277303562043e-05, "loss": 8.7417, "step": 119160 }, { "epoch": 0.5951209768034158, "grad_norm": 0.09356318414211273, "learning_rate": 1.2176775388620492e-05, "loss": 8.7377, "step": 119170 }, { "epoch": 0.5951709156283552, "grad_norm": 0.09444409608840942, "learning_rate": 1.217527347367894e-05, "loss": 8.7521, "step": 119180 }, { "epoch": 0.5952208544532948, "grad_norm": 0.09872134029865265, "learning_rate": 1.217377155873739e-05, "loss": 8.7463, "step": 119190 }, { "epoch": 0.5952707932782342, "grad_norm": 0.09588941931724548, "learning_rate": 1.217226964379584e-05, "loss": 8.7578, "step": 119200 }, { "epoch": 0.5953207321031736, "grad_norm": 0.09109771251678467, "learning_rate": 1.217076772885429e-05, "loss": 8.7556, "step": 119210 }, { "epoch": 0.595370670928113, "grad_norm": 0.09148535132408142, "learning_rate": 1.2169265813912739e-05, "loss": 8.7584, "step": 119220 }, { "epoch": 0.5954206097530526, "grad_norm": 0.09875836223363876, "learning_rate": 1.2167763898971188e-05, "loss": 8.7525, "step": 119230 }, { "epoch": 0.595470548577992, "grad_norm": 0.09247757494449615, "learning_rate": 1.2166261984029638e-05, "loss": 8.7402, "step": 119240 }, { "epoch": 0.5955204874029314, "grad_norm": 0.09525955468416214, "learning_rate": 1.2164760069088088e-05, "loss": 8.7735, "step": 119250 }, { "epoch": 0.5955704262278708, "grad_norm": 0.09496864676475525, "learning_rate": 1.2163258154146538e-05, "loss": 8.7534, "step": 119260 }, { "epoch": 0.5956203650528104, "grad_norm": 0.09750999510288239, "learning_rate": 1.2161756239204988e-05, "loss": 8.7551, "step": 119270 }, { "epoch": 0.5956703038777498, "grad_norm": 0.0901816189289093, "learning_rate": 1.2160254324263435e-05, "loss": 8.7569, "step": 119280 }, { "epoch": 0.5957202427026892, "grad_norm": 0.09008877724409103, "learning_rate": 1.2158752409321885e-05, "loss": 8.7433, "step": 119290 }, { "epoch": 0.5957701815276286, "grad_norm": 0.08970709890127182, "learning_rate": 1.2157250494380335e-05, "loss": 8.7651, "step": 119300 }, { "epoch": 0.5958201203525681, "grad_norm": 0.09239272028207779, "learning_rate": 1.2155748579438786e-05, "loss": 8.746, "step": 119310 }, { "epoch": 0.5958700591775076, "grad_norm": 0.0948515385389328, "learning_rate": 1.2154246664497236e-05, "loss": 8.7497, "step": 119320 }, { "epoch": 0.595919998002447, "grad_norm": 0.09544006735086441, "learning_rate": 1.2152744749555683e-05, "loss": 8.7772, "step": 119330 }, { "epoch": 0.5959699368273864, "grad_norm": 0.092537522315979, "learning_rate": 1.2151242834614133e-05, "loss": 8.7515, "step": 119340 }, { "epoch": 0.596019875652326, "grad_norm": 0.09258683770895004, "learning_rate": 1.2149740919672583e-05, "loss": 8.7646, "step": 119350 }, { "epoch": 0.5960698144772654, "grad_norm": 0.09671740233898163, "learning_rate": 1.2148239004731033e-05, "loss": 8.7454, "step": 119360 }, { "epoch": 0.5961197533022048, "grad_norm": 0.0934329703450203, "learning_rate": 1.2146737089789483e-05, "loss": 8.7486, "step": 119370 }, { "epoch": 0.5961696921271442, "grad_norm": 0.09332381933927536, "learning_rate": 1.214523517484793e-05, "loss": 8.7586, "step": 119380 }, { "epoch": 0.5962196309520837, "grad_norm": 0.09403794258832932, "learning_rate": 1.214373325990638e-05, "loss": 8.7459, "step": 119390 }, { "epoch": 0.5962695697770232, "grad_norm": 0.097443126142025, "learning_rate": 1.214223134496483e-05, "loss": 8.7482, "step": 119400 }, { "epoch": 0.5963195086019626, "grad_norm": 0.09268827736377716, "learning_rate": 1.214072943002328e-05, "loss": 8.75, "step": 119410 }, { "epoch": 0.596369447426902, "grad_norm": 0.09062253683805466, "learning_rate": 1.213922751508173e-05, "loss": 8.7481, "step": 119420 }, { "epoch": 0.5964193862518415, "grad_norm": 0.08736632764339447, "learning_rate": 1.213772560014018e-05, "loss": 8.7588, "step": 119430 }, { "epoch": 0.596469325076781, "grad_norm": 0.09273076057434082, "learning_rate": 1.2136223685198628e-05, "loss": 8.7602, "step": 119440 }, { "epoch": 0.5965192639017204, "grad_norm": 0.0936063900589943, "learning_rate": 1.2134721770257078e-05, "loss": 8.7539, "step": 119450 }, { "epoch": 0.5965692027266598, "grad_norm": 0.09075089544057846, "learning_rate": 1.2133219855315528e-05, "loss": 8.7523, "step": 119460 }, { "epoch": 0.5966191415515993, "grad_norm": 0.09237466752529144, "learning_rate": 1.2131717940373978e-05, "loss": 8.7609, "step": 119470 }, { "epoch": 0.5966690803765388, "grad_norm": 0.09360762685537338, "learning_rate": 1.2130216025432427e-05, "loss": 8.7495, "step": 119480 }, { "epoch": 0.5967190192014782, "grad_norm": 0.09769395738840103, "learning_rate": 1.2128714110490875e-05, "loss": 8.7676, "step": 119490 }, { "epoch": 0.5967689580264176, "grad_norm": 0.0937671959400177, "learning_rate": 1.2127212195549325e-05, "loss": 8.7536, "step": 119500 }, { "epoch": 0.5968188968513571, "grad_norm": 0.09220635890960693, "learning_rate": 1.2125710280607776e-05, "loss": 8.7522, "step": 119510 }, { "epoch": 0.5968688356762966, "grad_norm": 0.0912616029381752, "learning_rate": 1.2124208365666226e-05, "loss": 8.7607, "step": 119520 }, { "epoch": 0.596918774501236, "grad_norm": 0.09352380782365799, "learning_rate": 1.2122706450724674e-05, "loss": 8.7563, "step": 119530 }, { "epoch": 0.5969687133261754, "grad_norm": 0.09166228026151657, "learning_rate": 1.2121204535783123e-05, "loss": 8.7546, "step": 119540 }, { "epoch": 0.5970186521511149, "grad_norm": 0.09042517840862274, "learning_rate": 1.2119702620841573e-05, "loss": 8.7494, "step": 119550 }, { "epoch": 0.5970685909760544, "grad_norm": 0.09637751430273056, "learning_rate": 1.2118200705900023e-05, "loss": 8.7369, "step": 119560 }, { "epoch": 0.5971185298009938, "grad_norm": 0.09652339667081833, "learning_rate": 1.2116698790958473e-05, "loss": 8.7499, "step": 119570 }, { "epoch": 0.5971684686259332, "grad_norm": 0.08974555879831314, "learning_rate": 1.2115196876016922e-05, "loss": 8.7598, "step": 119580 }, { "epoch": 0.5972184074508727, "grad_norm": 0.09256293624639511, "learning_rate": 1.2113694961075372e-05, "loss": 8.7288, "step": 119590 }, { "epoch": 0.5972683462758122, "grad_norm": 0.09114140272140503, "learning_rate": 1.211219304613382e-05, "loss": 8.77, "step": 119600 }, { "epoch": 0.5973182851007516, "grad_norm": 0.09448247402906418, "learning_rate": 1.211069113119227e-05, "loss": 8.7269, "step": 119610 }, { "epoch": 0.597368223925691, "grad_norm": 0.09690418839454651, "learning_rate": 1.210918921625072e-05, "loss": 8.7554, "step": 119620 }, { "epoch": 0.5974181627506305, "grad_norm": 0.09861066192388535, "learning_rate": 1.210768730130917e-05, "loss": 8.76, "step": 119630 }, { "epoch": 0.59746810157557, "grad_norm": 0.09121115505695343, "learning_rate": 1.210618538636762e-05, "loss": 8.7617, "step": 119640 }, { "epoch": 0.5975180404005094, "grad_norm": 0.09317606687545776, "learning_rate": 1.2104683471426068e-05, "loss": 8.7548, "step": 119650 }, { "epoch": 0.5975679792254488, "grad_norm": 0.08856779336929321, "learning_rate": 1.2103181556484518e-05, "loss": 8.7495, "step": 119660 }, { "epoch": 0.5976179180503882, "grad_norm": 0.09526504576206207, "learning_rate": 1.2101679641542968e-05, "loss": 8.7478, "step": 119670 }, { "epoch": 0.5976678568753278, "grad_norm": 0.0859864354133606, "learning_rate": 1.2100177726601417e-05, "loss": 8.7426, "step": 119680 }, { "epoch": 0.5977177957002672, "grad_norm": 0.08864925801753998, "learning_rate": 1.2098675811659867e-05, "loss": 8.7371, "step": 119690 }, { "epoch": 0.5977677345252066, "grad_norm": 0.08991964906454086, "learning_rate": 1.2097173896718315e-05, "loss": 8.7372, "step": 119700 }, { "epoch": 0.597817673350146, "grad_norm": 0.09090539813041687, "learning_rate": 1.2095671981776766e-05, "loss": 8.7734, "step": 119710 }, { "epoch": 0.5978676121750855, "grad_norm": 0.09099775552749634, "learning_rate": 1.2094170066835216e-05, "loss": 8.7409, "step": 119720 }, { "epoch": 0.597917551000025, "grad_norm": 0.08712445944547653, "learning_rate": 1.2092668151893664e-05, "loss": 8.7475, "step": 119730 }, { "epoch": 0.5979674898249644, "grad_norm": 0.08881444483995438, "learning_rate": 1.2091166236952114e-05, "loss": 8.7383, "step": 119740 }, { "epoch": 0.5980174286499038, "grad_norm": 0.0887836366891861, "learning_rate": 1.2089664322010565e-05, "loss": 8.7615, "step": 119750 }, { "epoch": 0.5980673674748433, "grad_norm": 0.09183062613010406, "learning_rate": 1.2088162407069013e-05, "loss": 8.7624, "step": 119760 }, { "epoch": 0.5981173062997828, "grad_norm": 0.09438557177782059, "learning_rate": 1.2086660492127463e-05, "loss": 8.7546, "step": 119770 }, { "epoch": 0.5981672451247222, "grad_norm": 0.09165972471237183, "learning_rate": 1.2085158577185912e-05, "loss": 8.7515, "step": 119780 }, { "epoch": 0.5982171839496616, "grad_norm": 0.0887795090675354, "learning_rate": 1.2083656662244362e-05, "loss": 8.7527, "step": 119790 }, { "epoch": 0.5982671227746011, "grad_norm": 0.09268735349178314, "learning_rate": 1.2082154747302812e-05, "loss": 8.7633, "step": 119800 }, { "epoch": 0.5983170615995406, "grad_norm": 0.09194294363260269, "learning_rate": 1.208065283236126e-05, "loss": 8.7406, "step": 119810 }, { "epoch": 0.59836700042448, "grad_norm": 0.09086401760578156, "learning_rate": 1.207915091741971e-05, "loss": 8.7622, "step": 119820 }, { "epoch": 0.5984169392494194, "grad_norm": 0.0923188179731369, "learning_rate": 1.207764900247816e-05, "loss": 8.7406, "step": 119830 }, { "epoch": 0.5984668780743589, "grad_norm": 0.09538555890321732, "learning_rate": 1.207614708753661e-05, "loss": 8.7567, "step": 119840 }, { "epoch": 0.5985168168992984, "grad_norm": 0.09424526989459991, "learning_rate": 1.207464517259506e-05, "loss": 8.7516, "step": 119850 }, { "epoch": 0.5985667557242378, "grad_norm": 0.09127725660800934, "learning_rate": 1.2073143257653508e-05, "loss": 8.762, "step": 119860 }, { "epoch": 0.5986166945491772, "grad_norm": 0.09290522336959839, "learning_rate": 1.2071641342711958e-05, "loss": 8.752, "step": 119870 }, { "epoch": 0.5986666333741167, "grad_norm": 0.09055527299642563, "learning_rate": 1.2070139427770407e-05, "loss": 8.7407, "step": 119880 }, { "epoch": 0.5987165721990562, "grad_norm": 0.09295734018087387, "learning_rate": 1.2068637512828857e-05, "loss": 8.7368, "step": 119890 }, { "epoch": 0.5987665110239956, "grad_norm": 0.09094783663749695, "learning_rate": 1.2067135597887307e-05, "loss": 8.7513, "step": 119900 }, { "epoch": 0.598816449848935, "grad_norm": 0.09828267991542816, "learning_rate": 1.2065633682945757e-05, "loss": 8.7433, "step": 119910 }, { "epoch": 0.5988663886738745, "grad_norm": 0.10480784624814987, "learning_rate": 1.2064131768004206e-05, "loss": 8.729, "step": 119920 }, { "epoch": 0.598916327498814, "grad_norm": 0.09408173710107803, "learning_rate": 1.2062629853062654e-05, "loss": 8.748, "step": 119930 }, { "epoch": 0.5989662663237534, "grad_norm": 0.09466306120157242, "learning_rate": 1.2061127938121104e-05, "loss": 8.739, "step": 119940 }, { "epoch": 0.5990162051486928, "grad_norm": 0.09082759916782379, "learning_rate": 1.2059626023179555e-05, "loss": 8.751, "step": 119950 }, { "epoch": 0.5990661439736323, "grad_norm": 0.09120157361030579, "learning_rate": 1.2058124108238005e-05, "loss": 8.7516, "step": 119960 }, { "epoch": 0.5991160827985718, "grad_norm": 0.08926384896039963, "learning_rate": 1.2056622193296453e-05, "loss": 8.7449, "step": 119970 }, { "epoch": 0.5991660216235112, "grad_norm": 0.09408469498157501, "learning_rate": 1.2055120278354902e-05, "loss": 8.7444, "step": 119980 }, { "epoch": 0.5992159604484506, "grad_norm": 0.09376782178878784, "learning_rate": 1.2053618363413352e-05, "loss": 8.7396, "step": 119990 }, { "epoch": 0.5992658992733901, "grad_norm": 0.0928616002202034, "learning_rate": 1.2052116448471802e-05, "loss": 8.7482, "step": 120000 }, { "epoch": 0.5993158380983296, "grad_norm": 0.09431707859039307, "learning_rate": 1.2050614533530252e-05, "loss": 8.7567, "step": 120010 }, { "epoch": 0.599365776923269, "grad_norm": 0.08612378686666489, "learning_rate": 1.20491126185887e-05, "loss": 8.7719, "step": 120020 }, { "epoch": 0.5994157157482084, "grad_norm": 0.08852338045835495, "learning_rate": 1.204761070364715e-05, "loss": 8.7456, "step": 120030 }, { "epoch": 0.5994656545731479, "grad_norm": 0.09709616750478745, "learning_rate": 1.20461087887056e-05, "loss": 8.7524, "step": 120040 }, { "epoch": 0.5995155933980874, "grad_norm": 0.08951878547668457, "learning_rate": 1.204460687376405e-05, "loss": 8.7406, "step": 120050 }, { "epoch": 0.5995655322230268, "grad_norm": 0.08870140463113785, "learning_rate": 1.20431049588225e-05, "loss": 8.746, "step": 120060 }, { "epoch": 0.5996154710479662, "grad_norm": 0.0954098254442215, "learning_rate": 1.2041603043880948e-05, "loss": 8.7421, "step": 120070 }, { "epoch": 0.5996654098729057, "grad_norm": 0.09251780807971954, "learning_rate": 1.2040101128939397e-05, "loss": 8.726, "step": 120080 }, { "epoch": 0.5997153486978452, "grad_norm": 0.09680180251598358, "learning_rate": 1.2038599213997847e-05, "loss": 8.7528, "step": 120090 }, { "epoch": 0.5997652875227846, "grad_norm": 0.08500494062900543, "learning_rate": 1.2037097299056297e-05, "loss": 8.7571, "step": 120100 }, { "epoch": 0.599815226347724, "grad_norm": 0.08889224380254745, "learning_rate": 1.2035595384114747e-05, "loss": 8.758, "step": 120110 }, { "epoch": 0.5998651651726635, "grad_norm": 0.09139248728752136, "learning_rate": 1.2034093469173198e-05, "loss": 8.7463, "step": 120120 }, { "epoch": 0.599915103997603, "grad_norm": 0.09338033199310303, "learning_rate": 1.2032591554231644e-05, "loss": 8.7608, "step": 120130 }, { "epoch": 0.5999650428225424, "grad_norm": 0.09576421976089478, "learning_rate": 1.2031089639290094e-05, "loss": 8.7344, "step": 120140 }, { "epoch": 0.6000149816474818, "grad_norm": 0.0958356186747551, "learning_rate": 1.2029587724348545e-05, "loss": 8.7332, "step": 120150 }, { "epoch": 0.6000649204724213, "grad_norm": 0.10234443098306656, "learning_rate": 1.2028085809406995e-05, "loss": 8.7325, "step": 120160 }, { "epoch": 0.6001148592973607, "grad_norm": 0.08857684582471848, "learning_rate": 1.2026583894465445e-05, "loss": 8.7573, "step": 120170 }, { "epoch": 0.6001647981223002, "grad_norm": 0.09901963919401169, "learning_rate": 1.2025081979523892e-05, "loss": 8.7471, "step": 120180 }, { "epoch": 0.6002147369472396, "grad_norm": 0.09748201072216034, "learning_rate": 1.2023580064582342e-05, "loss": 8.7285, "step": 120190 }, { "epoch": 0.6002646757721791, "grad_norm": 0.09091214835643768, "learning_rate": 1.2022078149640792e-05, "loss": 8.7214, "step": 120200 }, { "epoch": 0.6003146145971185, "grad_norm": 0.09007634967565536, "learning_rate": 1.2020576234699242e-05, "loss": 8.7256, "step": 120210 }, { "epoch": 0.600364553422058, "grad_norm": 0.0863836258649826, "learning_rate": 1.2019074319757693e-05, "loss": 8.7418, "step": 120220 }, { "epoch": 0.6004144922469974, "grad_norm": 0.09045583009719849, "learning_rate": 1.201757240481614e-05, "loss": 8.7637, "step": 120230 }, { "epoch": 0.6004644310719369, "grad_norm": 0.09015191346406937, "learning_rate": 1.201607048987459e-05, "loss": 8.7327, "step": 120240 }, { "epoch": 0.6005143698968763, "grad_norm": 0.09545261412858963, "learning_rate": 1.201456857493304e-05, "loss": 8.7351, "step": 120250 }, { "epoch": 0.6005643087218158, "grad_norm": 0.09164721518754959, "learning_rate": 1.201306665999149e-05, "loss": 8.7474, "step": 120260 }, { "epoch": 0.6006142475467552, "grad_norm": 0.09745185077190399, "learning_rate": 1.201156474504994e-05, "loss": 8.7403, "step": 120270 }, { "epoch": 0.6006641863716947, "grad_norm": 0.08840557932853699, "learning_rate": 1.2010062830108389e-05, "loss": 8.7636, "step": 120280 }, { "epoch": 0.6007141251966341, "grad_norm": 0.10116366297006607, "learning_rate": 1.2008560915166837e-05, "loss": 8.7441, "step": 120290 }, { "epoch": 0.6007640640215736, "grad_norm": 0.09359109401702881, "learning_rate": 1.2007059000225287e-05, "loss": 8.7407, "step": 120300 }, { "epoch": 0.600814002846513, "grad_norm": 0.09176816046237946, "learning_rate": 1.2005557085283737e-05, "loss": 8.7537, "step": 120310 }, { "epoch": 0.6008639416714525, "grad_norm": 0.09760214388370514, "learning_rate": 1.2004055170342188e-05, "loss": 8.7465, "step": 120320 }, { "epoch": 0.6009138804963919, "grad_norm": 0.09278165549039841, "learning_rate": 1.2002553255400636e-05, "loss": 8.737, "step": 120330 }, { "epoch": 0.6009638193213314, "grad_norm": 0.09701333940029144, "learning_rate": 1.2001051340459085e-05, "loss": 8.7367, "step": 120340 }, { "epoch": 0.6010137581462708, "grad_norm": 0.09638296812772751, "learning_rate": 1.1999549425517535e-05, "loss": 8.7463, "step": 120350 }, { "epoch": 0.6010636969712103, "grad_norm": 0.09861765056848526, "learning_rate": 1.1998047510575985e-05, "loss": 8.7523, "step": 120360 }, { "epoch": 0.6011136357961497, "grad_norm": 0.08915422856807709, "learning_rate": 1.1996545595634435e-05, "loss": 8.7447, "step": 120370 }, { "epoch": 0.6011635746210892, "grad_norm": 0.09036835283041, "learning_rate": 1.1995043680692884e-05, "loss": 8.7562, "step": 120380 }, { "epoch": 0.6012135134460286, "grad_norm": 0.08992304652929306, "learning_rate": 1.1993541765751332e-05, "loss": 8.7521, "step": 120390 }, { "epoch": 0.6012634522709681, "grad_norm": 0.0943126529455185, "learning_rate": 1.1992039850809782e-05, "loss": 8.7471, "step": 120400 }, { "epoch": 0.6013133910959075, "grad_norm": 0.09551472961902618, "learning_rate": 1.1990537935868232e-05, "loss": 8.7448, "step": 120410 }, { "epoch": 0.601363329920847, "grad_norm": 0.08826518803834915, "learning_rate": 1.1989036020926683e-05, "loss": 8.7382, "step": 120420 }, { "epoch": 0.6014132687457864, "grad_norm": 0.09639231860637665, "learning_rate": 1.1987534105985131e-05, "loss": 8.7435, "step": 120430 }, { "epoch": 0.6014632075707259, "grad_norm": 0.09125574678182602, "learning_rate": 1.1986032191043581e-05, "loss": 8.7593, "step": 120440 }, { "epoch": 0.6015131463956653, "grad_norm": 0.08899689465761185, "learning_rate": 1.198453027610203e-05, "loss": 8.761, "step": 120450 }, { "epoch": 0.6015630852206048, "grad_norm": 0.09570741653442383, "learning_rate": 1.198302836116048e-05, "loss": 8.7517, "step": 120460 }, { "epoch": 0.6016130240455442, "grad_norm": 0.09493447840213776, "learning_rate": 1.198152644621893e-05, "loss": 8.7354, "step": 120470 }, { "epoch": 0.6016629628704837, "grad_norm": 0.09143593907356262, "learning_rate": 1.1980024531277379e-05, "loss": 8.7383, "step": 120480 }, { "epoch": 0.6017129016954231, "grad_norm": 0.09237073361873627, "learning_rate": 1.1978522616335829e-05, "loss": 8.7553, "step": 120490 }, { "epoch": 0.6017628405203626, "grad_norm": 0.0897151529788971, "learning_rate": 1.1977020701394277e-05, "loss": 8.7465, "step": 120500 }, { "epoch": 0.601812779345302, "grad_norm": 0.08980879932641983, "learning_rate": 1.1975518786452727e-05, "loss": 8.7424, "step": 120510 }, { "epoch": 0.6018627181702415, "grad_norm": 0.09323468804359436, "learning_rate": 1.1974016871511178e-05, "loss": 8.7519, "step": 120520 }, { "epoch": 0.6019126569951809, "grad_norm": 0.092279814183712, "learning_rate": 1.1972514956569626e-05, "loss": 8.7398, "step": 120530 }, { "epoch": 0.6019625958201203, "grad_norm": 0.09498938918113708, "learning_rate": 1.1971013041628076e-05, "loss": 8.7409, "step": 120540 }, { "epoch": 0.6020125346450598, "grad_norm": 0.09677129238843918, "learning_rate": 1.1969511126686525e-05, "loss": 8.7443, "step": 120550 }, { "epoch": 0.6020624734699993, "grad_norm": 0.0944163128733635, "learning_rate": 1.1968009211744975e-05, "loss": 8.745, "step": 120560 }, { "epoch": 0.6021124122949387, "grad_norm": 0.08608414977788925, "learning_rate": 1.1966507296803425e-05, "loss": 8.7553, "step": 120570 }, { "epoch": 0.6021623511198781, "grad_norm": 0.094789057970047, "learning_rate": 1.1965005381861874e-05, "loss": 8.7447, "step": 120580 }, { "epoch": 0.6022122899448176, "grad_norm": 0.09222722053527832, "learning_rate": 1.1963503466920324e-05, "loss": 8.7381, "step": 120590 }, { "epoch": 0.6022622287697571, "grad_norm": 0.08701804280281067, "learning_rate": 1.1962001551978774e-05, "loss": 8.7559, "step": 120600 }, { "epoch": 0.6023121675946965, "grad_norm": 0.09150071442127228, "learning_rate": 1.1960499637037222e-05, "loss": 8.735, "step": 120610 }, { "epoch": 0.602362106419636, "grad_norm": 0.09063006192445755, "learning_rate": 1.1958997722095673e-05, "loss": 8.7467, "step": 120620 }, { "epoch": 0.6024120452445754, "grad_norm": 0.09188646078109741, "learning_rate": 1.1957495807154121e-05, "loss": 8.752, "step": 120630 }, { "epoch": 0.6024619840695149, "grad_norm": 0.09145389497280121, "learning_rate": 1.1955993892212571e-05, "loss": 8.767, "step": 120640 }, { "epoch": 0.6025119228944543, "grad_norm": 0.0874108150601387, "learning_rate": 1.1954491977271021e-05, "loss": 8.7427, "step": 120650 }, { "epoch": 0.6025618617193937, "grad_norm": 0.09589160233736038, "learning_rate": 1.195299006232947e-05, "loss": 8.7374, "step": 120660 }, { "epoch": 0.6026118005443332, "grad_norm": 0.0927971825003624, "learning_rate": 1.195148814738792e-05, "loss": 8.7538, "step": 120670 }, { "epoch": 0.6026617393692726, "grad_norm": 0.09196257591247559, "learning_rate": 1.1949986232446369e-05, "loss": 8.7561, "step": 120680 }, { "epoch": 0.6027116781942121, "grad_norm": 0.0907442569732666, "learning_rate": 1.1948484317504819e-05, "loss": 8.7335, "step": 120690 }, { "epoch": 0.6027616170191515, "grad_norm": 0.09081535786390305, "learning_rate": 1.1946982402563269e-05, "loss": 8.7354, "step": 120700 }, { "epoch": 0.602811555844091, "grad_norm": 0.0993124321103096, "learning_rate": 1.1945480487621717e-05, "loss": 8.7478, "step": 120710 }, { "epoch": 0.6028614946690304, "grad_norm": 0.09617297351360321, "learning_rate": 1.1943978572680168e-05, "loss": 8.7466, "step": 120720 }, { "epoch": 0.6029114334939699, "grad_norm": 0.09750398993492126, "learning_rate": 1.1942476657738616e-05, "loss": 8.7461, "step": 120730 }, { "epoch": 0.6029613723189093, "grad_norm": 0.09524727612733841, "learning_rate": 1.1940974742797066e-05, "loss": 8.7452, "step": 120740 }, { "epoch": 0.6030113111438488, "grad_norm": 0.08593402057886124, "learning_rate": 1.1939472827855516e-05, "loss": 8.7495, "step": 120750 }, { "epoch": 0.6030612499687882, "grad_norm": 0.0903341993689537, "learning_rate": 1.1937970912913967e-05, "loss": 8.7438, "step": 120760 }, { "epoch": 0.6031111887937277, "grad_norm": 0.09396269917488098, "learning_rate": 1.1936468997972415e-05, "loss": 8.7225, "step": 120770 }, { "epoch": 0.6031611276186671, "grad_norm": 0.09335647523403168, "learning_rate": 1.1934967083030864e-05, "loss": 8.7488, "step": 120780 }, { "epoch": 0.6032110664436066, "grad_norm": 0.09076468646526337, "learning_rate": 1.1933465168089314e-05, "loss": 8.7418, "step": 120790 }, { "epoch": 0.603261005268546, "grad_norm": 0.09674550592899323, "learning_rate": 1.1931963253147764e-05, "loss": 8.7379, "step": 120800 }, { "epoch": 0.6033109440934855, "grad_norm": 0.09369368851184845, "learning_rate": 1.1930461338206214e-05, "loss": 8.7453, "step": 120810 }, { "epoch": 0.6033608829184249, "grad_norm": 0.09193939715623856, "learning_rate": 1.1928959423264663e-05, "loss": 8.7374, "step": 120820 }, { "epoch": 0.6034108217433644, "grad_norm": 0.08715419471263885, "learning_rate": 1.1927457508323111e-05, "loss": 8.7427, "step": 120830 }, { "epoch": 0.6034607605683038, "grad_norm": 0.0879080593585968, "learning_rate": 1.1925955593381561e-05, "loss": 8.7488, "step": 120840 }, { "epoch": 0.6035106993932433, "grad_norm": 0.09147755801677704, "learning_rate": 1.1924453678440011e-05, "loss": 8.7333, "step": 120850 }, { "epoch": 0.6035606382181827, "grad_norm": 0.09098514169454575, "learning_rate": 1.1922951763498462e-05, "loss": 8.7172, "step": 120860 }, { "epoch": 0.6036105770431222, "grad_norm": 0.08796340972185135, "learning_rate": 1.192144984855691e-05, "loss": 8.7379, "step": 120870 }, { "epoch": 0.6036605158680616, "grad_norm": 0.09227326512336731, "learning_rate": 1.1919947933615359e-05, "loss": 8.739, "step": 120880 }, { "epoch": 0.6037104546930011, "grad_norm": 0.09190311282873154, "learning_rate": 1.1918446018673809e-05, "loss": 8.7631, "step": 120890 }, { "epoch": 0.6037603935179405, "grad_norm": 0.09431488811969757, "learning_rate": 1.1916944103732259e-05, "loss": 8.7499, "step": 120900 }, { "epoch": 0.60381033234288, "grad_norm": 0.09246110171079636, "learning_rate": 1.1915442188790709e-05, "loss": 8.7489, "step": 120910 }, { "epoch": 0.6038602711678194, "grad_norm": 0.093075692653656, "learning_rate": 1.191394027384916e-05, "loss": 8.7325, "step": 120920 }, { "epoch": 0.6039102099927589, "grad_norm": 0.09103310108184814, "learning_rate": 1.1912438358907606e-05, "loss": 8.7337, "step": 120930 }, { "epoch": 0.6039601488176983, "grad_norm": 0.1019456684589386, "learning_rate": 1.1910936443966056e-05, "loss": 8.7356, "step": 120940 }, { "epoch": 0.6040100876426377, "grad_norm": 0.08999264985322952, "learning_rate": 1.1909434529024506e-05, "loss": 8.7455, "step": 120950 }, { "epoch": 0.6040600264675772, "grad_norm": 0.09247510880231857, "learning_rate": 1.1907932614082957e-05, "loss": 8.7358, "step": 120960 }, { "epoch": 0.6041099652925167, "grad_norm": 0.0908162072300911, "learning_rate": 1.1906430699141407e-05, "loss": 8.7354, "step": 120970 }, { "epoch": 0.6041599041174561, "grad_norm": 0.09764672070741653, "learning_rate": 1.1904928784199855e-05, "loss": 8.7378, "step": 120980 }, { "epoch": 0.6042098429423955, "grad_norm": 0.09343189001083374, "learning_rate": 1.1903426869258304e-05, "loss": 8.7326, "step": 120990 }, { "epoch": 0.604259781767335, "grad_norm": 0.0914471447467804, "learning_rate": 1.1901924954316754e-05, "loss": 8.7343, "step": 121000 }, { "epoch": 0.6043097205922745, "grad_norm": 0.09239111840724945, "learning_rate": 1.1900423039375204e-05, "loss": 8.7356, "step": 121010 }, { "epoch": 0.6043596594172139, "grad_norm": 0.09467372298240662, "learning_rate": 1.1898921124433654e-05, "loss": 8.7418, "step": 121020 }, { "epoch": 0.6044095982421533, "grad_norm": 0.09846384823322296, "learning_rate": 1.1897419209492103e-05, "loss": 8.7513, "step": 121030 }, { "epoch": 0.6044595370670928, "grad_norm": 0.09008269757032394, "learning_rate": 1.1895917294550551e-05, "loss": 8.7377, "step": 121040 }, { "epoch": 0.6045094758920323, "grad_norm": 0.0888042226433754, "learning_rate": 1.1894415379609001e-05, "loss": 8.7362, "step": 121050 }, { "epoch": 0.6045594147169717, "grad_norm": 0.09224848449230194, "learning_rate": 1.1892913464667452e-05, "loss": 8.768, "step": 121060 }, { "epoch": 0.6046093535419111, "grad_norm": 0.0956854522228241, "learning_rate": 1.1891411549725902e-05, "loss": 8.7352, "step": 121070 }, { "epoch": 0.6046592923668506, "grad_norm": 0.09198862314224243, "learning_rate": 1.188990963478435e-05, "loss": 8.7352, "step": 121080 }, { "epoch": 0.6047092311917901, "grad_norm": 0.09782185405492783, "learning_rate": 1.1888407719842799e-05, "loss": 8.7306, "step": 121090 }, { "epoch": 0.6047591700167295, "grad_norm": 0.0954626128077507, "learning_rate": 1.1886905804901249e-05, "loss": 8.7577, "step": 121100 }, { "epoch": 0.6048091088416689, "grad_norm": 0.0895036831498146, "learning_rate": 1.1885403889959699e-05, "loss": 8.7356, "step": 121110 }, { "epoch": 0.6048590476666084, "grad_norm": 0.09198148548603058, "learning_rate": 1.188390197501815e-05, "loss": 8.743, "step": 121120 }, { "epoch": 0.6049089864915479, "grad_norm": 0.0872054398059845, "learning_rate": 1.18824000600766e-05, "loss": 8.7409, "step": 121130 }, { "epoch": 0.6049589253164873, "grad_norm": 0.0946134701371193, "learning_rate": 1.1880898145135046e-05, "loss": 8.7335, "step": 121140 }, { "epoch": 0.6050088641414267, "grad_norm": 0.08696244657039642, "learning_rate": 1.1879396230193496e-05, "loss": 8.7231, "step": 121150 }, { "epoch": 0.6050588029663662, "grad_norm": 0.09812266379594803, "learning_rate": 1.1877894315251947e-05, "loss": 8.7275, "step": 121160 }, { "epoch": 0.6051087417913057, "grad_norm": 0.09273399412631989, "learning_rate": 1.1876392400310397e-05, "loss": 8.738, "step": 121170 }, { "epoch": 0.6051586806162451, "grad_norm": 0.09106171131134033, "learning_rate": 1.1874890485368847e-05, "loss": 8.7407, "step": 121180 }, { "epoch": 0.6052086194411845, "grad_norm": 0.08590860664844513, "learning_rate": 1.1873388570427294e-05, "loss": 8.743, "step": 121190 }, { "epoch": 0.605258558266124, "grad_norm": 0.09189659357070923, "learning_rate": 1.1871886655485744e-05, "loss": 8.7424, "step": 121200 }, { "epoch": 0.6053084970910635, "grad_norm": 0.10102283954620361, "learning_rate": 1.1870384740544194e-05, "loss": 8.7519, "step": 121210 }, { "epoch": 0.6053584359160029, "grad_norm": 0.09137275069952011, "learning_rate": 1.1868882825602644e-05, "loss": 8.7475, "step": 121220 }, { "epoch": 0.6054083747409423, "grad_norm": 0.09499847143888474, "learning_rate": 1.1867380910661094e-05, "loss": 8.7518, "step": 121230 }, { "epoch": 0.6054583135658818, "grad_norm": 0.09718917310237885, "learning_rate": 1.1865878995719543e-05, "loss": 8.7258, "step": 121240 }, { "epoch": 0.6055082523908213, "grad_norm": 0.08984164148569107, "learning_rate": 1.1864377080777991e-05, "loss": 8.751, "step": 121250 }, { "epoch": 0.6055581912157607, "grad_norm": 0.09550242125988007, "learning_rate": 1.1862875165836442e-05, "loss": 8.7504, "step": 121260 }, { "epoch": 0.6056081300407001, "grad_norm": 0.09198052436113358, "learning_rate": 1.1861373250894892e-05, "loss": 8.7366, "step": 121270 }, { "epoch": 0.6056580688656396, "grad_norm": 0.09440373629331589, "learning_rate": 1.1859871335953342e-05, "loss": 8.7489, "step": 121280 }, { "epoch": 0.6057080076905791, "grad_norm": 0.09069590270519257, "learning_rate": 1.185836942101179e-05, "loss": 8.7381, "step": 121290 }, { "epoch": 0.6057579465155185, "grad_norm": 0.09249866008758545, "learning_rate": 1.1856867506070239e-05, "loss": 8.7219, "step": 121300 }, { "epoch": 0.6058078853404579, "grad_norm": 0.09518305212259293, "learning_rate": 1.1855365591128689e-05, "loss": 8.743, "step": 121310 }, { "epoch": 0.6058578241653974, "grad_norm": 0.09274056553840637, "learning_rate": 1.185386367618714e-05, "loss": 8.7459, "step": 121320 }, { "epoch": 0.6059077629903369, "grad_norm": 0.09070045500993729, "learning_rate": 1.185236176124559e-05, "loss": 8.7359, "step": 121330 }, { "epoch": 0.6059577018152763, "grad_norm": 0.09082917869091034, "learning_rate": 1.1850859846304038e-05, "loss": 8.742, "step": 121340 }, { "epoch": 0.6060076406402157, "grad_norm": 0.09195704758167267, "learning_rate": 1.1849357931362486e-05, "loss": 8.7441, "step": 121350 }, { "epoch": 0.6060575794651551, "grad_norm": 0.0921049565076828, "learning_rate": 1.1847856016420937e-05, "loss": 8.73, "step": 121360 }, { "epoch": 0.6061075182900947, "grad_norm": 0.09024003148078918, "learning_rate": 1.1846354101479387e-05, "loss": 8.7474, "step": 121370 }, { "epoch": 0.6061574571150341, "grad_norm": 0.09810420870780945, "learning_rate": 1.1844852186537837e-05, "loss": 8.7297, "step": 121380 }, { "epoch": 0.6062073959399735, "grad_norm": 0.08957557380199432, "learning_rate": 1.1843350271596285e-05, "loss": 8.7334, "step": 121390 }, { "epoch": 0.606257334764913, "grad_norm": 0.095646932721138, "learning_rate": 1.1841848356654736e-05, "loss": 8.7491, "step": 121400 }, { "epoch": 0.6063072735898525, "grad_norm": 0.09269688278436661, "learning_rate": 1.1840346441713184e-05, "loss": 8.7294, "step": 121410 }, { "epoch": 0.6063572124147919, "grad_norm": 0.09186460822820663, "learning_rate": 1.1838844526771634e-05, "loss": 8.7326, "step": 121420 }, { "epoch": 0.6064071512397313, "grad_norm": 0.09095993638038635, "learning_rate": 1.1837342611830084e-05, "loss": 8.7583, "step": 121430 }, { "epoch": 0.6064570900646707, "grad_norm": 0.0951274111866951, "learning_rate": 1.1835840696888533e-05, "loss": 8.7412, "step": 121440 }, { "epoch": 0.6065070288896103, "grad_norm": 0.09069649875164032, "learning_rate": 1.1834338781946983e-05, "loss": 8.7235, "step": 121450 }, { "epoch": 0.6065569677145497, "grad_norm": 0.09192630648612976, "learning_rate": 1.1832836867005432e-05, "loss": 8.7422, "step": 121460 }, { "epoch": 0.6066069065394891, "grad_norm": 0.09435165673494339, "learning_rate": 1.1831334952063882e-05, "loss": 8.7295, "step": 121470 }, { "epoch": 0.6066568453644285, "grad_norm": 0.09608633071184158, "learning_rate": 1.1829833037122332e-05, "loss": 8.7352, "step": 121480 }, { "epoch": 0.6067067841893681, "grad_norm": 0.09027303010225296, "learning_rate": 1.182833112218078e-05, "loss": 8.7404, "step": 121490 }, { "epoch": 0.6067567230143075, "grad_norm": 0.08934234827756882, "learning_rate": 1.182682920723923e-05, "loss": 8.7349, "step": 121500 }, { "epoch": 0.6068066618392469, "grad_norm": 0.09478524327278137, "learning_rate": 1.1825327292297679e-05, "loss": 8.754, "step": 121510 }, { "epoch": 0.6068566006641863, "grad_norm": 0.09414006769657135, "learning_rate": 1.182382537735613e-05, "loss": 8.7425, "step": 121520 }, { "epoch": 0.6069065394891259, "grad_norm": 0.09682093560695648, "learning_rate": 1.182232346241458e-05, "loss": 8.7536, "step": 121530 }, { "epoch": 0.6069564783140653, "grad_norm": 0.09124701470136642, "learning_rate": 1.1820821547473028e-05, "loss": 8.7449, "step": 121540 }, { "epoch": 0.6070064171390047, "grad_norm": 0.08872292190790176, "learning_rate": 1.1819319632531478e-05, "loss": 8.7353, "step": 121550 }, { "epoch": 0.6070563559639441, "grad_norm": 0.09001273661851883, "learning_rate": 1.1817817717589928e-05, "loss": 8.7428, "step": 121560 }, { "epoch": 0.6071062947888837, "grad_norm": 0.08708401024341583, "learning_rate": 1.1816315802648377e-05, "loss": 8.7396, "step": 121570 }, { "epoch": 0.6071562336138231, "grad_norm": 0.09001478552818298, "learning_rate": 1.1814813887706827e-05, "loss": 8.7425, "step": 121580 }, { "epoch": 0.6072061724387625, "grad_norm": 0.0907810628414154, "learning_rate": 1.1813311972765275e-05, "loss": 8.7552, "step": 121590 }, { "epoch": 0.6072561112637019, "grad_norm": 0.08939887583255768, "learning_rate": 1.1811810057823726e-05, "loss": 8.7563, "step": 121600 }, { "epoch": 0.6073060500886415, "grad_norm": 0.09514924883842468, "learning_rate": 1.1810308142882176e-05, "loss": 8.7546, "step": 121610 }, { "epoch": 0.6073559889135809, "grad_norm": 0.09000418335199356, "learning_rate": 1.1808806227940624e-05, "loss": 8.7427, "step": 121620 }, { "epoch": 0.6074059277385203, "grad_norm": 0.09303897619247437, "learning_rate": 1.1807304312999075e-05, "loss": 8.7311, "step": 121630 }, { "epoch": 0.6074558665634597, "grad_norm": 0.09306817501783371, "learning_rate": 1.1805802398057523e-05, "loss": 8.7395, "step": 121640 }, { "epoch": 0.6075058053883992, "grad_norm": 0.08681345731019974, "learning_rate": 1.1804300483115973e-05, "loss": 8.7435, "step": 121650 }, { "epoch": 0.6075557442133387, "grad_norm": 0.09274129569530487, "learning_rate": 1.1802798568174423e-05, "loss": 8.7352, "step": 121660 }, { "epoch": 0.6076056830382781, "grad_norm": 0.090678870677948, "learning_rate": 1.1801296653232872e-05, "loss": 8.7323, "step": 121670 }, { "epoch": 0.6076556218632175, "grad_norm": 0.09506350755691528, "learning_rate": 1.1799794738291322e-05, "loss": 8.7349, "step": 121680 }, { "epoch": 0.607705560688157, "grad_norm": 0.09269612282514572, "learning_rate": 1.179829282334977e-05, "loss": 8.7393, "step": 121690 }, { "epoch": 0.6077554995130965, "grad_norm": 0.09288372099399567, "learning_rate": 1.179679090840822e-05, "loss": 8.7256, "step": 121700 }, { "epoch": 0.6078054383380359, "grad_norm": 0.09377559274435043, "learning_rate": 1.179528899346667e-05, "loss": 8.7359, "step": 121710 }, { "epoch": 0.6078553771629753, "grad_norm": 0.088566854596138, "learning_rate": 1.1793787078525121e-05, "loss": 8.7409, "step": 121720 }, { "epoch": 0.6079053159879148, "grad_norm": 0.09255025535821915, "learning_rate": 1.179228516358357e-05, "loss": 8.7415, "step": 121730 }, { "epoch": 0.6079552548128543, "grad_norm": 0.09494113177061081, "learning_rate": 1.1790783248642018e-05, "loss": 8.7397, "step": 121740 }, { "epoch": 0.6080051936377937, "grad_norm": 0.08850625157356262, "learning_rate": 1.1789281333700468e-05, "loss": 8.7394, "step": 121750 }, { "epoch": 0.6080551324627331, "grad_norm": 0.09062092751264572, "learning_rate": 1.1787779418758918e-05, "loss": 8.7512, "step": 121760 }, { "epoch": 0.6081050712876725, "grad_norm": 0.0890820100903511, "learning_rate": 1.1786277503817369e-05, "loss": 8.7326, "step": 121770 }, { "epoch": 0.6081550101126121, "grad_norm": 0.09788678586483002, "learning_rate": 1.1784775588875817e-05, "loss": 8.7336, "step": 121780 }, { "epoch": 0.6082049489375515, "grad_norm": 0.09738252311944962, "learning_rate": 1.1783273673934265e-05, "loss": 8.7479, "step": 121790 }, { "epoch": 0.6082548877624909, "grad_norm": 0.09170432388782501, "learning_rate": 1.1781771758992716e-05, "loss": 8.7248, "step": 121800 }, { "epoch": 0.6083048265874303, "grad_norm": 0.0888880267739296, "learning_rate": 1.1780269844051166e-05, "loss": 8.7402, "step": 121810 }, { "epoch": 0.6083547654123699, "grad_norm": 0.09058916568756104, "learning_rate": 1.1778767929109616e-05, "loss": 8.7334, "step": 121820 }, { "epoch": 0.6084047042373093, "grad_norm": 0.09217052161693573, "learning_rate": 1.1777266014168065e-05, "loss": 8.7499, "step": 121830 }, { "epoch": 0.6084546430622487, "grad_norm": 0.09062588959932327, "learning_rate": 1.1775764099226513e-05, "loss": 8.7287, "step": 121840 }, { "epoch": 0.6085045818871881, "grad_norm": 0.09828715771436691, "learning_rate": 1.1774262184284963e-05, "loss": 8.7276, "step": 121850 }, { "epoch": 0.6085545207121277, "grad_norm": 0.08966988325119019, "learning_rate": 1.1772760269343413e-05, "loss": 8.7422, "step": 121860 }, { "epoch": 0.6086044595370671, "grad_norm": 0.08841178566217422, "learning_rate": 1.1771258354401864e-05, "loss": 8.748, "step": 121870 }, { "epoch": 0.6086543983620065, "grad_norm": 0.08782627433538437, "learning_rate": 1.1769756439460314e-05, "loss": 8.7351, "step": 121880 }, { "epoch": 0.6087043371869459, "grad_norm": 0.08963312953710556, "learning_rate": 1.176825452451876e-05, "loss": 8.7342, "step": 121890 }, { "epoch": 0.6087542760118855, "grad_norm": 0.08687534928321838, "learning_rate": 1.176675260957721e-05, "loss": 8.7416, "step": 121900 }, { "epoch": 0.6088042148368249, "grad_norm": 0.08947011828422546, "learning_rate": 1.1765250694635661e-05, "loss": 8.7506, "step": 121910 }, { "epoch": 0.6088541536617643, "grad_norm": 0.09872882813215256, "learning_rate": 1.1763748779694111e-05, "loss": 8.7542, "step": 121920 }, { "epoch": 0.6089040924867037, "grad_norm": 0.09389559924602509, "learning_rate": 1.1762246864752561e-05, "loss": 8.7213, "step": 121930 }, { "epoch": 0.6089540313116433, "grad_norm": 0.09409285336732864, "learning_rate": 1.1760744949811008e-05, "loss": 8.7131, "step": 121940 }, { "epoch": 0.6090039701365827, "grad_norm": 0.08693234622478485, "learning_rate": 1.1759243034869458e-05, "loss": 8.7416, "step": 121950 }, { "epoch": 0.6090539089615221, "grad_norm": 0.09217488020658493, "learning_rate": 1.1757741119927908e-05, "loss": 8.7226, "step": 121960 }, { "epoch": 0.6091038477864615, "grad_norm": 0.09177994728088379, "learning_rate": 1.1756239204986359e-05, "loss": 8.7294, "step": 121970 }, { "epoch": 0.6091537866114011, "grad_norm": 0.09113775938749313, "learning_rate": 1.1754737290044809e-05, "loss": 8.7382, "step": 121980 }, { "epoch": 0.6092037254363405, "grad_norm": 0.09432590007781982, "learning_rate": 1.1753235375103256e-05, "loss": 8.7495, "step": 121990 }, { "epoch": 0.6092536642612799, "grad_norm": 0.09742003679275513, "learning_rate": 1.1751733460161706e-05, "loss": 8.7237, "step": 122000 }, { "epoch": 0.6093036030862193, "grad_norm": 0.09325713664293289, "learning_rate": 1.1750231545220156e-05, "loss": 8.7452, "step": 122010 }, { "epoch": 0.6093535419111589, "grad_norm": 0.09275127202272415, "learning_rate": 1.1748729630278606e-05, "loss": 8.7461, "step": 122020 }, { "epoch": 0.6094034807360983, "grad_norm": 0.09381189197301865, "learning_rate": 1.1747227715337056e-05, "loss": 8.7323, "step": 122030 }, { "epoch": 0.6094534195610377, "grad_norm": 0.09303104132413864, "learning_rate": 1.1745725800395505e-05, "loss": 8.7343, "step": 122040 }, { "epoch": 0.6095033583859771, "grad_norm": 0.09670752286911011, "learning_rate": 1.1744223885453953e-05, "loss": 8.7394, "step": 122050 }, { "epoch": 0.6095532972109167, "grad_norm": 0.09118695557117462, "learning_rate": 1.1742721970512403e-05, "loss": 8.7528, "step": 122060 }, { "epoch": 0.6096032360358561, "grad_norm": 0.09087878465652466, "learning_rate": 1.1741220055570854e-05, "loss": 8.7145, "step": 122070 }, { "epoch": 0.6096531748607955, "grad_norm": 0.0934610515832901, "learning_rate": 1.1739718140629304e-05, "loss": 8.7274, "step": 122080 }, { "epoch": 0.6097031136857349, "grad_norm": 0.0913190245628357, "learning_rate": 1.1738216225687752e-05, "loss": 8.7205, "step": 122090 }, { "epoch": 0.6097530525106745, "grad_norm": 0.09773539006710052, "learning_rate": 1.17367143107462e-05, "loss": 8.738, "step": 122100 }, { "epoch": 0.6098029913356139, "grad_norm": 0.09631845355033875, "learning_rate": 1.1735212395804651e-05, "loss": 8.7265, "step": 122110 }, { "epoch": 0.6098529301605533, "grad_norm": 0.08930249512195587, "learning_rate": 1.1733710480863101e-05, "loss": 8.7458, "step": 122120 }, { "epoch": 0.6099028689854927, "grad_norm": 0.08900859951972961, "learning_rate": 1.1732208565921551e-05, "loss": 8.7685, "step": 122130 }, { "epoch": 0.6099528078104323, "grad_norm": 0.0874401181936264, "learning_rate": 1.173070665098e-05, "loss": 8.739, "step": 122140 }, { "epoch": 0.6100027466353717, "grad_norm": 0.10340723395347595, "learning_rate": 1.1729204736038448e-05, "loss": 8.7406, "step": 122150 }, { "epoch": 0.6100526854603111, "grad_norm": 0.09558750689029694, "learning_rate": 1.1727702821096898e-05, "loss": 8.7341, "step": 122160 }, { "epoch": 0.6101026242852505, "grad_norm": 0.09469115734100342, "learning_rate": 1.1726200906155349e-05, "loss": 8.7295, "step": 122170 }, { "epoch": 0.6101525631101901, "grad_norm": 0.09226387739181519, "learning_rate": 1.1724698991213799e-05, "loss": 8.7433, "step": 122180 }, { "epoch": 0.6102025019351295, "grad_norm": 0.09578849375247955, "learning_rate": 1.1723197076272247e-05, "loss": 8.7148, "step": 122190 }, { "epoch": 0.6102524407600689, "grad_norm": 0.08702340722084045, "learning_rate": 1.1721695161330697e-05, "loss": 8.7389, "step": 122200 }, { "epoch": 0.6103023795850083, "grad_norm": 0.0902688279747963, "learning_rate": 1.1720193246389146e-05, "loss": 8.7441, "step": 122210 }, { "epoch": 0.6103523184099479, "grad_norm": 0.08830910176038742, "learning_rate": 1.1718691331447596e-05, "loss": 8.7218, "step": 122220 }, { "epoch": 0.6104022572348873, "grad_norm": 0.09016358107328415, "learning_rate": 1.1717189416506046e-05, "loss": 8.7398, "step": 122230 }, { "epoch": 0.6104521960598267, "grad_norm": 0.09306544810533524, "learning_rate": 1.1715687501564495e-05, "loss": 8.7381, "step": 122240 }, { "epoch": 0.6105021348847661, "grad_norm": 0.09133855253458023, "learning_rate": 1.1714185586622945e-05, "loss": 8.7466, "step": 122250 }, { "epoch": 0.6105520737097057, "grad_norm": 0.09112872183322906, "learning_rate": 1.1712683671681393e-05, "loss": 8.7304, "step": 122260 }, { "epoch": 0.6106020125346451, "grad_norm": 0.08973613381385803, "learning_rate": 1.1711181756739844e-05, "loss": 8.7265, "step": 122270 }, { "epoch": 0.6106519513595845, "grad_norm": 0.08785367757081985, "learning_rate": 1.1709679841798294e-05, "loss": 8.7227, "step": 122280 }, { "epoch": 0.6107018901845239, "grad_norm": 0.09560448676347733, "learning_rate": 1.1708177926856742e-05, "loss": 8.7347, "step": 122290 }, { "epoch": 0.6107518290094635, "grad_norm": 0.09626337140798569, "learning_rate": 1.1706676011915192e-05, "loss": 8.7301, "step": 122300 }, { "epoch": 0.6108017678344029, "grad_norm": 0.08685650676488876, "learning_rate": 1.1705174096973641e-05, "loss": 8.7399, "step": 122310 }, { "epoch": 0.6108517066593423, "grad_norm": 0.09066285192966461, "learning_rate": 1.1703672182032091e-05, "loss": 8.7531, "step": 122320 }, { "epoch": 0.6109016454842817, "grad_norm": 0.09555506706237793, "learning_rate": 1.1702170267090541e-05, "loss": 8.7348, "step": 122330 }, { "epoch": 0.6109515843092213, "grad_norm": 0.09356008470058441, "learning_rate": 1.170066835214899e-05, "loss": 8.7236, "step": 122340 }, { "epoch": 0.6110015231341607, "grad_norm": 0.0910077691078186, "learning_rate": 1.169916643720744e-05, "loss": 8.7322, "step": 122350 }, { "epoch": 0.6110514619591001, "grad_norm": 0.09046470373868942, "learning_rate": 1.169766452226589e-05, "loss": 8.7302, "step": 122360 }, { "epoch": 0.6111014007840395, "grad_norm": 0.09235422313213348, "learning_rate": 1.1696162607324339e-05, "loss": 8.7346, "step": 122370 }, { "epoch": 0.611151339608979, "grad_norm": 0.09250746667385101, "learning_rate": 1.1694660692382789e-05, "loss": 8.747, "step": 122380 }, { "epoch": 0.6112012784339185, "grad_norm": 0.09652043879032135, "learning_rate": 1.1693158777441237e-05, "loss": 8.7317, "step": 122390 }, { "epoch": 0.6112512172588579, "grad_norm": 0.0955178514122963, "learning_rate": 1.1691656862499687e-05, "loss": 8.7403, "step": 122400 }, { "epoch": 0.6113011560837973, "grad_norm": 0.08899587392807007, "learning_rate": 1.1690154947558138e-05, "loss": 8.7483, "step": 122410 }, { "epoch": 0.6113510949087368, "grad_norm": 0.08871007710695267, "learning_rate": 1.1688653032616586e-05, "loss": 8.7446, "step": 122420 }, { "epoch": 0.6114010337336763, "grad_norm": 0.09327740967273712, "learning_rate": 1.1687151117675036e-05, "loss": 8.7326, "step": 122430 }, { "epoch": 0.6114509725586157, "grad_norm": 0.09347925335168839, "learning_rate": 1.1685649202733485e-05, "loss": 8.7262, "step": 122440 }, { "epoch": 0.6115009113835551, "grad_norm": 0.08919844776391983, "learning_rate": 1.1684147287791935e-05, "loss": 8.7354, "step": 122450 }, { "epoch": 0.6115508502084946, "grad_norm": 0.09506843984127045, "learning_rate": 1.1682645372850385e-05, "loss": 8.7218, "step": 122460 }, { "epoch": 0.6116007890334341, "grad_norm": 0.09201342612504959, "learning_rate": 1.1681143457908834e-05, "loss": 8.7338, "step": 122470 }, { "epoch": 0.6116507278583735, "grad_norm": 0.09908469021320343, "learning_rate": 1.1679641542967284e-05, "loss": 8.7404, "step": 122480 }, { "epoch": 0.6117006666833129, "grad_norm": 0.09065460413694382, "learning_rate": 1.1678139628025732e-05, "loss": 8.7288, "step": 122490 }, { "epoch": 0.6117506055082524, "grad_norm": 0.09107660502195358, "learning_rate": 1.1676637713084182e-05, "loss": 8.7291, "step": 122500 }, { "epoch": 0.6118005443331919, "grad_norm": 0.0926177054643631, "learning_rate": 1.1675135798142633e-05, "loss": 8.734, "step": 122510 }, { "epoch": 0.6118504831581313, "grad_norm": 0.09085045754909515, "learning_rate": 1.1673633883201083e-05, "loss": 8.7309, "step": 122520 }, { "epoch": 0.6119004219830707, "grad_norm": 0.09174468368291855, "learning_rate": 1.1672131968259531e-05, "loss": 8.7436, "step": 122530 }, { "epoch": 0.6119503608080102, "grad_norm": 0.09456495195627213, "learning_rate": 1.167063005331798e-05, "loss": 8.7272, "step": 122540 }, { "epoch": 0.6120002996329497, "grad_norm": 0.09345948696136475, "learning_rate": 1.166912813837643e-05, "loss": 8.7226, "step": 122550 }, { "epoch": 0.6120502384578891, "grad_norm": 0.0974060595035553, "learning_rate": 1.166762622343488e-05, "loss": 8.7425, "step": 122560 }, { "epoch": 0.6121001772828285, "grad_norm": 0.09621378034353256, "learning_rate": 1.166612430849333e-05, "loss": 8.7256, "step": 122570 }, { "epoch": 0.612150116107768, "grad_norm": 0.09098070859909058, "learning_rate": 1.1664622393551779e-05, "loss": 8.7195, "step": 122580 }, { "epoch": 0.6122000549327075, "grad_norm": 0.08891347795724869, "learning_rate": 1.1663120478610227e-05, "loss": 8.739, "step": 122590 }, { "epoch": 0.6122499937576469, "grad_norm": 0.09389282763004303, "learning_rate": 1.1661618563668677e-05, "loss": 8.717, "step": 122600 }, { "epoch": 0.6122999325825863, "grad_norm": 0.0891786739230156, "learning_rate": 1.1660116648727128e-05, "loss": 8.7401, "step": 122610 }, { "epoch": 0.6123498714075257, "grad_norm": 0.09159950911998749, "learning_rate": 1.1658614733785578e-05, "loss": 8.7326, "step": 122620 }, { "epoch": 0.6123998102324653, "grad_norm": 0.0896199643611908, "learning_rate": 1.1657112818844026e-05, "loss": 8.7444, "step": 122630 }, { "epoch": 0.6124497490574047, "grad_norm": 0.0880565494298935, "learning_rate": 1.1655610903902475e-05, "loss": 8.7348, "step": 122640 }, { "epoch": 0.6124996878823441, "grad_norm": 0.08740682154893875, "learning_rate": 1.1654108988960925e-05, "loss": 8.7177, "step": 122650 }, { "epoch": 0.6125496267072835, "grad_norm": 0.09321615844964981, "learning_rate": 1.1652607074019375e-05, "loss": 8.7422, "step": 122660 }, { "epoch": 0.6125995655322231, "grad_norm": 0.09611321985721588, "learning_rate": 1.1651105159077825e-05, "loss": 8.7159, "step": 122670 }, { "epoch": 0.6126495043571625, "grad_norm": 0.08695535361766815, "learning_rate": 1.1649603244136275e-05, "loss": 8.7261, "step": 122680 }, { "epoch": 0.6126994431821019, "grad_norm": 0.09775250405073166, "learning_rate": 1.1648101329194722e-05, "loss": 8.7415, "step": 122690 }, { "epoch": 0.6127493820070413, "grad_norm": 0.094340018928051, "learning_rate": 1.1646599414253172e-05, "loss": 8.7466, "step": 122700 }, { "epoch": 0.6127993208319809, "grad_norm": 0.09047188609838486, "learning_rate": 1.1645097499311623e-05, "loss": 8.7268, "step": 122710 }, { "epoch": 0.6128492596569203, "grad_norm": 0.0914597436785698, "learning_rate": 1.1643595584370073e-05, "loss": 8.7365, "step": 122720 }, { "epoch": 0.6128991984818597, "grad_norm": 0.08962114155292511, "learning_rate": 1.1642093669428523e-05, "loss": 8.731, "step": 122730 }, { "epoch": 0.6129491373067991, "grad_norm": 0.0948721170425415, "learning_rate": 1.164059175448697e-05, "loss": 8.7166, "step": 122740 }, { "epoch": 0.6129990761317387, "grad_norm": 0.08996075391769409, "learning_rate": 1.163908983954542e-05, "loss": 8.7231, "step": 122750 }, { "epoch": 0.6130490149566781, "grad_norm": 0.09304836392402649, "learning_rate": 1.163758792460387e-05, "loss": 8.7423, "step": 122760 }, { "epoch": 0.6130989537816175, "grad_norm": 0.08591028302907944, "learning_rate": 1.163608600966232e-05, "loss": 8.723, "step": 122770 }, { "epoch": 0.6131488926065569, "grad_norm": 0.08985080569982529, "learning_rate": 1.163458409472077e-05, "loss": 8.7389, "step": 122780 }, { "epoch": 0.6131988314314964, "grad_norm": 0.09522908180952072, "learning_rate": 1.1633082179779217e-05, "loss": 8.7362, "step": 122790 }, { "epoch": 0.6132487702564359, "grad_norm": 0.08915939182043076, "learning_rate": 1.1631580264837667e-05, "loss": 8.7425, "step": 122800 }, { "epoch": 0.6132987090813753, "grad_norm": 0.0966191291809082, "learning_rate": 1.1630078349896118e-05, "loss": 8.7361, "step": 122810 }, { "epoch": 0.6133486479063147, "grad_norm": 0.08906877040863037, "learning_rate": 1.1628576434954568e-05, "loss": 8.726, "step": 122820 }, { "epoch": 0.6133985867312542, "grad_norm": 0.0922885611653328, "learning_rate": 1.1627074520013018e-05, "loss": 8.7094, "step": 122830 }, { "epoch": 0.6134485255561937, "grad_norm": 0.0915333479642868, "learning_rate": 1.1625572605071466e-05, "loss": 8.7258, "step": 122840 }, { "epoch": 0.6134984643811331, "grad_norm": 0.09396089613437653, "learning_rate": 1.1624070690129915e-05, "loss": 8.7101, "step": 122850 }, { "epoch": 0.6135484032060725, "grad_norm": 0.09335985779762268, "learning_rate": 1.1622568775188365e-05, "loss": 8.735, "step": 122860 }, { "epoch": 0.613598342031012, "grad_norm": 0.09260796010494232, "learning_rate": 1.1621066860246815e-05, "loss": 8.7354, "step": 122870 }, { "epoch": 0.6136482808559515, "grad_norm": 0.09389949589967728, "learning_rate": 1.1619564945305265e-05, "loss": 8.7136, "step": 122880 }, { "epoch": 0.6136982196808909, "grad_norm": 0.0865430235862732, "learning_rate": 1.1618063030363714e-05, "loss": 8.743, "step": 122890 }, { "epoch": 0.6137481585058303, "grad_norm": 0.08791029453277588, "learning_rate": 1.1616561115422162e-05, "loss": 8.7337, "step": 122900 }, { "epoch": 0.6137980973307698, "grad_norm": 0.0919414758682251, "learning_rate": 1.1615059200480613e-05, "loss": 8.7336, "step": 122910 }, { "epoch": 0.6138480361557093, "grad_norm": 0.08799885213375092, "learning_rate": 1.1613557285539063e-05, "loss": 8.7203, "step": 122920 }, { "epoch": 0.6138979749806487, "grad_norm": 0.08847959339618683, "learning_rate": 1.1612055370597513e-05, "loss": 8.7178, "step": 122930 }, { "epoch": 0.6139479138055881, "grad_norm": 0.10149681568145752, "learning_rate": 1.1610553455655961e-05, "loss": 8.7265, "step": 122940 }, { "epoch": 0.6139978526305276, "grad_norm": 0.09459761530160904, "learning_rate": 1.160905154071441e-05, "loss": 8.7224, "step": 122950 }, { "epoch": 0.6140477914554671, "grad_norm": 0.09227167069911957, "learning_rate": 1.160754962577286e-05, "loss": 8.7274, "step": 122960 }, { "epoch": 0.6140977302804065, "grad_norm": 0.08995702862739563, "learning_rate": 1.160604771083131e-05, "loss": 8.718, "step": 122970 }, { "epoch": 0.6141476691053459, "grad_norm": 0.08922126144170761, "learning_rate": 1.160454579588976e-05, "loss": 8.7368, "step": 122980 }, { "epoch": 0.6141976079302854, "grad_norm": 0.08837445080280304, "learning_rate": 1.1603043880948209e-05, "loss": 8.7421, "step": 122990 }, { "epoch": 0.6142475467552249, "grad_norm": 0.0875106155872345, "learning_rate": 1.1601541966006657e-05, "loss": 8.7198, "step": 123000 }, { "epoch": 0.6142974855801643, "grad_norm": 0.0882338210940361, "learning_rate": 1.1600040051065108e-05, "loss": 8.7162, "step": 123010 }, { "epoch": 0.6143474244051037, "grad_norm": 0.08920801430940628, "learning_rate": 1.1598538136123558e-05, "loss": 8.717, "step": 123020 }, { "epoch": 0.6143973632300432, "grad_norm": 0.08952096849679947, "learning_rate": 1.1597036221182008e-05, "loss": 8.7223, "step": 123030 }, { "epoch": 0.6144473020549827, "grad_norm": 0.09667718410491943, "learning_rate": 1.1595534306240456e-05, "loss": 8.7406, "step": 123040 }, { "epoch": 0.6144972408799221, "grad_norm": 0.08800283074378967, "learning_rate": 1.1594032391298907e-05, "loss": 8.741, "step": 123050 }, { "epoch": 0.6145471797048615, "grad_norm": 0.09324777871370316, "learning_rate": 1.1592530476357355e-05, "loss": 8.7304, "step": 123060 }, { "epoch": 0.614597118529801, "grad_norm": 0.09224963933229446, "learning_rate": 1.1591028561415805e-05, "loss": 8.7463, "step": 123070 }, { "epoch": 0.6146470573547405, "grad_norm": 0.09018932282924652, "learning_rate": 1.1589526646474255e-05, "loss": 8.7387, "step": 123080 }, { "epoch": 0.6146969961796799, "grad_norm": 0.09440066665410995, "learning_rate": 1.1588024731532704e-05, "loss": 8.7013, "step": 123090 }, { "epoch": 0.6147469350046193, "grad_norm": 0.09074559062719345, "learning_rate": 1.1586522816591154e-05, "loss": 8.7298, "step": 123100 }, { "epoch": 0.6147968738295588, "grad_norm": 0.09388012439012527, "learning_rate": 1.1585020901649603e-05, "loss": 8.7206, "step": 123110 }, { "epoch": 0.6148468126544983, "grad_norm": 0.09513651579618454, "learning_rate": 1.1583518986708053e-05, "loss": 8.7438, "step": 123120 }, { "epoch": 0.6148967514794377, "grad_norm": 0.08741681277751923, "learning_rate": 1.1582017071766503e-05, "loss": 8.7586, "step": 123130 }, { "epoch": 0.6149466903043771, "grad_norm": 0.0910298153758049, "learning_rate": 1.1580515156824951e-05, "loss": 8.7249, "step": 123140 }, { "epoch": 0.6149966291293166, "grad_norm": 0.0909765362739563, "learning_rate": 1.1579013241883402e-05, "loss": 8.7324, "step": 123150 }, { "epoch": 0.615046567954256, "grad_norm": 0.09115078300237656, "learning_rate": 1.157751132694185e-05, "loss": 8.7132, "step": 123160 }, { "epoch": 0.6150965067791955, "grad_norm": 0.09587119519710541, "learning_rate": 1.15760094120003e-05, "loss": 8.7349, "step": 123170 }, { "epoch": 0.6151464456041349, "grad_norm": 0.09301666915416718, "learning_rate": 1.157450749705875e-05, "loss": 8.7374, "step": 123180 }, { "epoch": 0.6151963844290744, "grad_norm": 0.0892057865858078, "learning_rate": 1.15730055821172e-05, "loss": 8.7265, "step": 123190 }, { "epoch": 0.6152463232540138, "grad_norm": 0.09476073831319809, "learning_rate": 1.157150366717565e-05, "loss": 8.7264, "step": 123200 }, { "epoch": 0.6152962620789533, "grad_norm": 0.08770003169775009, "learning_rate": 1.15700017522341e-05, "loss": 8.734, "step": 123210 }, { "epoch": 0.6153462009038927, "grad_norm": 0.09061672538518906, "learning_rate": 1.1568499837292548e-05, "loss": 8.737, "step": 123220 }, { "epoch": 0.6153961397288322, "grad_norm": 0.09397213160991669, "learning_rate": 1.1566997922350998e-05, "loss": 8.7247, "step": 123230 }, { "epoch": 0.6154460785537716, "grad_norm": 0.09056683629751205, "learning_rate": 1.1565496007409448e-05, "loss": 8.7302, "step": 123240 }, { "epoch": 0.6154960173787111, "grad_norm": 0.09550472348928452, "learning_rate": 1.1563994092467897e-05, "loss": 8.7345, "step": 123250 }, { "epoch": 0.6155459562036505, "grad_norm": 0.09115252643823624, "learning_rate": 1.1562492177526347e-05, "loss": 8.7392, "step": 123260 }, { "epoch": 0.61559589502859, "grad_norm": 0.09301244467496872, "learning_rate": 1.1560990262584795e-05, "loss": 8.7234, "step": 123270 }, { "epoch": 0.6156458338535294, "grad_norm": 0.0930706262588501, "learning_rate": 1.1559488347643246e-05, "loss": 8.7305, "step": 123280 }, { "epoch": 0.6156957726784689, "grad_norm": 0.09021922200918198, "learning_rate": 1.1557986432701696e-05, "loss": 8.7383, "step": 123290 }, { "epoch": 0.6157457115034083, "grad_norm": 0.09439912438392639, "learning_rate": 1.1556484517760144e-05, "loss": 8.7176, "step": 123300 }, { "epoch": 0.6157956503283478, "grad_norm": 0.09691648930311203, "learning_rate": 1.1554982602818594e-05, "loss": 8.7301, "step": 123310 }, { "epoch": 0.6158455891532872, "grad_norm": 0.09814375638961792, "learning_rate": 1.1553480687877043e-05, "loss": 8.7233, "step": 123320 }, { "epoch": 0.6158955279782267, "grad_norm": 0.09281103312969208, "learning_rate": 1.1551978772935493e-05, "loss": 8.741, "step": 123330 }, { "epoch": 0.6159454668031661, "grad_norm": 0.09556780755519867, "learning_rate": 1.1550476857993943e-05, "loss": 8.7346, "step": 123340 }, { "epoch": 0.6159954056281056, "grad_norm": 0.09661609679460526, "learning_rate": 1.1548974943052392e-05, "loss": 8.7364, "step": 123350 }, { "epoch": 0.616045344453045, "grad_norm": 0.08993745595216751, "learning_rate": 1.1547473028110842e-05, "loss": 8.7326, "step": 123360 }, { "epoch": 0.6160952832779845, "grad_norm": 0.08858168870210648, "learning_rate": 1.1545971113169292e-05, "loss": 8.7335, "step": 123370 }, { "epoch": 0.6161452221029239, "grad_norm": 0.0938563272356987, "learning_rate": 1.154446919822774e-05, "loss": 8.731, "step": 123380 }, { "epoch": 0.6161951609278634, "grad_norm": 0.09345029294490814, "learning_rate": 1.154296728328619e-05, "loss": 8.7307, "step": 123390 }, { "epoch": 0.6162450997528028, "grad_norm": 0.09097786992788315, "learning_rate": 1.154146536834464e-05, "loss": 8.7149, "step": 123400 }, { "epoch": 0.6162950385777423, "grad_norm": 0.09178051352500916, "learning_rate": 1.153996345340309e-05, "loss": 8.7436, "step": 123410 }, { "epoch": 0.6163449774026817, "grad_norm": 0.08830767124891281, "learning_rate": 1.153846153846154e-05, "loss": 8.7355, "step": 123420 }, { "epoch": 0.6163949162276212, "grad_norm": 0.09343507140874863, "learning_rate": 1.1536959623519988e-05, "loss": 8.7295, "step": 123430 }, { "epoch": 0.6164448550525606, "grad_norm": 0.09138908237218857, "learning_rate": 1.1535457708578438e-05, "loss": 8.7319, "step": 123440 }, { "epoch": 0.6164947938775001, "grad_norm": 0.09196456521749496, "learning_rate": 1.1533955793636887e-05, "loss": 8.7181, "step": 123450 }, { "epoch": 0.6165447327024395, "grad_norm": 0.09027263522148132, "learning_rate": 1.1532453878695337e-05, "loss": 8.7359, "step": 123460 }, { "epoch": 0.616594671527379, "grad_norm": 0.09021011739969254, "learning_rate": 1.1530951963753787e-05, "loss": 8.7455, "step": 123470 }, { "epoch": 0.6166446103523184, "grad_norm": 0.08662664890289307, "learning_rate": 1.1529450048812236e-05, "loss": 8.7415, "step": 123480 }, { "epoch": 0.6166945491772579, "grad_norm": 0.09319479018449783, "learning_rate": 1.1527948133870686e-05, "loss": 8.7173, "step": 123490 }, { "epoch": 0.6167444880021973, "grad_norm": 0.08931265026330948, "learning_rate": 1.1526446218929134e-05, "loss": 8.7091, "step": 123500 }, { "epoch": 0.6167944268271368, "grad_norm": 0.09478646516799927, "learning_rate": 1.1524944303987584e-05, "loss": 8.7149, "step": 123510 }, { "epoch": 0.6168443656520762, "grad_norm": 0.08900546282529831, "learning_rate": 1.1523442389046035e-05, "loss": 8.7382, "step": 123520 }, { "epoch": 0.6168943044770157, "grad_norm": 0.09752871096134186, "learning_rate": 1.1521940474104485e-05, "loss": 8.7297, "step": 123530 }, { "epoch": 0.6169442433019551, "grad_norm": 0.09565001726150513, "learning_rate": 1.1520438559162933e-05, "loss": 8.7219, "step": 123540 }, { "epoch": 0.6169941821268946, "grad_norm": 0.09056803584098816, "learning_rate": 1.1518936644221382e-05, "loss": 8.7327, "step": 123550 }, { "epoch": 0.617044120951834, "grad_norm": 0.09336548298597336, "learning_rate": 1.1517434729279832e-05, "loss": 8.7223, "step": 123560 }, { "epoch": 0.6170940597767735, "grad_norm": 0.09907118231058121, "learning_rate": 1.1515932814338282e-05, "loss": 8.7273, "step": 123570 }, { "epoch": 0.6171439986017129, "grad_norm": 0.08720168471336365, "learning_rate": 1.1514430899396732e-05, "loss": 8.7245, "step": 123580 }, { "epoch": 0.6171939374266524, "grad_norm": 0.09530545026063919, "learning_rate": 1.151292898445518e-05, "loss": 8.7177, "step": 123590 }, { "epoch": 0.6172438762515918, "grad_norm": 0.08918207138776779, "learning_rate": 1.151142706951363e-05, "loss": 8.7255, "step": 123600 }, { "epoch": 0.6172938150765312, "grad_norm": 0.09224686026573181, "learning_rate": 1.150992515457208e-05, "loss": 8.7282, "step": 123610 }, { "epoch": 0.6173437539014707, "grad_norm": 0.08980294317007065, "learning_rate": 1.150842323963053e-05, "loss": 8.7338, "step": 123620 }, { "epoch": 0.6173936927264101, "grad_norm": 0.08682075142860413, "learning_rate": 1.150692132468898e-05, "loss": 8.7079, "step": 123630 }, { "epoch": 0.6174436315513496, "grad_norm": 0.0889846533536911, "learning_rate": 1.1505419409747428e-05, "loss": 8.7336, "step": 123640 }, { "epoch": 0.617493570376289, "grad_norm": 0.09437645971775055, "learning_rate": 1.1503917494805877e-05, "loss": 8.73, "step": 123650 }, { "epoch": 0.6175435092012285, "grad_norm": 0.08967822045087814, "learning_rate": 1.1502415579864327e-05, "loss": 8.7262, "step": 123660 }, { "epoch": 0.6175934480261679, "grad_norm": 0.09480708837509155, "learning_rate": 1.1500913664922777e-05, "loss": 8.7245, "step": 123670 }, { "epoch": 0.6176433868511074, "grad_norm": 0.095451220870018, "learning_rate": 1.1499411749981227e-05, "loss": 8.7531, "step": 123680 }, { "epoch": 0.6176933256760468, "grad_norm": 0.08989094942808151, "learning_rate": 1.1497909835039677e-05, "loss": 8.7383, "step": 123690 }, { "epoch": 0.6177432645009863, "grad_norm": 0.08974773436784744, "learning_rate": 1.1496407920098124e-05, "loss": 8.7298, "step": 123700 }, { "epoch": 0.6177932033259257, "grad_norm": 0.08904717862606049, "learning_rate": 1.1494906005156574e-05, "loss": 8.7198, "step": 123710 }, { "epoch": 0.6178431421508652, "grad_norm": 0.0950782522559166, "learning_rate": 1.1493404090215025e-05, "loss": 8.7192, "step": 123720 }, { "epoch": 0.6178930809758046, "grad_norm": 0.08988340198993683, "learning_rate": 1.1491902175273475e-05, "loss": 8.7233, "step": 123730 }, { "epoch": 0.6179430198007441, "grad_norm": 0.08889536559581757, "learning_rate": 1.1490400260331925e-05, "loss": 8.7234, "step": 123740 }, { "epoch": 0.6179929586256835, "grad_norm": 0.08637700229883194, "learning_rate": 1.1488898345390372e-05, "loss": 8.7312, "step": 123750 }, { "epoch": 0.618042897450623, "grad_norm": 0.09260913729667664, "learning_rate": 1.1487396430448822e-05, "loss": 8.7229, "step": 123760 }, { "epoch": 0.6180928362755624, "grad_norm": 0.10173122584819794, "learning_rate": 1.1485894515507272e-05, "loss": 8.7177, "step": 123770 }, { "epoch": 0.6181427751005019, "grad_norm": 0.09701031446456909, "learning_rate": 1.1484392600565722e-05, "loss": 8.7225, "step": 123780 }, { "epoch": 0.6181927139254413, "grad_norm": 0.09118404984474182, "learning_rate": 1.1482890685624172e-05, "loss": 8.7159, "step": 123790 }, { "epoch": 0.6182426527503808, "grad_norm": 0.09571003168821335, "learning_rate": 1.148138877068262e-05, "loss": 8.7244, "step": 123800 }, { "epoch": 0.6182925915753202, "grad_norm": 0.09160356968641281, "learning_rate": 1.147988685574107e-05, "loss": 8.7271, "step": 123810 }, { "epoch": 0.6183425304002597, "grad_norm": 0.10198367387056351, "learning_rate": 1.147838494079952e-05, "loss": 8.728, "step": 123820 }, { "epoch": 0.6183924692251991, "grad_norm": 0.090346559882164, "learning_rate": 1.147688302585797e-05, "loss": 8.7252, "step": 123830 }, { "epoch": 0.6184424080501386, "grad_norm": 0.08791203796863556, "learning_rate": 1.147538111091642e-05, "loss": 8.7254, "step": 123840 }, { "epoch": 0.618492346875078, "grad_norm": 0.09534545242786407, "learning_rate": 1.1473879195974868e-05, "loss": 8.7175, "step": 123850 }, { "epoch": 0.6185422857000175, "grad_norm": 0.09206069260835648, "learning_rate": 1.1472377281033317e-05, "loss": 8.732, "step": 123860 }, { "epoch": 0.6185922245249569, "grad_norm": 0.09079962968826294, "learning_rate": 1.1470875366091767e-05, "loss": 8.7245, "step": 123870 }, { "epoch": 0.6186421633498964, "grad_norm": 0.09148161858320236, "learning_rate": 1.1469373451150217e-05, "loss": 8.7172, "step": 123880 }, { "epoch": 0.6186921021748358, "grad_norm": 0.0891844853758812, "learning_rate": 1.1467871536208667e-05, "loss": 8.721, "step": 123890 }, { "epoch": 0.6187420409997753, "grad_norm": 0.08975985646247864, "learning_rate": 1.1466369621267116e-05, "loss": 8.7102, "step": 123900 }, { "epoch": 0.6187919798247147, "grad_norm": 0.10155780613422394, "learning_rate": 1.1464867706325564e-05, "loss": 8.7252, "step": 123910 }, { "epoch": 0.6188419186496542, "grad_norm": 0.09575510025024414, "learning_rate": 1.1463365791384015e-05, "loss": 8.716, "step": 123920 }, { "epoch": 0.6188918574745936, "grad_norm": 0.0914383977651596, "learning_rate": 1.1461863876442465e-05, "loss": 8.7268, "step": 123930 }, { "epoch": 0.618941796299533, "grad_norm": 0.09883058816194534, "learning_rate": 1.1460361961500915e-05, "loss": 8.7068, "step": 123940 }, { "epoch": 0.6189917351244725, "grad_norm": 0.09328742325305939, "learning_rate": 1.1458860046559363e-05, "loss": 8.7305, "step": 123950 }, { "epoch": 0.619041673949412, "grad_norm": 0.09385626018047333, "learning_rate": 1.1457358131617812e-05, "loss": 8.7238, "step": 123960 }, { "epoch": 0.6190916127743514, "grad_norm": 0.0958520770072937, "learning_rate": 1.1455856216676262e-05, "loss": 8.7139, "step": 123970 }, { "epoch": 0.6191415515992909, "grad_norm": 0.09502740204334259, "learning_rate": 1.1454354301734712e-05, "loss": 8.7134, "step": 123980 }, { "epoch": 0.6191914904242303, "grad_norm": 0.09292402863502502, "learning_rate": 1.1452852386793162e-05, "loss": 8.7092, "step": 123990 }, { "epoch": 0.6192414292491698, "grad_norm": 0.09679784625768661, "learning_rate": 1.1451350471851611e-05, "loss": 8.7259, "step": 124000 }, { "epoch": 0.6192913680741092, "grad_norm": 0.09584274888038635, "learning_rate": 1.1449848556910061e-05, "loss": 8.715, "step": 124010 }, { "epoch": 0.6193413068990486, "grad_norm": 0.08926044404506683, "learning_rate": 1.144834664196851e-05, "loss": 8.7075, "step": 124020 }, { "epoch": 0.6193912457239881, "grad_norm": 0.09470371901988983, "learning_rate": 1.144684472702696e-05, "loss": 8.7184, "step": 124030 }, { "epoch": 0.6194411845489276, "grad_norm": 0.09544707089662552, "learning_rate": 1.144534281208541e-05, "loss": 8.7218, "step": 124040 }, { "epoch": 0.619491123373867, "grad_norm": 0.09077138453722, "learning_rate": 1.1443840897143858e-05, "loss": 8.7214, "step": 124050 }, { "epoch": 0.6195410621988064, "grad_norm": 0.08742060512304306, "learning_rate": 1.1442338982202309e-05, "loss": 8.7439, "step": 124060 }, { "epoch": 0.6195910010237459, "grad_norm": 0.09070980548858643, "learning_rate": 1.1440837067260757e-05, "loss": 8.7162, "step": 124070 }, { "epoch": 0.6196409398486854, "grad_norm": 0.09399957954883575, "learning_rate": 1.1439335152319207e-05, "loss": 8.7223, "step": 124080 }, { "epoch": 0.6196908786736248, "grad_norm": 0.08889596909284592, "learning_rate": 1.1437833237377657e-05, "loss": 8.7342, "step": 124090 }, { "epoch": 0.6197408174985642, "grad_norm": 0.09190160781145096, "learning_rate": 1.1436331322436106e-05, "loss": 8.7455, "step": 124100 }, { "epoch": 0.6197907563235037, "grad_norm": 0.09013140201568604, "learning_rate": 1.1434829407494556e-05, "loss": 8.7439, "step": 124110 }, { "epoch": 0.6198406951484432, "grad_norm": 0.0921657457947731, "learning_rate": 1.1433327492553005e-05, "loss": 8.7196, "step": 124120 }, { "epoch": 0.6198906339733826, "grad_norm": 0.09478151053190231, "learning_rate": 1.1431825577611455e-05, "loss": 8.7308, "step": 124130 }, { "epoch": 0.619940572798322, "grad_norm": 0.09333883225917816, "learning_rate": 1.1430323662669905e-05, "loss": 8.7126, "step": 124140 }, { "epoch": 0.6199905116232615, "grad_norm": 0.0867578536272049, "learning_rate": 1.1428821747728353e-05, "loss": 8.7471, "step": 124150 }, { "epoch": 0.620040450448201, "grad_norm": 0.08902301639318466, "learning_rate": 1.1427319832786804e-05, "loss": 8.7455, "step": 124160 }, { "epoch": 0.6200903892731404, "grad_norm": 0.09362560510635376, "learning_rate": 1.1425817917845254e-05, "loss": 8.7118, "step": 124170 }, { "epoch": 0.6201403280980798, "grad_norm": 0.09488838911056519, "learning_rate": 1.1424316002903702e-05, "loss": 8.7172, "step": 124180 }, { "epoch": 0.6201902669230193, "grad_norm": 0.09388437122106552, "learning_rate": 1.1422814087962152e-05, "loss": 8.7249, "step": 124190 }, { "epoch": 0.6202402057479588, "grad_norm": 0.09056483954191208, "learning_rate": 1.1421312173020601e-05, "loss": 8.7256, "step": 124200 }, { "epoch": 0.6202901445728982, "grad_norm": 0.09356372058391571, "learning_rate": 1.1419810258079051e-05, "loss": 8.7165, "step": 124210 }, { "epoch": 0.6203400833978376, "grad_norm": 0.09414329379796982, "learning_rate": 1.1418308343137501e-05, "loss": 8.7177, "step": 124220 }, { "epoch": 0.6203900222227771, "grad_norm": 0.08978509902954102, "learning_rate": 1.141680642819595e-05, "loss": 8.7225, "step": 124230 }, { "epoch": 0.6204399610477166, "grad_norm": 0.09141732007265091, "learning_rate": 1.14153045132544e-05, "loss": 8.7325, "step": 124240 }, { "epoch": 0.620489899872656, "grad_norm": 0.09441263973712921, "learning_rate": 1.1413802598312848e-05, "loss": 8.719, "step": 124250 }, { "epoch": 0.6205398386975954, "grad_norm": 0.09462641179561615, "learning_rate": 1.1412300683371299e-05, "loss": 8.716, "step": 124260 }, { "epoch": 0.6205897775225349, "grad_norm": 0.09261373430490494, "learning_rate": 1.1410798768429749e-05, "loss": 8.722, "step": 124270 }, { "epoch": 0.6206397163474744, "grad_norm": 0.09427915513515472, "learning_rate": 1.1409296853488197e-05, "loss": 8.7251, "step": 124280 }, { "epoch": 0.6206896551724138, "grad_norm": 0.09489326179027557, "learning_rate": 1.1407794938546647e-05, "loss": 8.7197, "step": 124290 }, { "epoch": 0.6207395939973532, "grad_norm": 0.09282520413398743, "learning_rate": 1.1406293023605096e-05, "loss": 8.7217, "step": 124300 }, { "epoch": 0.6207895328222927, "grad_norm": 0.09794396162033081, "learning_rate": 1.1404791108663546e-05, "loss": 8.7228, "step": 124310 }, { "epoch": 0.6208394716472322, "grad_norm": 0.09146995097398758, "learning_rate": 1.1403289193721996e-05, "loss": 8.7181, "step": 124320 }, { "epoch": 0.6208894104721716, "grad_norm": 0.0925363302230835, "learning_rate": 1.1401787278780446e-05, "loss": 8.7161, "step": 124330 }, { "epoch": 0.620939349297111, "grad_norm": 0.09043951332569122, "learning_rate": 1.1400285363838895e-05, "loss": 8.7191, "step": 124340 }, { "epoch": 0.6209892881220505, "grad_norm": 0.09174945205450058, "learning_rate": 1.1398783448897343e-05, "loss": 8.7329, "step": 124350 }, { "epoch": 0.62103922694699, "grad_norm": 0.09292636811733246, "learning_rate": 1.1397281533955794e-05, "loss": 8.7225, "step": 124360 }, { "epoch": 0.6210891657719294, "grad_norm": 0.09614364802837372, "learning_rate": 1.1395779619014244e-05, "loss": 8.7178, "step": 124370 }, { "epoch": 0.6211391045968688, "grad_norm": 0.09423890709877014, "learning_rate": 1.1394277704072694e-05, "loss": 8.7239, "step": 124380 }, { "epoch": 0.6211890434218083, "grad_norm": 0.08773567527532578, "learning_rate": 1.1392775789131142e-05, "loss": 8.6973, "step": 124390 }, { "epoch": 0.6212389822467478, "grad_norm": 0.09796379506587982, "learning_rate": 1.1391273874189591e-05, "loss": 8.7243, "step": 124400 }, { "epoch": 0.6212889210716872, "grad_norm": 0.08884675800800323, "learning_rate": 1.1389771959248041e-05, "loss": 8.7269, "step": 124410 }, { "epoch": 0.6213388598966266, "grad_norm": 0.09190382063388824, "learning_rate": 1.1388270044306491e-05, "loss": 8.7166, "step": 124420 }, { "epoch": 0.621388798721566, "grad_norm": 0.09155641496181488, "learning_rate": 1.1386768129364941e-05, "loss": 8.712, "step": 124430 }, { "epoch": 0.6214387375465056, "grad_norm": 0.09598276764154434, "learning_rate": 1.138526621442339e-05, "loss": 8.7123, "step": 124440 }, { "epoch": 0.621488676371445, "grad_norm": 0.0909108817577362, "learning_rate": 1.1383764299481838e-05, "loss": 8.7224, "step": 124450 }, { "epoch": 0.6215386151963844, "grad_norm": 0.0917460024356842, "learning_rate": 1.1382262384540289e-05, "loss": 8.72, "step": 124460 }, { "epoch": 0.6215885540213238, "grad_norm": 0.08814991265535355, "learning_rate": 1.1380760469598739e-05, "loss": 8.7155, "step": 124470 }, { "epoch": 0.6216384928462634, "grad_norm": 0.09747850894927979, "learning_rate": 1.1379258554657189e-05, "loss": 8.7379, "step": 124480 }, { "epoch": 0.6216884316712028, "grad_norm": 0.09266173839569092, "learning_rate": 1.137775663971564e-05, "loss": 8.7156, "step": 124490 }, { "epoch": 0.6217383704961422, "grad_norm": 0.09457944333553314, "learning_rate": 1.1376254724774086e-05, "loss": 8.7216, "step": 124500 }, { "epoch": 0.6217883093210816, "grad_norm": 0.08899705111980438, "learning_rate": 1.1374752809832536e-05, "loss": 8.7244, "step": 124510 }, { "epoch": 0.6218382481460212, "grad_norm": 0.0950658842921257, "learning_rate": 1.1373250894890986e-05, "loss": 8.7242, "step": 124520 }, { "epoch": 0.6218881869709606, "grad_norm": 0.0944470465183258, "learning_rate": 1.1371748979949436e-05, "loss": 8.724, "step": 124530 }, { "epoch": 0.6219381257959, "grad_norm": 0.08662784099578857, "learning_rate": 1.1370247065007887e-05, "loss": 8.7335, "step": 124540 }, { "epoch": 0.6219880646208394, "grad_norm": 0.09047723561525345, "learning_rate": 1.1368745150066333e-05, "loss": 8.7357, "step": 124550 }, { "epoch": 0.622038003445779, "grad_norm": 0.09099981188774109, "learning_rate": 1.1367243235124784e-05, "loss": 8.7242, "step": 124560 }, { "epoch": 0.6220879422707184, "grad_norm": 0.0873718112707138, "learning_rate": 1.1365741320183234e-05, "loss": 8.7197, "step": 124570 }, { "epoch": 0.6221378810956578, "grad_norm": 0.0861561968922615, "learning_rate": 1.1364239405241684e-05, "loss": 8.7394, "step": 124580 }, { "epoch": 0.6221878199205972, "grad_norm": 0.10405902564525604, "learning_rate": 1.1362737490300134e-05, "loss": 8.7154, "step": 124590 }, { "epoch": 0.6222377587455367, "grad_norm": 0.09721992909908295, "learning_rate": 1.1361235575358581e-05, "loss": 8.7003, "step": 124600 }, { "epoch": 0.6222876975704762, "grad_norm": 0.09676691889762878, "learning_rate": 1.1359733660417031e-05, "loss": 8.7209, "step": 124610 }, { "epoch": 0.6223376363954156, "grad_norm": 0.09333143383264542, "learning_rate": 1.1358231745475481e-05, "loss": 8.7237, "step": 124620 }, { "epoch": 0.622387575220355, "grad_norm": 0.08884768187999725, "learning_rate": 1.1356729830533931e-05, "loss": 8.7182, "step": 124630 }, { "epoch": 0.6224375140452945, "grad_norm": 0.09150359779596329, "learning_rate": 1.1355227915592382e-05, "loss": 8.7136, "step": 124640 }, { "epoch": 0.622487452870234, "grad_norm": 0.09673064947128296, "learning_rate": 1.135372600065083e-05, "loss": 8.7233, "step": 124650 }, { "epoch": 0.6225373916951734, "grad_norm": 0.09233259409666061, "learning_rate": 1.1352224085709279e-05, "loss": 8.7057, "step": 124660 }, { "epoch": 0.6225873305201128, "grad_norm": 0.09211067855358124, "learning_rate": 1.1350722170767729e-05, "loss": 8.7187, "step": 124670 }, { "epoch": 0.6226372693450523, "grad_norm": 0.09305453300476074, "learning_rate": 1.1349220255826179e-05, "loss": 8.7037, "step": 124680 }, { "epoch": 0.6226872081699918, "grad_norm": 0.08884977549314499, "learning_rate": 1.134771834088463e-05, "loss": 8.7114, "step": 124690 }, { "epoch": 0.6227371469949312, "grad_norm": 0.09412440657615662, "learning_rate": 1.1346216425943078e-05, "loss": 8.7272, "step": 124700 }, { "epoch": 0.6227870858198706, "grad_norm": 0.09514528512954712, "learning_rate": 1.1344714511001526e-05, "loss": 8.7156, "step": 124710 }, { "epoch": 0.6228370246448101, "grad_norm": 0.0961642786860466, "learning_rate": 1.1343212596059976e-05, "loss": 8.7162, "step": 124720 }, { "epoch": 0.6228869634697496, "grad_norm": 0.09910131245851517, "learning_rate": 1.1341710681118427e-05, "loss": 8.7031, "step": 124730 }, { "epoch": 0.622936902294689, "grad_norm": 0.09175023436546326, "learning_rate": 1.1340208766176877e-05, "loss": 8.7131, "step": 124740 }, { "epoch": 0.6229868411196284, "grad_norm": 0.09244109690189362, "learning_rate": 1.1338706851235325e-05, "loss": 8.7122, "step": 124750 }, { "epoch": 0.6230367799445679, "grad_norm": 0.09264490753412247, "learning_rate": 1.1337204936293774e-05, "loss": 8.7155, "step": 124760 }, { "epoch": 0.6230867187695074, "grad_norm": 0.09691507369279861, "learning_rate": 1.1335703021352224e-05, "loss": 8.7146, "step": 124770 }, { "epoch": 0.6231366575944468, "grad_norm": 0.08869668841362, "learning_rate": 1.1334201106410674e-05, "loss": 8.7173, "step": 124780 }, { "epoch": 0.6231865964193862, "grad_norm": 0.09124024212360382, "learning_rate": 1.1332699191469124e-05, "loss": 8.7296, "step": 124790 }, { "epoch": 0.6232365352443257, "grad_norm": 0.09584067761898041, "learning_rate": 1.1331197276527573e-05, "loss": 8.7047, "step": 124800 }, { "epoch": 0.6232864740692652, "grad_norm": 0.08927422761917114, "learning_rate": 1.1329695361586023e-05, "loss": 8.7281, "step": 124810 }, { "epoch": 0.6233364128942046, "grad_norm": 0.09715837985277176, "learning_rate": 1.1328193446644471e-05, "loss": 8.7242, "step": 124820 }, { "epoch": 0.623386351719144, "grad_norm": 0.09197895228862762, "learning_rate": 1.1326691531702922e-05, "loss": 8.7181, "step": 124830 }, { "epoch": 0.6234362905440834, "grad_norm": 0.0883011445403099, "learning_rate": 1.1325189616761372e-05, "loss": 8.7173, "step": 124840 }, { "epoch": 0.623486229369023, "grad_norm": 0.08731172233819962, "learning_rate": 1.132368770181982e-05, "loss": 8.7087, "step": 124850 }, { "epoch": 0.6235361681939624, "grad_norm": 0.09282612055540085, "learning_rate": 1.132218578687827e-05, "loss": 8.7295, "step": 124860 }, { "epoch": 0.6235861070189018, "grad_norm": 0.0898682028055191, "learning_rate": 1.1320683871936719e-05, "loss": 8.7312, "step": 124870 }, { "epoch": 0.6236360458438412, "grad_norm": 0.09973151236772537, "learning_rate": 1.1319181956995169e-05, "loss": 8.7089, "step": 124880 }, { "epoch": 0.6236859846687808, "grad_norm": 0.10166613757610321, "learning_rate": 1.131768004205362e-05, "loss": 8.7065, "step": 124890 }, { "epoch": 0.6237359234937202, "grad_norm": 0.10042425245046616, "learning_rate": 1.1316178127112068e-05, "loss": 8.716, "step": 124900 }, { "epoch": 0.6237858623186596, "grad_norm": 0.09376679360866547, "learning_rate": 1.1314676212170518e-05, "loss": 8.7169, "step": 124910 }, { "epoch": 0.623835801143599, "grad_norm": 0.09402406215667725, "learning_rate": 1.1313174297228966e-05, "loss": 8.7226, "step": 124920 }, { "epoch": 0.6238857399685386, "grad_norm": 0.0880286768078804, "learning_rate": 1.1311672382287417e-05, "loss": 8.7149, "step": 124930 }, { "epoch": 0.623935678793478, "grad_norm": 0.09521077573299408, "learning_rate": 1.1310170467345867e-05, "loss": 8.7109, "step": 124940 }, { "epoch": 0.6239856176184174, "grad_norm": 0.09367305040359497, "learning_rate": 1.1308668552404315e-05, "loss": 8.7349, "step": 124950 }, { "epoch": 0.6240355564433568, "grad_norm": 0.08822840452194214, "learning_rate": 1.1307166637462765e-05, "loss": 8.7135, "step": 124960 }, { "epoch": 0.6240854952682964, "grad_norm": 0.09069608896970749, "learning_rate": 1.1305664722521216e-05, "loss": 8.7098, "step": 124970 }, { "epoch": 0.6241354340932358, "grad_norm": 0.09736108034849167, "learning_rate": 1.1304162807579664e-05, "loss": 8.7174, "step": 124980 }, { "epoch": 0.6241853729181752, "grad_norm": 0.09728176891803741, "learning_rate": 1.1302660892638114e-05, "loss": 8.7019, "step": 124990 }, { "epoch": 0.6242353117431146, "grad_norm": 0.0918160006403923, "learning_rate": 1.1301158977696563e-05, "loss": 8.7286, "step": 125000 }, { "epoch": 0.6242852505680542, "grad_norm": 0.09253043681383133, "learning_rate": 1.1299657062755013e-05, "loss": 8.7112, "step": 125010 }, { "epoch": 0.6243351893929936, "grad_norm": 0.09095212817192078, "learning_rate": 1.1298155147813463e-05, "loss": 8.7239, "step": 125020 }, { "epoch": 0.624385128217933, "grad_norm": 0.09760838001966476, "learning_rate": 1.1296653232871912e-05, "loss": 8.7218, "step": 125030 }, { "epoch": 0.6244350670428724, "grad_norm": 0.09743019938468933, "learning_rate": 1.1295151317930362e-05, "loss": 8.7253, "step": 125040 }, { "epoch": 0.624485005867812, "grad_norm": 0.09515538066625595, "learning_rate": 1.129364940298881e-05, "loss": 8.7092, "step": 125050 }, { "epoch": 0.6245349446927514, "grad_norm": 0.08986768871545792, "learning_rate": 1.129214748804726e-05, "loss": 8.7231, "step": 125060 }, { "epoch": 0.6245848835176908, "grad_norm": 0.09468900412321091, "learning_rate": 1.129064557310571e-05, "loss": 8.7225, "step": 125070 }, { "epoch": 0.6246348223426302, "grad_norm": 0.0945427417755127, "learning_rate": 1.1289143658164159e-05, "loss": 8.7031, "step": 125080 }, { "epoch": 0.6246847611675698, "grad_norm": 0.0921517014503479, "learning_rate": 1.128764174322261e-05, "loss": 8.712, "step": 125090 }, { "epoch": 0.6247346999925092, "grad_norm": 0.09056264162063599, "learning_rate": 1.1286139828281058e-05, "loss": 8.7088, "step": 125100 }, { "epoch": 0.6247846388174486, "grad_norm": 0.0919748991727829, "learning_rate": 1.1284637913339508e-05, "loss": 8.7197, "step": 125110 }, { "epoch": 0.624834577642388, "grad_norm": 0.0946049764752388, "learning_rate": 1.1283135998397958e-05, "loss": 8.7279, "step": 125120 }, { "epoch": 0.6248845164673276, "grad_norm": 0.09442543238401413, "learning_rate": 1.1281634083456408e-05, "loss": 8.7228, "step": 125130 }, { "epoch": 0.624934455292267, "grad_norm": 0.09224551171064377, "learning_rate": 1.1280132168514857e-05, "loss": 8.7196, "step": 125140 }, { "epoch": 0.6249843941172064, "grad_norm": 0.0918484553694725, "learning_rate": 1.1278630253573305e-05, "loss": 8.7279, "step": 125150 }, { "epoch": 0.6250343329421458, "grad_norm": 0.08815157413482666, "learning_rate": 1.1277128338631755e-05, "loss": 8.7135, "step": 125160 }, { "epoch": 0.6250842717670854, "grad_norm": 0.09147334843873978, "learning_rate": 1.1275626423690206e-05, "loss": 8.7166, "step": 125170 }, { "epoch": 0.6251342105920248, "grad_norm": 0.09084770083427429, "learning_rate": 1.1274124508748656e-05, "loss": 8.7209, "step": 125180 }, { "epoch": 0.6251841494169642, "grad_norm": 0.0943610891699791, "learning_rate": 1.1272622593807104e-05, "loss": 8.7096, "step": 125190 }, { "epoch": 0.6252340882419036, "grad_norm": 0.09305550903081894, "learning_rate": 1.1271120678865553e-05, "loss": 8.7275, "step": 125200 }, { "epoch": 0.6252840270668432, "grad_norm": 0.09299591183662415, "learning_rate": 1.1269618763924003e-05, "loss": 8.7027, "step": 125210 }, { "epoch": 0.6253339658917826, "grad_norm": 0.0949455201625824, "learning_rate": 1.1268116848982453e-05, "loss": 8.7092, "step": 125220 }, { "epoch": 0.625383904716722, "grad_norm": 0.09270406514406204, "learning_rate": 1.1266614934040903e-05, "loss": 8.7045, "step": 125230 }, { "epoch": 0.6254338435416614, "grad_norm": 0.09729525446891785, "learning_rate": 1.1265113019099352e-05, "loss": 8.7111, "step": 125240 }, { "epoch": 0.625483782366601, "grad_norm": 0.09073136001825333, "learning_rate": 1.12636111041578e-05, "loss": 8.7025, "step": 125250 }, { "epoch": 0.6255337211915404, "grad_norm": 0.08994272351264954, "learning_rate": 1.126210918921625e-05, "loss": 8.7221, "step": 125260 }, { "epoch": 0.6255836600164798, "grad_norm": 0.09095024317502975, "learning_rate": 1.12606072742747e-05, "loss": 8.7107, "step": 125270 }, { "epoch": 0.6256335988414192, "grad_norm": 0.09382519870996475, "learning_rate": 1.125910535933315e-05, "loss": 8.6977, "step": 125280 }, { "epoch": 0.6256835376663588, "grad_norm": 0.09506542235612869, "learning_rate": 1.1257603444391601e-05, "loss": 8.694, "step": 125290 }, { "epoch": 0.6257334764912982, "grad_norm": 0.08988244086503983, "learning_rate": 1.125610152945005e-05, "loss": 8.7117, "step": 125300 }, { "epoch": 0.6257834153162376, "grad_norm": 0.0954940915107727, "learning_rate": 1.1254599614508498e-05, "loss": 8.7261, "step": 125310 }, { "epoch": 0.625833354141177, "grad_norm": 0.0926600992679596, "learning_rate": 1.1253097699566948e-05, "loss": 8.7071, "step": 125320 }, { "epoch": 0.6258832929661166, "grad_norm": 0.09239501506090164, "learning_rate": 1.1251595784625398e-05, "loss": 8.7221, "step": 125330 }, { "epoch": 0.625933231791056, "grad_norm": 0.09140519052743912, "learning_rate": 1.1250093869683848e-05, "loss": 8.7182, "step": 125340 }, { "epoch": 0.6259831706159954, "grad_norm": 0.09351127594709396, "learning_rate": 1.1248591954742297e-05, "loss": 8.7191, "step": 125350 }, { "epoch": 0.6260331094409348, "grad_norm": 0.09365622699260712, "learning_rate": 1.1247090039800745e-05, "loss": 8.7149, "step": 125360 }, { "epoch": 0.6260830482658744, "grad_norm": 0.09343401342630386, "learning_rate": 1.1245588124859196e-05, "loss": 8.7062, "step": 125370 }, { "epoch": 0.6261329870908138, "grad_norm": 0.09367363899946213, "learning_rate": 1.1244086209917646e-05, "loss": 8.7105, "step": 125380 }, { "epoch": 0.6261829259157532, "grad_norm": 0.09021138399839401, "learning_rate": 1.1242584294976096e-05, "loss": 8.714, "step": 125390 }, { "epoch": 0.6262328647406926, "grad_norm": 0.10367359220981598, "learning_rate": 1.1241082380034544e-05, "loss": 8.7316, "step": 125400 }, { "epoch": 0.6262828035656322, "grad_norm": 0.09110212326049805, "learning_rate": 1.1239580465092993e-05, "loss": 8.723, "step": 125410 }, { "epoch": 0.6263327423905716, "grad_norm": 0.09497211873531342, "learning_rate": 1.1238078550151443e-05, "loss": 8.719, "step": 125420 }, { "epoch": 0.626382681215511, "grad_norm": 0.08793065696954727, "learning_rate": 1.1236576635209893e-05, "loss": 8.7118, "step": 125430 }, { "epoch": 0.6264326200404504, "grad_norm": 0.09238415211439133, "learning_rate": 1.1235074720268343e-05, "loss": 8.7091, "step": 125440 }, { "epoch": 0.62648255886539, "grad_norm": 0.08847446739673615, "learning_rate": 1.1233572805326794e-05, "loss": 8.7071, "step": 125450 }, { "epoch": 0.6265324976903294, "grad_norm": 0.09379098564386368, "learning_rate": 1.123207089038524e-05, "loss": 8.7228, "step": 125460 }, { "epoch": 0.6265824365152688, "grad_norm": 0.09281666576862335, "learning_rate": 1.123056897544369e-05, "loss": 8.7114, "step": 125470 }, { "epoch": 0.6266323753402082, "grad_norm": 0.0954156294465065, "learning_rate": 1.122906706050214e-05, "loss": 8.7198, "step": 125480 }, { "epoch": 0.6266823141651477, "grad_norm": 0.091185063123703, "learning_rate": 1.1227565145560591e-05, "loss": 8.7046, "step": 125490 }, { "epoch": 0.6267322529900872, "grad_norm": 0.09116750210523605, "learning_rate": 1.1226063230619041e-05, "loss": 8.7127, "step": 125500 }, { "epoch": 0.6267821918150266, "grad_norm": 0.09332124143838882, "learning_rate": 1.1224561315677488e-05, "loss": 8.7211, "step": 125510 }, { "epoch": 0.626832130639966, "grad_norm": 0.08798982203006744, "learning_rate": 1.1223059400735938e-05, "loss": 8.6972, "step": 125520 }, { "epoch": 0.6268820694649055, "grad_norm": 0.08885789662599564, "learning_rate": 1.1221557485794388e-05, "loss": 8.7219, "step": 125530 }, { "epoch": 0.626932008289845, "grad_norm": 0.10069695860147476, "learning_rate": 1.1220055570852838e-05, "loss": 8.699, "step": 125540 }, { "epoch": 0.6269819471147844, "grad_norm": 0.0930192843079567, "learning_rate": 1.1218553655911289e-05, "loss": 8.7112, "step": 125550 }, { "epoch": 0.6270318859397238, "grad_norm": 0.09061025083065033, "learning_rate": 1.1217051740969735e-05, "loss": 8.7135, "step": 125560 }, { "epoch": 0.6270818247646633, "grad_norm": 0.08885504305362701, "learning_rate": 1.1215549826028186e-05, "loss": 8.7175, "step": 125570 }, { "epoch": 0.6271317635896028, "grad_norm": 0.09031138569116592, "learning_rate": 1.1214047911086636e-05, "loss": 8.7082, "step": 125580 }, { "epoch": 0.6271817024145422, "grad_norm": 0.09189961850643158, "learning_rate": 1.1212545996145086e-05, "loss": 8.7175, "step": 125590 }, { "epoch": 0.6272316412394816, "grad_norm": 0.09408345818519592, "learning_rate": 1.1211044081203536e-05, "loss": 8.7174, "step": 125600 }, { "epoch": 0.627281580064421, "grad_norm": 0.09308337420225143, "learning_rate": 1.1209542166261985e-05, "loss": 8.7101, "step": 125610 }, { "epoch": 0.6273315188893606, "grad_norm": 0.09275424480438232, "learning_rate": 1.1208040251320433e-05, "loss": 8.7109, "step": 125620 }, { "epoch": 0.6273814577143, "grad_norm": 0.10062972456216812, "learning_rate": 1.1206538336378883e-05, "loss": 8.6987, "step": 125630 }, { "epoch": 0.6274313965392394, "grad_norm": 0.09465427696704865, "learning_rate": 1.1205036421437333e-05, "loss": 8.7145, "step": 125640 }, { "epoch": 0.6274813353641788, "grad_norm": 0.09311853349208832, "learning_rate": 1.1203534506495784e-05, "loss": 8.7253, "step": 125650 }, { "epoch": 0.6275312741891184, "grad_norm": 0.088384248316288, "learning_rate": 1.1202032591554232e-05, "loss": 8.7049, "step": 125660 }, { "epoch": 0.6275812130140578, "grad_norm": 0.09492412954568863, "learning_rate": 1.120053067661268e-05, "loss": 8.7136, "step": 125670 }, { "epoch": 0.6276311518389972, "grad_norm": 0.10065683722496033, "learning_rate": 1.119902876167113e-05, "loss": 8.7108, "step": 125680 }, { "epoch": 0.6276810906639366, "grad_norm": 0.0918295755982399, "learning_rate": 1.1197526846729581e-05, "loss": 8.7085, "step": 125690 }, { "epoch": 0.6277310294888762, "grad_norm": 0.09792115539312363, "learning_rate": 1.1196024931788031e-05, "loss": 8.6919, "step": 125700 }, { "epoch": 0.6277809683138156, "grad_norm": 0.08935079723596573, "learning_rate": 1.119452301684648e-05, "loss": 8.7047, "step": 125710 }, { "epoch": 0.627830907138755, "grad_norm": 0.09444247931241989, "learning_rate": 1.1193021101904928e-05, "loss": 8.7183, "step": 125720 }, { "epoch": 0.6278808459636944, "grad_norm": 0.09201852232217789, "learning_rate": 1.1191519186963378e-05, "loss": 8.7243, "step": 125730 }, { "epoch": 0.627930784788634, "grad_norm": 0.09014883637428284, "learning_rate": 1.1190017272021828e-05, "loss": 8.7091, "step": 125740 }, { "epoch": 0.6279807236135734, "grad_norm": 0.091966412961483, "learning_rate": 1.1188515357080279e-05, "loss": 8.7245, "step": 125750 }, { "epoch": 0.6280306624385128, "grad_norm": 0.08749305456876755, "learning_rate": 1.1187013442138727e-05, "loss": 8.7155, "step": 125760 }, { "epoch": 0.6280806012634522, "grad_norm": 0.09407436102628708, "learning_rate": 1.1185511527197177e-05, "loss": 8.7034, "step": 125770 }, { "epoch": 0.6281305400883918, "grad_norm": 0.09107963740825653, "learning_rate": 1.1184009612255626e-05, "loss": 8.7061, "step": 125780 }, { "epoch": 0.6281804789133312, "grad_norm": 0.09338110685348511, "learning_rate": 1.1182507697314076e-05, "loss": 8.7152, "step": 125790 }, { "epoch": 0.6282304177382706, "grad_norm": 0.09084051847457886, "learning_rate": 1.1181005782372526e-05, "loss": 8.7097, "step": 125800 }, { "epoch": 0.62828035656321, "grad_norm": 0.09835775941610336, "learning_rate": 1.1179503867430975e-05, "loss": 8.7238, "step": 125810 }, { "epoch": 0.6283302953881496, "grad_norm": 0.09301155060529709, "learning_rate": 1.1178001952489425e-05, "loss": 8.7027, "step": 125820 }, { "epoch": 0.628380234213089, "grad_norm": 0.08785326033830643, "learning_rate": 1.1176500037547873e-05, "loss": 8.7189, "step": 125830 }, { "epoch": 0.6284301730380284, "grad_norm": 0.08803771436214447, "learning_rate": 1.1174998122606323e-05, "loss": 8.7123, "step": 125840 }, { "epoch": 0.6284801118629678, "grad_norm": 0.0908784344792366, "learning_rate": 1.1173496207664774e-05, "loss": 8.7182, "step": 125850 }, { "epoch": 0.6285300506879073, "grad_norm": 0.0862756222486496, "learning_rate": 1.1171994292723222e-05, "loss": 8.7111, "step": 125860 }, { "epoch": 0.6285799895128468, "grad_norm": 0.09046090394258499, "learning_rate": 1.1170492377781672e-05, "loss": 8.7195, "step": 125870 }, { "epoch": 0.6286299283377862, "grad_norm": 0.08847752958536148, "learning_rate": 1.116899046284012e-05, "loss": 8.7092, "step": 125880 }, { "epoch": 0.6286798671627256, "grad_norm": 0.09415357559919357, "learning_rate": 1.1167488547898571e-05, "loss": 8.6979, "step": 125890 }, { "epoch": 0.6287298059876651, "grad_norm": 0.09321362525224686, "learning_rate": 1.1165986632957021e-05, "loss": 8.7019, "step": 125900 }, { "epoch": 0.6287797448126046, "grad_norm": 0.09205802530050278, "learning_rate": 1.116448471801547e-05, "loss": 8.7374, "step": 125910 }, { "epoch": 0.628829683637544, "grad_norm": 0.0888669490814209, "learning_rate": 1.116298280307392e-05, "loss": 8.7149, "step": 125920 }, { "epoch": 0.6288796224624834, "grad_norm": 0.08783555775880814, "learning_rate": 1.1161480888132368e-05, "loss": 8.7195, "step": 125930 }, { "epoch": 0.628929561287423, "grad_norm": 0.09451974183320999, "learning_rate": 1.1159978973190818e-05, "loss": 8.7245, "step": 125940 }, { "epoch": 0.6289795001123624, "grad_norm": 0.09017028659582138, "learning_rate": 1.1158477058249269e-05, "loss": 8.7217, "step": 125950 }, { "epoch": 0.6290294389373018, "grad_norm": 0.09248178452253342, "learning_rate": 1.1156975143307717e-05, "loss": 8.7001, "step": 125960 }, { "epoch": 0.6290793777622412, "grad_norm": 0.09288008511066437, "learning_rate": 1.1155473228366167e-05, "loss": 8.6956, "step": 125970 }, { "epoch": 0.6291293165871807, "grad_norm": 0.08859378844499588, "learning_rate": 1.1153971313424617e-05, "loss": 8.7073, "step": 125980 }, { "epoch": 0.6291792554121202, "grad_norm": 0.09456798434257507, "learning_rate": 1.1152469398483066e-05, "loss": 8.7203, "step": 125990 }, { "epoch": 0.6292291942370596, "grad_norm": 0.09040806442499161, "learning_rate": 1.1150967483541516e-05, "loss": 8.711, "step": 126000 }, { "epoch": 0.629279133061999, "grad_norm": 0.09092914313077927, "learning_rate": 1.1149465568599965e-05, "loss": 8.7157, "step": 126010 }, { "epoch": 0.6293290718869385, "grad_norm": 0.08959808945655823, "learning_rate": 1.1147963653658415e-05, "loss": 8.7037, "step": 126020 }, { "epoch": 0.629379010711878, "grad_norm": 0.09273937344551086, "learning_rate": 1.1146461738716865e-05, "loss": 8.7036, "step": 126030 }, { "epoch": 0.6294289495368174, "grad_norm": 0.09658925980329514, "learning_rate": 1.1144959823775313e-05, "loss": 8.7071, "step": 126040 }, { "epoch": 0.6294788883617568, "grad_norm": 0.09372569620609283, "learning_rate": 1.1143457908833764e-05, "loss": 8.7113, "step": 126050 }, { "epoch": 0.6295288271866963, "grad_norm": 0.08820081502199173, "learning_rate": 1.1141955993892212e-05, "loss": 8.7279, "step": 126060 }, { "epoch": 0.6295787660116358, "grad_norm": 0.09399138391017914, "learning_rate": 1.1140454078950662e-05, "loss": 8.7203, "step": 126070 }, { "epoch": 0.6296287048365752, "grad_norm": 0.09462126344442368, "learning_rate": 1.1138952164009112e-05, "loss": 8.7135, "step": 126080 }, { "epoch": 0.6296786436615146, "grad_norm": 0.09695866703987122, "learning_rate": 1.1137450249067561e-05, "loss": 8.6986, "step": 126090 }, { "epoch": 0.6297285824864541, "grad_norm": 0.09450877457857132, "learning_rate": 1.1135948334126011e-05, "loss": 8.7052, "step": 126100 }, { "epoch": 0.6297785213113936, "grad_norm": 0.09167155623435974, "learning_rate": 1.113444641918446e-05, "loss": 8.7027, "step": 126110 }, { "epoch": 0.629828460136333, "grad_norm": 0.09358896315097809, "learning_rate": 1.113294450424291e-05, "loss": 8.7079, "step": 126120 }, { "epoch": 0.6298783989612724, "grad_norm": 0.10010679066181183, "learning_rate": 1.113144258930136e-05, "loss": 8.701, "step": 126130 }, { "epoch": 0.6299283377862119, "grad_norm": 0.09216713160276413, "learning_rate": 1.112994067435981e-05, "loss": 8.7026, "step": 126140 }, { "epoch": 0.6299782766111514, "grad_norm": 0.08950921893119812, "learning_rate": 1.1128438759418259e-05, "loss": 8.6985, "step": 126150 }, { "epoch": 0.6300282154360908, "grad_norm": 0.09360364824533463, "learning_rate": 1.1126936844476707e-05, "loss": 8.728, "step": 126160 }, { "epoch": 0.6300781542610302, "grad_norm": 0.09034190326929092, "learning_rate": 1.1125434929535157e-05, "loss": 8.7074, "step": 126170 }, { "epoch": 0.6301280930859697, "grad_norm": 0.09222965687513351, "learning_rate": 1.1123933014593607e-05, "loss": 8.7075, "step": 126180 }, { "epoch": 0.6301780319109092, "grad_norm": 0.09020216763019562, "learning_rate": 1.1122431099652058e-05, "loss": 8.6951, "step": 126190 }, { "epoch": 0.6302279707358486, "grad_norm": 0.09144456684589386, "learning_rate": 1.1120929184710506e-05, "loss": 8.7062, "step": 126200 }, { "epoch": 0.630277909560788, "grad_norm": 0.09432315826416016, "learning_rate": 1.1119427269768955e-05, "loss": 8.712, "step": 126210 }, { "epoch": 0.6303278483857275, "grad_norm": 0.09091579914093018, "learning_rate": 1.1117925354827405e-05, "loss": 8.7356, "step": 126220 }, { "epoch": 0.630377787210667, "grad_norm": 0.08974793553352356, "learning_rate": 1.1116423439885855e-05, "loss": 8.7139, "step": 126230 }, { "epoch": 0.6304277260356064, "grad_norm": 0.09444157779216766, "learning_rate": 1.1114921524944305e-05, "loss": 8.7199, "step": 126240 }, { "epoch": 0.6304776648605458, "grad_norm": 0.09196534752845764, "learning_rate": 1.1113419610002754e-05, "loss": 8.7038, "step": 126250 }, { "epoch": 0.6305276036854853, "grad_norm": 0.09097559005022049, "learning_rate": 1.1111917695061202e-05, "loss": 8.7136, "step": 126260 }, { "epoch": 0.6305775425104247, "grad_norm": 0.08905323594808578, "learning_rate": 1.1110415780119652e-05, "loss": 8.7215, "step": 126270 }, { "epoch": 0.6306274813353642, "grad_norm": 0.09112866222858429, "learning_rate": 1.1108913865178103e-05, "loss": 8.7085, "step": 126280 }, { "epoch": 0.6306774201603036, "grad_norm": 0.08942396938800812, "learning_rate": 1.1107411950236553e-05, "loss": 8.7192, "step": 126290 }, { "epoch": 0.6307273589852431, "grad_norm": 0.09311738610267639, "learning_rate": 1.1105910035295003e-05, "loss": 8.6981, "step": 126300 }, { "epoch": 0.6307772978101825, "grad_norm": 0.09299565851688385, "learning_rate": 1.110440812035345e-05, "loss": 8.71, "step": 126310 }, { "epoch": 0.630827236635122, "grad_norm": 0.09112560003995895, "learning_rate": 1.11029062054119e-05, "loss": 8.6983, "step": 126320 }, { "epoch": 0.6308771754600614, "grad_norm": 0.09203418344259262, "learning_rate": 1.110140429047035e-05, "loss": 8.7096, "step": 126330 }, { "epoch": 0.6309271142850009, "grad_norm": 0.09365582466125488, "learning_rate": 1.10999023755288e-05, "loss": 8.6942, "step": 126340 }, { "epoch": 0.6309770531099403, "grad_norm": 0.09594764560461044, "learning_rate": 1.109840046058725e-05, "loss": 8.7237, "step": 126350 }, { "epoch": 0.6310269919348798, "grad_norm": 0.08934248983860016, "learning_rate": 1.1096898545645697e-05, "loss": 8.702, "step": 126360 }, { "epoch": 0.6310769307598192, "grad_norm": 0.09041931480169296, "learning_rate": 1.1095396630704147e-05, "loss": 8.7068, "step": 126370 }, { "epoch": 0.6311268695847587, "grad_norm": 0.09372342377901077, "learning_rate": 1.1093894715762598e-05, "loss": 8.6993, "step": 126380 }, { "epoch": 0.6311768084096981, "grad_norm": 0.09366505593061447, "learning_rate": 1.1092392800821048e-05, "loss": 8.7181, "step": 126390 }, { "epoch": 0.6312267472346376, "grad_norm": 0.09063727408647537, "learning_rate": 1.1090890885879498e-05, "loss": 8.7059, "step": 126400 }, { "epoch": 0.631276686059577, "grad_norm": 0.0880524069070816, "learning_rate": 1.1089388970937945e-05, "loss": 8.7184, "step": 126410 }, { "epoch": 0.6313266248845165, "grad_norm": 0.09939765185117722, "learning_rate": 1.1087887055996395e-05, "loss": 8.7115, "step": 126420 }, { "epoch": 0.6313765637094559, "grad_norm": 0.09165541082620621, "learning_rate": 1.1086385141054845e-05, "loss": 8.7044, "step": 126430 }, { "epoch": 0.6314265025343954, "grad_norm": 0.08993414044380188, "learning_rate": 1.1084883226113295e-05, "loss": 8.7114, "step": 126440 }, { "epoch": 0.6314764413593348, "grad_norm": 0.09997765719890594, "learning_rate": 1.1083381311171745e-05, "loss": 8.7137, "step": 126450 }, { "epoch": 0.6315263801842743, "grad_norm": 0.09372782707214355, "learning_rate": 1.1081879396230194e-05, "loss": 8.6919, "step": 126460 }, { "epoch": 0.6315763190092137, "grad_norm": 0.09590387344360352, "learning_rate": 1.1080377481288642e-05, "loss": 8.7069, "step": 126470 }, { "epoch": 0.6316262578341532, "grad_norm": 0.09233605861663818, "learning_rate": 1.1078875566347093e-05, "loss": 8.694, "step": 126480 }, { "epoch": 0.6316761966590926, "grad_norm": 0.09469950944185257, "learning_rate": 1.1077373651405543e-05, "loss": 8.7218, "step": 126490 }, { "epoch": 0.6317261354840321, "grad_norm": 0.08625823259353638, "learning_rate": 1.1075871736463993e-05, "loss": 8.7083, "step": 126500 }, { "epoch": 0.6317760743089715, "grad_norm": 0.09083887934684753, "learning_rate": 1.1074369821522441e-05, "loss": 8.6994, "step": 126510 }, { "epoch": 0.631826013133911, "grad_norm": 0.097719706594944, "learning_rate": 1.107286790658089e-05, "loss": 8.7016, "step": 126520 }, { "epoch": 0.6318759519588504, "grad_norm": 0.09654133021831512, "learning_rate": 1.107136599163934e-05, "loss": 8.7169, "step": 126530 }, { "epoch": 0.6319258907837899, "grad_norm": 0.09275779873132706, "learning_rate": 1.106986407669779e-05, "loss": 8.7157, "step": 126540 }, { "epoch": 0.6319758296087293, "grad_norm": 0.08812788128852844, "learning_rate": 1.106836216175624e-05, "loss": 8.6997, "step": 126550 }, { "epoch": 0.6320257684336688, "grad_norm": 0.09277535229921341, "learning_rate": 1.1066860246814689e-05, "loss": 8.7109, "step": 126560 }, { "epoch": 0.6320757072586082, "grad_norm": 0.09549902379512787, "learning_rate": 1.1065358331873137e-05, "loss": 8.701, "step": 126570 }, { "epoch": 0.6321256460835476, "grad_norm": 0.08873405307531357, "learning_rate": 1.1063856416931588e-05, "loss": 8.7066, "step": 126580 }, { "epoch": 0.6321755849084871, "grad_norm": 0.09165298938751221, "learning_rate": 1.1062354501990038e-05, "loss": 8.7016, "step": 126590 }, { "epoch": 0.6322255237334266, "grad_norm": 0.09139636904001236, "learning_rate": 1.1060852587048488e-05, "loss": 8.7104, "step": 126600 }, { "epoch": 0.632275462558366, "grad_norm": 0.09368384629487991, "learning_rate": 1.1059350672106936e-05, "loss": 8.7085, "step": 126610 }, { "epoch": 0.6323254013833054, "grad_norm": 0.0939926728606224, "learning_rate": 1.1057848757165387e-05, "loss": 8.7038, "step": 126620 }, { "epoch": 0.6323753402082449, "grad_norm": 0.0963914766907692, "learning_rate": 1.1056346842223835e-05, "loss": 8.7162, "step": 126630 }, { "epoch": 0.6324252790331844, "grad_norm": 0.08765320479869843, "learning_rate": 1.1054844927282285e-05, "loss": 8.7177, "step": 126640 }, { "epoch": 0.6324752178581238, "grad_norm": 0.09075529873371124, "learning_rate": 1.1053343012340735e-05, "loss": 8.7002, "step": 126650 }, { "epoch": 0.6325251566830632, "grad_norm": 0.09381550550460815, "learning_rate": 1.1051841097399184e-05, "loss": 8.7145, "step": 126660 }, { "epoch": 0.6325750955080027, "grad_norm": 0.09270758181810379, "learning_rate": 1.1050339182457634e-05, "loss": 8.7087, "step": 126670 }, { "epoch": 0.6326250343329421, "grad_norm": 0.08957044035196304, "learning_rate": 1.1048837267516083e-05, "loss": 8.7222, "step": 126680 }, { "epoch": 0.6326749731578816, "grad_norm": 0.09274608641862869, "learning_rate": 1.1047335352574533e-05, "loss": 8.7215, "step": 126690 }, { "epoch": 0.632724911982821, "grad_norm": 0.08785900473594666, "learning_rate": 1.1045833437632983e-05, "loss": 8.7077, "step": 126700 }, { "epoch": 0.6327748508077605, "grad_norm": 0.0963745191693306, "learning_rate": 1.1044331522691431e-05, "loss": 8.7214, "step": 126710 }, { "epoch": 0.6328247896327, "grad_norm": 0.09362554550170898, "learning_rate": 1.1042829607749882e-05, "loss": 8.6988, "step": 126720 }, { "epoch": 0.6328747284576394, "grad_norm": 0.09830887615680695, "learning_rate": 1.104132769280833e-05, "loss": 8.7249, "step": 126730 }, { "epoch": 0.6329246672825788, "grad_norm": 0.09366270899772644, "learning_rate": 1.103982577786678e-05, "loss": 8.714, "step": 126740 }, { "epoch": 0.6329746061075183, "grad_norm": 0.09500569850206375, "learning_rate": 1.103832386292523e-05, "loss": 8.7131, "step": 126750 }, { "epoch": 0.6330245449324577, "grad_norm": 0.09202902019023895, "learning_rate": 1.1036821947983679e-05, "loss": 8.71, "step": 126760 }, { "epoch": 0.6330744837573972, "grad_norm": 0.09248954057693481, "learning_rate": 1.1035320033042129e-05, "loss": 8.6971, "step": 126770 }, { "epoch": 0.6331244225823366, "grad_norm": 0.09548656642436981, "learning_rate": 1.103381811810058e-05, "loss": 8.7115, "step": 126780 }, { "epoch": 0.6331743614072761, "grad_norm": 0.09098765254020691, "learning_rate": 1.1032316203159028e-05, "loss": 8.6862, "step": 126790 }, { "epoch": 0.6332243002322155, "grad_norm": 0.08989952504634857, "learning_rate": 1.1030814288217478e-05, "loss": 8.7133, "step": 126800 }, { "epoch": 0.633274239057155, "grad_norm": 0.09226398915052414, "learning_rate": 1.1029312373275926e-05, "loss": 8.7104, "step": 126810 }, { "epoch": 0.6333241778820944, "grad_norm": 0.09326343238353729, "learning_rate": 1.1027810458334377e-05, "loss": 8.7034, "step": 126820 }, { "epoch": 0.6333741167070339, "grad_norm": 0.09733281284570694, "learning_rate": 1.1026308543392827e-05, "loss": 8.6897, "step": 126830 }, { "epoch": 0.6334240555319733, "grad_norm": 0.0914359837770462, "learning_rate": 1.1024806628451275e-05, "loss": 8.7046, "step": 126840 }, { "epoch": 0.6334739943569128, "grad_norm": 0.08934281766414642, "learning_rate": 1.1023304713509725e-05, "loss": 8.7141, "step": 126850 }, { "epoch": 0.6335239331818522, "grad_norm": 0.09313992410898209, "learning_rate": 1.1021802798568174e-05, "loss": 8.7073, "step": 126860 }, { "epoch": 0.6335738720067917, "grad_norm": 0.09446176141500473, "learning_rate": 1.1020300883626624e-05, "loss": 8.6979, "step": 126870 }, { "epoch": 0.6336238108317311, "grad_norm": 0.09279874712228775, "learning_rate": 1.1018798968685074e-05, "loss": 8.6945, "step": 126880 }, { "epoch": 0.6336737496566706, "grad_norm": 0.09727776795625687, "learning_rate": 1.1017297053743523e-05, "loss": 8.6937, "step": 126890 }, { "epoch": 0.63372368848161, "grad_norm": 0.09010564535856247, "learning_rate": 1.1015795138801973e-05, "loss": 8.7025, "step": 126900 }, { "epoch": 0.6337736273065495, "grad_norm": 0.08602187037467957, "learning_rate": 1.1014293223860421e-05, "loss": 8.7096, "step": 126910 }, { "epoch": 0.6338235661314889, "grad_norm": 0.09525911509990692, "learning_rate": 1.1012791308918872e-05, "loss": 8.7116, "step": 126920 }, { "epoch": 0.6338735049564284, "grad_norm": 0.08880488574504852, "learning_rate": 1.1011289393977322e-05, "loss": 8.7006, "step": 126930 }, { "epoch": 0.6339234437813678, "grad_norm": 0.09625755250453949, "learning_rate": 1.1009787479035772e-05, "loss": 8.6934, "step": 126940 }, { "epoch": 0.6339733826063073, "grad_norm": 0.0941665768623352, "learning_rate": 1.100828556409422e-05, "loss": 8.7106, "step": 126950 }, { "epoch": 0.6340233214312467, "grad_norm": 0.08782870322465897, "learning_rate": 1.1006783649152669e-05, "loss": 8.7069, "step": 126960 }, { "epoch": 0.6340732602561862, "grad_norm": 0.08948568999767303, "learning_rate": 1.1005281734211119e-05, "loss": 8.7111, "step": 126970 }, { "epoch": 0.6341231990811256, "grad_norm": 0.09504511207342148, "learning_rate": 1.100377981926957e-05, "loss": 8.7132, "step": 126980 }, { "epoch": 0.6341731379060651, "grad_norm": 0.09073743224143982, "learning_rate": 1.100227790432802e-05, "loss": 8.6928, "step": 126990 }, { "epoch": 0.6342230767310045, "grad_norm": 0.09450805932283401, "learning_rate": 1.1000775989386468e-05, "loss": 8.6902, "step": 127000 }, { "epoch": 0.634273015555944, "grad_norm": 0.08821328729391098, "learning_rate": 1.0999274074444916e-05, "loss": 8.7069, "step": 127010 }, { "epoch": 0.6343229543808834, "grad_norm": 0.09215594828128815, "learning_rate": 1.0997772159503367e-05, "loss": 8.6957, "step": 127020 }, { "epoch": 0.6343728932058229, "grad_norm": 0.09893883019685745, "learning_rate": 1.0996270244561817e-05, "loss": 8.6884, "step": 127030 }, { "epoch": 0.6344228320307623, "grad_norm": 0.0937436893582344, "learning_rate": 1.0994768329620267e-05, "loss": 8.7161, "step": 127040 }, { "epoch": 0.6344727708557018, "grad_norm": 0.08972315490245819, "learning_rate": 1.0993266414678715e-05, "loss": 8.6941, "step": 127050 }, { "epoch": 0.6345227096806412, "grad_norm": 0.09105207026004791, "learning_rate": 1.0991764499737164e-05, "loss": 8.7012, "step": 127060 }, { "epoch": 0.6345726485055807, "grad_norm": 0.09180324524641037, "learning_rate": 1.0990262584795614e-05, "loss": 8.6961, "step": 127070 }, { "epoch": 0.6346225873305201, "grad_norm": 0.09050474315881729, "learning_rate": 1.0988760669854064e-05, "loss": 8.7129, "step": 127080 }, { "epoch": 0.6346725261554595, "grad_norm": 0.08975604176521301, "learning_rate": 1.0987258754912514e-05, "loss": 8.7069, "step": 127090 }, { "epoch": 0.634722464980399, "grad_norm": 0.09653442353010178, "learning_rate": 1.0985756839970965e-05, "loss": 8.6998, "step": 127100 }, { "epoch": 0.6347724038053385, "grad_norm": 0.09031674265861511, "learning_rate": 1.0984254925029411e-05, "loss": 8.7128, "step": 127110 }, { "epoch": 0.6348223426302779, "grad_norm": 0.09196629375219345, "learning_rate": 1.0982753010087862e-05, "loss": 8.7115, "step": 127120 }, { "epoch": 0.6348722814552173, "grad_norm": 0.09125792980194092, "learning_rate": 1.0981251095146312e-05, "loss": 8.6958, "step": 127130 }, { "epoch": 0.6349222202801568, "grad_norm": 0.09218353778123856, "learning_rate": 1.0979749180204762e-05, "loss": 8.7032, "step": 127140 }, { "epoch": 0.6349721591050963, "grad_norm": 0.09349720180034637, "learning_rate": 1.0978247265263212e-05, "loss": 8.698, "step": 127150 }, { "epoch": 0.6350220979300357, "grad_norm": 0.0936102643609047, "learning_rate": 1.0976745350321659e-05, "loss": 8.6991, "step": 127160 }, { "epoch": 0.6350720367549751, "grad_norm": 0.0921725407242775, "learning_rate": 1.0975243435380109e-05, "loss": 8.7104, "step": 127170 }, { "epoch": 0.6351219755799146, "grad_norm": 0.09616363793611526, "learning_rate": 1.097374152043856e-05, "loss": 8.7191, "step": 127180 }, { "epoch": 0.6351719144048541, "grad_norm": 0.09801583737134933, "learning_rate": 1.097223960549701e-05, "loss": 8.702, "step": 127190 }, { "epoch": 0.6352218532297935, "grad_norm": 0.09138066321611404, "learning_rate": 1.097073769055546e-05, "loss": 8.7066, "step": 127200 }, { "epoch": 0.6352717920547329, "grad_norm": 0.09149125218391418, "learning_rate": 1.0969235775613906e-05, "loss": 8.7125, "step": 127210 }, { "epoch": 0.6353217308796724, "grad_norm": 0.0975177139043808, "learning_rate": 1.0967733860672357e-05, "loss": 8.7044, "step": 127220 }, { "epoch": 0.6353716697046119, "grad_norm": 0.08766981214284897, "learning_rate": 1.0966231945730807e-05, "loss": 8.7172, "step": 127230 }, { "epoch": 0.6354216085295513, "grad_norm": 0.08835364878177643, "learning_rate": 1.0964730030789257e-05, "loss": 8.7012, "step": 127240 }, { "epoch": 0.6354715473544907, "grad_norm": 0.09096872806549072, "learning_rate": 1.0963228115847707e-05, "loss": 8.702, "step": 127250 }, { "epoch": 0.6355214861794302, "grad_norm": 0.09311988204717636, "learning_rate": 1.0961726200906156e-05, "loss": 8.6887, "step": 127260 }, { "epoch": 0.6355714250043697, "grad_norm": 0.0934610664844513, "learning_rate": 1.0960224285964604e-05, "loss": 8.7063, "step": 127270 }, { "epoch": 0.6356213638293091, "grad_norm": 0.098729208111763, "learning_rate": 1.0958722371023054e-05, "loss": 8.7185, "step": 127280 }, { "epoch": 0.6356713026542485, "grad_norm": 0.08784567564725876, "learning_rate": 1.0957220456081504e-05, "loss": 8.6937, "step": 127290 }, { "epoch": 0.635721241479188, "grad_norm": 0.08716242760419846, "learning_rate": 1.0955718541139955e-05, "loss": 8.7219, "step": 127300 }, { "epoch": 0.6357711803041275, "grad_norm": 0.093071848154068, "learning_rate": 1.0954216626198403e-05, "loss": 8.7022, "step": 127310 }, { "epoch": 0.6358211191290669, "grad_norm": 0.09252569824457169, "learning_rate": 1.0952714711256852e-05, "loss": 8.7088, "step": 127320 }, { "epoch": 0.6358710579540063, "grad_norm": 0.09083006531000137, "learning_rate": 1.0951212796315302e-05, "loss": 8.7163, "step": 127330 }, { "epoch": 0.6359209967789458, "grad_norm": 0.08883119374513626, "learning_rate": 1.0949710881373752e-05, "loss": 8.6963, "step": 127340 }, { "epoch": 0.6359709356038853, "grad_norm": 0.09394201636314392, "learning_rate": 1.0948208966432202e-05, "loss": 8.7119, "step": 127350 }, { "epoch": 0.6360208744288247, "grad_norm": 0.10139136761426926, "learning_rate": 1.094670705149065e-05, "loss": 8.704, "step": 127360 }, { "epoch": 0.6360708132537641, "grad_norm": 0.09166630357503891, "learning_rate": 1.0945205136549099e-05, "loss": 8.6997, "step": 127370 }, { "epoch": 0.6361207520787036, "grad_norm": 0.10158146917819977, "learning_rate": 1.094370322160755e-05, "loss": 8.7121, "step": 127380 }, { "epoch": 0.6361706909036431, "grad_norm": 0.08964638411998749, "learning_rate": 1.0942201306666e-05, "loss": 8.6963, "step": 127390 }, { "epoch": 0.6362206297285825, "grad_norm": 0.09380380064249039, "learning_rate": 1.094069939172445e-05, "loss": 8.7023, "step": 127400 }, { "epoch": 0.6362705685535219, "grad_norm": 0.09249448776245117, "learning_rate": 1.0939197476782898e-05, "loss": 8.7212, "step": 127410 }, { "epoch": 0.6363205073784614, "grad_norm": 0.09347539395093918, "learning_rate": 1.0937695561841348e-05, "loss": 8.6968, "step": 127420 }, { "epoch": 0.6363704462034009, "grad_norm": 0.09082402288913727, "learning_rate": 1.0936193646899797e-05, "loss": 8.7038, "step": 127430 }, { "epoch": 0.6364203850283403, "grad_norm": 0.09499524533748627, "learning_rate": 1.0934691731958247e-05, "loss": 8.7015, "step": 127440 }, { "epoch": 0.6364703238532797, "grad_norm": 0.09662103652954102, "learning_rate": 1.0933189817016697e-05, "loss": 8.6867, "step": 127450 }, { "epoch": 0.6365202626782192, "grad_norm": 0.10028364509344101, "learning_rate": 1.0931687902075146e-05, "loss": 8.697, "step": 127460 }, { "epoch": 0.6365702015031587, "grad_norm": 0.09399396926164627, "learning_rate": 1.0930185987133596e-05, "loss": 8.7017, "step": 127470 }, { "epoch": 0.6366201403280981, "grad_norm": 0.0959121435880661, "learning_rate": 1.0928684072192044e-05, "loss": 8.6941, "step": 127480 }, { "epoch": 0.6366700791530375, "grad_norm": 0.09251075237989426, "learning_rate": 1.0927182157250494e-05, "loss": 8.6763, "step": 127490 }, { "epoch": 0.636720017977977, "grad_norm": 0.09388341009616852, "learning_rate": 1.0925680242308945e-05, "loss": 8.698, "step": 127500 }, { "epoch": 0.6367699568029165, "grad_norm": 0.09439099580049515, "learning_rate": 1.0924178327367393e-05, "loss": 8.721, "step": 127510 }, { "epoch": 0.6368198956278559, "grad_norm": 0.0929148718714714, "learning_rate": 1.0922676412425843e-05, "loss": 8.7023, "step": 127520 }, { "epoch": 0.6368698344527953, "grad_norm": 0.08788886666297913, "learning_rate": 1.0921174497484292e-05, "loss": 8.7087, "step": 127530 }, { "epoch": 0.6369197732777347, "grad_norm": 0.09236535429954529, "learning_rate": 1.0919672582542742e-05, "loss": 8.7026, "step": 127540 }, { "epoch": 0.6369697121026743, "grad_norm": 0.08757699280977249, "learning_rate": 1.0918170667601192e-05, "loss": 8.6932, "step": 127550 }, { "epoch": 0.6370196509276137, "grad_norm": 0.09204018861055374, "learning_rate": 1.0916668752659642e-05, "loss": 8.7039, "step": 127560 }, { "epoch": 0.6370695897525531, "grad_norm": 0.0933895856142044, "learning_rate": 1.091516683771809e-05, "loss": 8.6862, "step": 127570 }, { "epoch": 0.6371195285774925, "grad_norm": 0.09441374242305756, "learning_rate": 1.0913664922776541e-05, "loss": 8.712, "step": 127580 }, { "epoch": 0.637169467402432, "grad_norm": 0.08799993246793747, "learning_rate": 1.091216300783499e-05, "loss": 8.69, "step": 127590 }, { "epoch": 0.6372194062273715, "grad_norm": 0.08958584070205688, "learning_rate": 1.091066109289344e-05, "loss": 8.6959, "step": 127600 }, { "epoch": 0.6372693450523109, "grad_norm": 0.09037229418754578, "learning_rate": 1.090915917795189e-05, "loss": 8.7096, "step": 127610 }, { "epoch": 0.6373192838772503, "grad_norm": 0.08842466026544571, "learning_rate": 1.0907657263010338e-05, "loss": 8.7115, "step": 127620 }, { "epoch": 0.6373692227021898, "grad_norm": 0.09405070543289185, "learning_rate": 1.0906155348068788e-05, "loss": 8.7059, "step": 127630 }, { "epoch": 0.6374191615271293, "grad_norm": 0.09778458625078201, "learning_rate": 1.0904653433127237e-05, "loss": 8.7081, "step": 127640 }, { "epoch": 0.6374691003520687, "grad_norm": 0.09865093231201172, "learning_rate": 1.0903151518185687e-05, "loss": 8.7095, "step": 127650 }, { "epoch": 0.6375190391770081, "grad_norm": 0.09222712367773056, "learning_rate": 1.0901649603244137e-05, "loss": 8.6945, "step": 127660 }, { "epoch": 0.6375689780019476, "grad_norm": 0.09677907079458237, "learning_rate": 1.0900147688302586e-05, "loss": 8.6983, "step": 127670 }, { "epoch": 0.6376189168268871, "grad_norm": 0.08906341344118118, "learning_rate": 1.0898645773361036e-05, "loss": 8.7033, "step": 127680 }, { "epoch": 0.6376688556518265, "grad_norm": 0.09072484076023102, "learning_rate": 1.0897143858419484e-05, "loss": 8.7092, "step": 127690 }, { "epoch": 0.6377187944767659, "grad_norm": 0.09344867616891861, "learning_rate": 1.0895641943477935e-05, "loss": 8.6877, "step": 127700 }, { "epoch": 0.6377687333017054, "grad_norm": 0.093172587454319, "learning_rate": 1.0894140028536385e-05, "loss": 8.679, "step": 127710 }, { "epoch": 0.6378186721266449, "grad_norm": 0.08979550749063492, "learning_rate": 1.0892638113594833e-05, "loss": 8.7013, "step": 127720 }, { "epoch": 0.6378686109515843, "grad_norm": 0.09762118011713028, "learning_rate": 1.0891136198653283e-05, "loss": 8.7104, "step": 127730 }, { "epoch": 0.6379185497765237, "grad_norm": 0.09290577471256256, "learning_rate": 1.0889634283711734e-05, "loss": 8.7004, "step": 127740 }, { "epoch": 0.6379684886014632, "grad_norm": 0.0906912311911583, "learning_rate": 1.0888132368770182e-05, "loss": 8.7001, "step": 127750 }, { "epoch": 0.6380184274264027, "grad_norm": 0.08934336155653, "learning_rate": 1.0886630453828632e-05, "loss": 8.6983, "step": 127760 }, { "epoch": 0.6380683662513421, "grad_norm": 0.09342038631439209, "learning_rate": 1.088512853888708e-05, "loss": 8.6941, "step": 127770 }, { "epoch": 0.6381183050762815, "grad_norm": 0.0915367603302002, "learning_rate": 1.0883626623945531e-05, "loss": 8.696, "step": 127780 }, { "epoch": 0.638168243901221, "grad_norm": 0.08702074736356735, "learning_rate": 1.0882124709003981e-05, "loss": 8.6942, "step": 127790 }, { "epoch": 0.6382181827261605, "grad_norm": 0.09180362522602081, "learning_rate": 1.088062279406243e-05, "loss": 8.7023, "step": 127800 }, { "epoch": 0.6382681215510999, "grad_norm": 0.0904599204659462, "learning_rate": 1.087912087912088e-05, "loss": 8.7151, "step": 127810 }, { "epoch": 0.6383180603760393, "grad_norm": 0.09203129261732101, "learning_rate": 1.0877618964179328e-05, "loss": 8.7078, "step": 127820 }, { "epoch": 0.6383679992009788, "grad_norm": 0.08823869377374649, "learning_rate": 1.0876117049237779e-05, "loss": 8.7192, "step": 127830 }, { "epoch": 0.6384179380259183, "grad_norm": 0.09404473751783371, "learning_rate": 1.0874615134296229e-05, "loss": 8.7175, "step": 127840 }, { "epoch": 0.6384678768508577, "grad_norm": 0.09303843975067139, "learning_rate": 1.0873113219354677e-05, "loss": 8.7135, "step": 127850 }, { "epoch": 0.6385178156757971, "grad_norm": 0.09125247597694397, "learning_rate": 1.0871611304413127e-05, "loss": 8.7052, "step": 127860 }, { "epoch": 0.6385677545007366, "grad_norm": 0.0958453044295311, "learning_rate": 1.0870109389471576e-05, "loss": 8.6845, "step": 127870 }, { "epoch": 0.6386176933256761, "grad_norm": 0.09031480550765991, "learning_rate": 1.0868607474530026e-05, "loss": 8.7015, "step": 127880 }, { "epoch": 0.6386676321506155, "grad_norm": 0.10492537170648575, "learning_rate": 1.0867105559588476e-05, "loss": 8.7136, "step": 127890 }, { "epoch": 0.6387175709755549, "grad_norm": 0.09470899403095245, "learning_rate": 1.0865603644646926e-05, "loss": 8.6868, "step": 127900 }, { "epoch": 0.6387675098004943, "grad_norm": 0.09199722856283188, "learning_rate": 1.0864101729705375e-05, "loss": 8.6939, "step": 127910 }, { "epoch": 0.6388174486254339, "grad_norm": 0.08992622792720795, "learning_rate": 1.0862599814763823e-05, "loss": 8.6928, "step": 127920 }, { "epoch": 0.6388673874503733, "grad_norm": 0.09477774053812027, "learning_rate": 1.0861097899822274e-05, "loss": 8.6969, "step": 127930 }, { "epoch": 0.6389173262753127, "grad_norm": 0.09002884477376938, "learning_rate": 1.0859595984880724e-05, "loss": 8.6955, "step": 127940 }, { "epoch": 0.6389672651002521, "grad_norm": 0.09048038721084595, "learning_rate": 1.0858094069939174e-05, "loss": 8.7043, "step": 127950 }, { "epoch": 0.6390172039251917, "grad_norm": 0.08667916804552078, "learning_rate": 1.0856592154997622e-05, "loss": 8.6913, "step": 127960 }, { "epoch": 0.6390671427501311, "grad_norm": 0.09208723902702332, "learning_rate": 1.085509024005607e-05, "loss": 8.7021, "step": 127970 }, { "epoch": 0.6391170815750705, "grad_norm": 0.09197891503572464, "learning_rate": 1.0853588325114521e-05, "loss": 8.7092, "step": 127980 }, { "epoch": 0.6391670204000099, "grad_norm": 0.09264016896486282, "learning_rate": 1.0852086410172971e-05, "loss": 8.6802, "step": 127990 }, { "epoch": 0.6392169592249495, "grad_norm": 0.0906987339258194, "learning_rate": 1.0850584495231421e-05, "loss": 8.6835, "step": 128000 }, { "epoch": 0.6392668980498889, "grad_norm": 0.09663955122232437, "learning_rate": 1.084908258028987e-05, "loss": 8.6855, "step": 128010 }, { "epoch": 0.6393168368748283, "grad_norm": 0.08810370415449142, "learning_rate": 1.0847580665348318e-05, "loss": 8.7045, "step": 128020 }, { "epoch": 0.6393667756997677, "grad_norm": 0.09239162504673004, "learning_rate": 1.0846078750406769e-05, "loss": 8.7063, "step": 128030 }, { "epoch": 0.6394167145247073, "grad_norm": 0.09910788387060165, "learning_rate": 1.0844576835465219e-05, "loss": 8.6895, "step": 128040 }, { "epoch": 0.6394666533496467, "grad_norm": 0.09570180624723434, "learning_rate": 1.0843074920523669e-05, "loss": 8.7115, "step": 128050 }, { "epoch": 0.6395165921745861, "grad_norm": 0.09080067276954651, "learning_rate": 1.0841573005582119e-05, "loss": 8.6992, "step": 128060 }, { "epoch": 0.6395665309995255, "grad_norm": 0.09045545756816864, "learning_rate": 1.0840071090640566e-05, "loss": 8.7097, "step": 128070 }, { "epoch": 0.6396164698244651, "grad_norm": 0.09077303856611252, "learning_rate": 1.0838569175699016e-05, "loss": 8.6892, "step": 128080 }, { "epoch": 0.6396664086494045, "grad_norm": 0.09273795038461685, "learning_rate": 1.0837067260757466e-05, "loss": 8.6948, "step": 128090 }, { "epoch": 0.6397163474743439, "grad_norm": 0.09408220648765564, "learning_rate": 1.0835565345815916e-05, "loss": 8.6957, "step": 128100 }, { "epoch": 0.6397662862992833, "grad_norm": 0.09319932758808136, "learning_rate": 1.0834063430874367e-05, "loss": 8.6996, "step": 128110 }, { "epoch": 0.6398162251242229, "grad_norm": 0.09193559736013412, "learning_rate": 1.0832561515932813e-05, "loss": 8.7085, "step": 128120 }, { "epoch": 0.6398661639491623, "grad_norm": 0.09312306344509125, "learning_rate": 1.0831059600991264e-05, "loss": 8.6832, "step": 128130 }, { "epoch": 0.6399161027741017, "grad_norm": 0.09437835961580276, "learning_rate": 1.0829557686049714e-05, "loss": 8.7107, "step": 128140 }, { "epoch": 0.6399660415990411, "grad_norm": 0.09023882448673248, "learning_rate": 1.0828055771108164e-05, "loss": 8.6888, "step": 128150 }, { "epoch": 0.6400159804239807, "grad_norm": 0.10062866657972336, "learning_rate": 1.0826553856166614e-05, "loss": 8.6884, "step": 128160 }, { "epoch": 0.6400659192489201, "grad_norm": 0.09322546422481537, "learning_rate": 1.0825051941225061e-05, "loss": 8.7025, "step": 128170 }, { "epoch": 0.6401158580738595, "grad_norm": 0.09134891629219055, "learning_rate": 1.0823550026283511e-05, "loss": 8.7113, "step": 128180 }, { "epoch": 0.6401657968987989, "grad_norm": 0.0888613611459732, "learning_rate": 1.0822048111341961e-05, "loss": 8.6908, "step": 128190 }, { "epoch": 0.6402157357237385, "grad_norm": 0.0894928053021431, "learning_rate": 1.0820546196400411e-05, "loss": 8.7003, "step": 128200 }, { "epoch": 0.6402656745486779, "grad_norm": 0.09011103212833405, "learning_rate": 1.0819044281458862e-05, "loss": 8.6731, "step": 128210 }, { "epoch": 0.6403156133736173, "grad_norm": 0.08623740077018738, "learning_rate": 1.081754236651731e-05, "loss": 8.699, "step": 128220 }, { "epoch": 0.6403655521985567, "grad_norm": 0.09192583709955215, "learning_rate": 1.0816040451575759e-05, "loss": 8.7016, "step": 128230 }, { "epoch": 0.6404154910234963, "grad_norm": 0.0926465094089508, "learning_rate": 1.0814538536634209e-05, "loss": 8.69, "step": 128240 }, { "epoch": 0.6404654298484357, "grad_norm": 0.09219156205654144, "learning_rate": 1.0813036621692659e-05, "loss": 8.6993, "step": 128250 }, { "epoch": 0.6405153686733751, "grad_norm": 0.09209728986024857, "learning_rate": 1.0811534706751109e-05, "loss": 8.6976, "step": 128260 }, { "epoch": 0.6405653074983145, "grad_norm": 0.09860700368881226, "learning_rate": 1.0810032791809558e-05, "loss": 8.6936, "step": 128270 }, { "epoch": 0.6406152463232541, "grad_norm": 0.09730758517980576, "learning_rate": 1.0808530876868006e-05, "loss": 8.7031, "step": 128280 }, { "epoch": 0.6406651851481935, "grad_norm": 0.09535746276378632, "learning_rate": 1.0807028961926456e-05, "loss": 8.6945, "step": 128290 }, { "epoch": 0.6407151239731329, "grad_norm": 0.08811869472265244, "learning_rate": 1.0805527046984906e-05, "loss": 8.7013, "step": 128300 }, { "epoch": 0.6407650627980723, "grad_norm": 0.08866571635007858, "learning_rate": 1.0804025132043357e-05, "loss": 8.7039, "step": 128310 }, { "epoch": 0.6408150016230119, "grad_norm": 0.0883893147110939, "learning_rate": 1.0802523217101805e-05, "loss": 8.7151, "step": 128320 }, { "epoch": 0.6408649404479513, "grad_norm": 0.09385412186384201, "learning_rate": 1.0801021302160254e-05, "loss": 8.6897, "step": 128330 }, { "epoch": 0.6409148792728907, "grad_norm": 0.09245074540376663, "learning_rate": 1.0799519387218704e-05, "loss": 8.7014, "step": 128340 }, { "epoch": 0.6409648180978301, "grad_norm": 0.09483639895915985, "learning_rate": 1.0798017472277154e-05, "loss": 8.6969, "step": 128350 }, { "epoch": 0.6410147569227697, "grad_norm": 0.09574125707149506, "learning_rate": 1.0796515557335604e-05, "loss": 8.6871, "step": 128360 }, { "epoch": 0.6410646957477091, "grad_norm": 0.09570538997650146, "learning_rate": 1.0795013642394053e-05, "loss": 8.6784, "step": 128370 }, { "epoch": 0.6411146345726485, "grad_norm": 0.08854909241199493, "learning_rate": 1.0793511727452503e-05, "loss": 8.7082, "step": 128380 }, { "epoch": 0.6411645733975879, "grad_norm": 0.09440670907497406, "learning_rate": 1.0792009812510951e-05, "loss": 8.7027, "step": 128390 }, { "epoch": 0.6412145122225275, "grad_norm": 0.08996603637933731, "learning_rate": 1.0790507897569401e-05, "loss": 8.7015, "step": 128400 }, { "epoch": 0.6412644510474669, "grad_norm": 0.0914885401725769, "learning_rate": 1.0789005982627852e-05, "loss": 8.698, "step": 128410 }, { "epoch": 0.6413143898724063, "grad_norm": 0.08596347272396088, "learning_rate": 1.07875040676863e-05, "loss": 8.7038, "step": 128420 }, { "epoch": 0.6413643286973457, "grad_norm": 0.09728407114744186, "learning_rate": 1.078600215274475e-05, "loss": 8.6863, "step": 128430 }, { "epoch": 0.6414142675222853, "grad_norm": 0.08998657763004303, "learning_rate": 1.0784500237803199e-05, "loss": 8.6925, "step": 128440 }, { "epoch": 0.6414642063472247, "grad_norm": 0.09361448884010315, "learning_rate": 1.0782998322861649e-05, "loss": 8.7054, "step": 128450 }, { "epoch": 0.6415141451721641, "grad_norm": 0.09714844077825546, "learning_rate": 1.0781496407920099e-05, "loss": 8.6882, "step": 128460 }, { "epoch": 0.6415640839971035, "grad_norm": 0.09411020576953888, "learning_rate": 1.0779994492978548e-05, "loss": 8.6916, "step": 128470 }, { "epoch": 0.641614022822043, "grad_norm": 0.09303989261388779, "learning_rate": 1.0778492578036998e-05, "loss": 8.6849, "step": 128480 }, { "epoch": 0.6416639616469825, "grad_norm": 0.0919375792145729, "learning_rate": 1.0776990663095446e-05, "loss": 8.6868, "step": 128490 }, { "epoch": 0.6417139004719219, "grad_norm": 0.08758861571550369, "learning_rate": 1.0775488748153896e-05, "loss": 8.715, "step": 128500 }, { "epoch": 0.6417638392968613, "grad_norm": 0.09430398046970367, "learning_rate": 1.0773986833212347e-05, "loss": 8.6882, "step": 128510 }, { "epoch": 0.6418137781218008, "grad_norm": 0.09385448694229126, "learning_rate": 1.0772484918270795e-05, "loss": 8.699, "step": 128520 }, { "epoch": 0.6418637169467403, "grad_norm": 0.08902585506439209, "learning_rate": 1.0770983003329245e-05, "loss": 8.714, "step": 128530 }, { "epoch": 0.6419136557716797, "grad_norm": 0.08999849855899811, "learning_rate": 1.0769481088387695e-05, "loss": 8.6958, "step": 128540 }, { "epoch": 0.6419635945966191, "grad_norm": 0.09851477295160294, "learning_rate": 1.0767979173446144e-05, "loss": 8.7072, "step": 128550 }, { "epoch": 0.6420135334215585, "grad_norm": 0.0901130884885788, "learning_rate": 1.0766477258504594e-05, "loss": 8.7057, "step": 128560 }, { "epoch": 0.6420634722464981, "grad_norm": 0.0934019386768341, "learning_rate": 1.0764975343563043e-05, "loss": 8.6784, "step": 128570 }, { "epoch": 0.6421134110714375, "grad_norm": 0.0917850062251091, "learning_rate": 1.0763473428621493e-05, "loss": 8.696, "step": 128580 }, { "epoch": 0.6421633498963769, "grad_norm": 0.09072545170783997, "learning_rate": 1.0761971513679943e-05, "loss": 8.7021, "step": 128590 }, { "epoch": 0.6422132887213163, "grad_norm": 0.08813299983739853, "learning_rate": 1.0760469598738391e-05, "loss": 8.6961, "step": 128600 }, { "epoch": 0.6422632275462559, "grad_norm": 0.08745348453521729, "learning_rate": 1.0758967683796842e-05, "loss": 8.678, "step": 128610 }, { "epoch": 0.6423131663711953, "grad_norm": 0.09347406774759293, "learning_rate": 1.075746576885529e-05, "loss": 8.7121, "step": 128620 }, { "epoch": 0.6423631051961347, "grad_norm": 0.09024196863174438, "learning_rate": 1.075596385391374e-05, "loss": 8.6969, "step": 128630 }, { "epoch": 0.6424130440210741, "grad_norm": 0.0957035943865776, "learning_rate": 1.075446193897219e-05, "loss": 8.6935, "step": 128640 }, { "epoch": 0.6424629828460137, "grad_norm": 0.09494053572416306, "learning_rate": 1.0752960024030639e-05, "loss": 8.6902, "step": 128650 }, { "epoch": 0.6425129216709531, "grad_norm": 0.09406720846891403, "learning_rate": 1.0751458109089089e-05, "loss": 8.6785, "step": 128660 }, { "epoch": 0.6425628604958925, "grad_norm": 0.10135558992624283, "learning_rate": 1.0749956194147538e-05, "loss": 8.6853, "step": 128670 }, { "epoch": 0.6426127993208319, "grad_norm": 0.09328481554985046, "learning_rate": 1.0748454279205988e-05, "loss": 8.6928, "step": 128680 }, { "epoch": 0.6426627381457715, "grad_norm": 0.09335169196128845, "learning_rate": 1.0746952364264438e-05, "loss": 8.7002, "step": 128690 }, { "epoch": 0.6427126769707109, "grad_norm": 0.09129825979471207, "learning_rate": 1.0745450449322888e-05, "loss": 8.7094, "step": 128700 }, { "epoch": 0.6427626157956503, "grad_norm": 0.09808053821325302, "learning_rate": 1.0743948534381337e-05, "loss": 8.693, "step": 128710 }, { "epoch": 0.6428125546205897, "grad_norm": 0.0911967009305954, "learning_rate": 1.0742446619439785e-05, "loss": 8.7, "step": 128720 }, { "epoch": 0.6428624934455293, "grad_norm": 0.09128003567457199, "learning_rate": 1.0740944704498235e-05, "loss": 8.6906, "step": 128730 }, { "epoch": 0.6429124322704687, "grad_norm": 0.09504960477352142, "learning_rate": 1.0739442789556685e-05, "loss": 8.7072, "step": 128740 }, { "epoch": 0.6429623710954081, "grad_norm": 0.09051109105348587, "learning_rate": 1.0737940874615136e-05, "loss": 8.692, "step": 128750 }, { "epoch": 0.6430123099203475, "grad_norm": 0.09383983165025711, "learning_rate": 1.0736438959673584e-05, "loss": 8.6927, "step": 128760 }, { "epoch": 0.6430622487452871, "grad_norm": 0.09124093502759933, "learning_rate": 1.0734937044732033e-05, "loss": 8.6917, "step": 128770 }, { "epoch": 0.6431121875702265, "grad_norm": 0.09348580986261368, "learning_rate": 1.0733435129790483e-05, "loss": 8.6897, "step": 128780 }, { "epoch": 0.6431621263951659, "grad_norm": 0.08800901472568512, "learning_rate": 1.0731933214848933e-05, "loss": 8.692, "step": 128790 }, { "epoch": 0.6432120652201053, "grad_norm": 0.08735651522874832, "learning_rate": 1.0730431299907383e-05, "loss": 8.697, "step": 128800 }, { "epoch": 0.6432620040450449, "grad_norm": 0.09434789419174194, "learning_rate": 1.0728929384965832e-05, "loss": 8.6856, "step": 128810 }, { "epoch": 0.6433119428699843, "grad_norm": 0.09529183804988861, "learning_rate": 1.072742747002428e-05, "loss": 8.7082, "step": 128820 }, { "epoch": 0.6433618816949237, "grad_norm": 0.08833926171064377, "learning_rate": 1.072592555508273e-05, "loss": 8.6816, "step": 128830 }, { "epoch": 0.6434118205198631, "grad_norm": 0.09038601070642471, "learning_rate": 1.072442364014118e-05, "loss": 8.6931, "step": 128840 }, { "epoch": 0.6434617593448027, "grad_norm": 0.09003665298223495, "learning_rate": 1.072292172519963e-05, "loss": 8.685, "step": 128850 }, { "epoch": 0.6435116981697421, "grad_norm": 0.08987587690353394, "learning_rate": 1.0721419810258079e-05, "loss": 8.707, "step": 128860 }, { "epoch": 0.6435616369946815, "grad_norm": 0.09192398190498352, "learning_rate": 1.0719917895316528e-05, "loss": 8.6899, "step": 128870 }, { "epoch": 0.6436115758196209, "grad_norm": 0.09186169505119324, "learning_rate": 1.0718415980374978e-05, "loss": 8.682, "step": 128880 }, { "epoch": 0.6436615146445605, "grad_norm": 0.0946180447936058, "learning_rate": 1.0716914065433428e-05, "loss": 8.6664, "step": 128890 }, { "epoch": 0.6437114534694999, "grad_norm": 0.08595947921276093, "learning_rate": 1.0715412150491878e-05, "loss": 8.6969, "step": 128900 }, { "epoch": 0.6437613922944393, "grad_norm": 0.0891309604048729, "learning_rate": 1.0713910235550328e-05, "loss": 8.7002, "step": 128910 }, { "epoch": 0.6438113311193787, "grad_norm": 0.08861633390188217, "learning_rate": 1.0712408320608775e-05, "loss": 8.7062, "step": 128920 }, { "epoch": 0.6438612699443182, "grad_norm": 0.0909549817442894, "learning_rate": 1.0710906405667225e-05, "loss": 8.7044, "step": 128930 }, { "epoch": 0.6439112087692577, "grad_norm": 0.09526665508747101, "learning_rate": 1.0709404490725675e-05, "loss": 8.6959, "step": 128940 }, { "epoch": 0.6439611475941971, "grad_norm": 0.0844038724899292, "learning_rate": 1.0707902575784126e-05, "loss": 8.6776, "step": 128950 }, { "epoch": 0.6440110864191365, "grad_norm": 0.09909462928771973, "learning_rate": 1.0706400660842576e-05, "loss": 8.6958, "step": 128960 }, { "epoch": 0.644061025244076, "grad_norm": 0.09768630564212799, "learning_rate": 1.0704898745901023e-05, "loss": 8.6804, "step": 128970 }, { "epoch": 0.6441109640690155, "grad_norm": 0.09060562402009964, "learning_rate": 1.0703396830959473e-05, "loss": 8.6874, "step": 128980 }, { "epoch": 0.6441609028939549, "grad_norm": 0.09415677189826965, "learning_rate": 1.0701894916017923e-05, "loss": 8.6893, "step": 128990 }, { "epoch": 0.6442108417188943, "grad_norm": 0.08882970362901688, "learning_rate": 1.0700393001076373e-05, "loss": 8.685, "step": 129000 }, { "epoch": 0.6442607805438338, "grad_norm": 0.09341847151517868, "learning_rate": 1.0698891086134823e-05, "loss": 8.6986, "step": 129010 }, { "epoch": 0.6443107193687733, "grad_norm": 0.08802028000354767, "learning_rate": 1.069738917119327e-05, "loss": 8.6974, "step": 129020 }, { "epoch": 0.6443606581937127, "grad_norm": 0.09880810230970383, "learning_rate": 1.069588725625172e-05, "loss": 8.7002, "step": 129030 }, { "epoch": 0.6444105970186521, "grad_norm": 0.09482931345701218, "learning_rate": 1.069438534131017e-05, "loss": 8.7117, "step": 129040 }, { "epoch": 0.6444605358435916, "grad_norm": 0.09154761582612991, "learning_rate": 1.069288342636862e-05, "loss": 8.7054, "step": 129050 }, { "epoch": 0.6445104746685311, "grad_norm": 0.08899923413991928, "learning_rate": 1.069138151142707e-05, "loss": 8.7215, "step": 129060 }, { "epoch": 0.6445604134934705, "grad_norm": 0.09512390941381454, "learning_rate": 1.068987959648552e-05, "loss": 8.6868, "step": 129070 }, { "epoch": 0.6446103523184099, "grad_norm": 0.09039557725191116, "learning_rate": 1.0688377681543968e-05, "loss": 8.6942, "step": 129080 }, { "epoch": 0.6446602911433494, "grad_norm": 0.09328074753284454, "learning_rate": 1.0686875766602418e-05, "loss": 8.6928, "step": 129090 }, { "epoch": 0.6447102299682889, "grad_norm": 0.0936678946018219, "learning_rate": 1.0685373851660868e-05, "loss": 8.6925, "step": 129100 }, { "epoch": 0.6447601687932283, "grad_norm": 0.09343737363815308, "learning_rate": 1.0683871936719318e-05, "loss": 8.698, "step": 129110 }, { "epoch": 0.6448101076181677, "grad_norm": 0.08791700750589371, "learning_rate": 1.0682370021777767e-05, "loss": 8.6843, "step": 129120 }, { "epoch": 0.6448600464431072, "grad_norm": 0.09414328634738922, "learning_rate": 1.0680868106836215e-05, "loss": 8.7037, "step": 129130 }, { "epoch": 0.6449099852680467, "grad_norm": 0.0889165922999382, "learning_rate": 1.0679366191894665e-05, "loss": 8.701, "step": 129140 }, { "epoch": 0.6449599240929861, "grad_norm": 0.09565069526433945, "learning_rate": 1.0677864276953116e-05, "loss": 8.6856, "step": 129150 }, { "epoch": 0.6450098629179255, "grad_norm": 0.09142804890871048, "learning_rate": 1.0676362362011566e-05, "loss": 8.694, "step": 129160 }, { "epoch": 0.645059801742865, "grad_norm": 0.09595470130443573, "learning_rate": 1.0674860447070014e-05, "loss": 8.7063, "step": 129170 }, { "epoch": 0.6451097405678045, "grad_norm": 0.09262866526842117, "learning_rate": 1.0673358532128463e-05, "loss": 8.687, "step": 129180 }, { "epoch": 0.6451596793927439, "grad_norm": 0.09345003217458725, "learning_rate": 1.0671856617186913e-05, "loss": 8.6825, "step": 129190 }, { "epoch": 0.6452096182176833, "grad_norm": 0.09604539722204208, "learning_rate": 1.0670354702245363e-05, "loss": 8.685, "step": 129200 }, { "epoch": 0.6452595570426228, "grad_norm": 0.09135550260543823, "learning_rate": 1.0668852787303813e-05, "loss": 8.6817, "step": 129210 }, { "epoch": 0.6453094958675623, "grad_norm": 0.09230076521635056, "learning_rate": 1.0667350872362262e-05, "loss": 8.7034, "step": 129220 }, { "epoch": 0.6453594346925017, "grad_norm": 0.08917170763015747, "learning_rate": 1.0665848957420712e-05, "loss": 8.6952, "step": 129230 }, { "epoch": 0.6454093735174411, "grad_norm": 0.09029002487659454, "learning_rate": 1.066434704247916e-05, "loss": 8.7049, "step": 129240 }, { "epoch": 0.6454593123423806, "grad_norm": 0.09072355180978775, "learning_rate": 1.066284512753761e-05, "loss": 8.6823, "step": 129250 }, { "epoch": 0.64550925116732, "grad_norm": 0.09148232638835907, "learning_rate": 1.066134321259606e-05, "loss": 8.6949, "step": 129260 }, { "epoch": 0.6455591899922595, "grad_norm": 0.09294047951698303, "learning_rate": 1.065984129765451e-05, "loss": 8.6964, "step": 129270 }, { "epoch": 0.6456091288171989, "grad_norm": 0.09537001699209213, "learning_rate": 1.065833938271296e-05, "loss": 8.6854, "step": 129280 }, { "epoch": 0.6456590676421384, "grad_norm": 0.08766493201255798, "learning_rate": 1.0656837467771408e-05, "loss": 8.6707, "step": 129290 }, { "epoch": 0.6457090064670779, "grad_norm": 0.09294747561216354, "learning_rate": 1.0655335552829858e-05, "loss": 8.691, "step": 129300 }, { "epoch": 0.6457589452920173, "grad_norm": 0.09923656284809113, "learning_rate": 1.0653833637888308e-05, "loss": 8.6914, "step": 129310 }, { "epoch": 0.6458088841169567, "grad_norm": 0.09559011459350586, "learning_rate": 1.0652331722946757e-05, "loss": 8.6932, "step": 129320 }, { "epoch": 0.6458588229418962, "grad_norm": 0.09223086386919022, "learning_rate": 1.0650829808005207e-05, "loss": 8.6739, "step": 129330 }, { "epoch": 0.6459087617668356, "grad_norm": 0.09738924354314804, "learning_rate": 1.0649327893063655e-05, "loss": 8.6669, "step": 129340 }, { "epoch": 0.6459587005917751, "grad_norm": 0.09521890431642532, "learning_rate": 1.0647825978122106e-05, "loss": 8.7012, "step": 129350 }, { "epoch": 0.6460086394167145, "grad_norm": 0.08749169111251831, "learning_rate": 1.0646324063180556e-05, "loss": 8.7065, "step": 129360 }, { "epoch": 0.646058578241654, "grad_norm": 0.09419877082109451, "learning_rate": 1.0644822148239004e-05, "loss": 8.6892, "step": 129370 }, { "epoch": 0.6461085170665934, "grad_norm": 0.1010696068406105, "learning_rate": 1.0643320233297455e-05, "loss": 8.6767, "step": 129380 }, { "epoch": 0.6461584558915329, "grad_norm": 0.09025359898805618, "learning_rate": 1.0641818318355905e-05, "loss": 8.6869, "step": 129390 }, { "epoch": 0.6462083947164723, "grad_norm": 0.10446906089782715, "learning_rate": 1.0640316403414353e-05, "loss": 8.6966, "step": 129400 }, { "epoch": 0.6462583335414118, "grad_norm": 0.09092262387275696, "learning_rate": 1.0638814488472803e-05, "loss": 8.6835, "step": 129410 }, { "epoch": 0.6463082723663512, "grad_norm": 0.08932240307331085, "learning_rate": 1.0637312573531252e-05, "loss": 8.6936, "step": 129420 }, { "epoch": 0.6463582111912907, "grad_norm": 0.09258059412240982, "learning_rate": 1.0635810658589702e-05, "loss": 8.6914, "step": 129430 }, { "epoch": 0.6464081500162301, "grad_norm": 0.09351666271686554, "learning_rate": 1.0634308743648152e-05, "loss": 8.7064, "step": 129440 }, { "epoch": 0.6464580888411696, "grad_norm": 0.08932211250066757, "learning_rate": 1.06328068287066e-05, "loss": 8.7065, "step": 129450 }, { "epoch": 0.646508027666109, "grad_norm": 0.0951404944062233, "learning_rate": 1.0631304913765051e-05, "loss": 8.6996, "step": 129460 }, { "epoch": 0.6465579664910485, "grad_norm": 0.09667675197124481, "learning_rate": 1.06298029988235e-05, "loss": 8.6869, "step": 129470 }, { "epoch": 0.6466079053159879, "grad_norm": 0.08983483165502548, "learning_rate": 1.062830108388195e-05, "loss": 8.6919, "step": 129480 }, { "epoch": 0.6466578441409274, "grad_norm": 0.09399787336587906, "learning_rate": 1.06267991689404e-05, "loss": 8.6946, "step": 129490 }, { "epoch": 0.6467077829658668, "grad_norm": 0.09137116372585297, "learning_rate": 1.0625297253998848e-05, "loss": 8.7024, "step": 129500 }, { "epoch": 0.6467577217908063, "grad_norm": 0.09180816262960434, "learning_rate": 1.0623795339057298e-05, "loss": 8.6889, "step": 129510 }, { "epoch": 0.6468076606157457, "grad_norm": 0.08744052797555923, "learning_rate": 1.0622293424115747e-05, "loss": 8.6927, "step": 129520 }, { "epoch": 0.6468575994406852, "grad_norm": 0.09338006377220154, "learning_rate": 1.0620791509174197e-05, "loss": 8.6732, "step": 129530 }, { "epoch": 0.6469075382656246, "grad_norm": 0.0890432596206665, "learning_rate": 1.0619289594232647e-05, "loss": 8.682, "step": 129540 }, { "epoch": 0.6469574770905641, "grad_norm": 0.08677448332309723, "learning_rate": 1.0617787679291097e-05, "loss": 8.6879, "step": 129550 }, { "epoch": 0.6470074159155035, "grad_norm": 0.08942339569330215, "learning_rate": 1.0616285764349546e-05, "loss": 8.6885, "step": 129560 }, { "epoch": 0.6470573547404429, "grad_norm": 0.09570731967687607, "learning_rate": 1.0614783849407994e-05, "loss": 8.7066, "step": 129570 }, { "epoch": 0.6471072935653824, "grad_norm": 0.09744849801063538, "learning_rate": 1.0613281934466445e-05, "loss": 8.6889, "step": 129580 }, { "epoch": 0.6471572323903219, "grad_norm": 0.09440560638904572, "learning_rate": 1.0611780019524895e-05, "loss": 8.6876, "step": 129590 }, { "epoch": 0.6472071712152613, "grad_norm": 0.09413042664527893, "learning_rate": 1.0610278104583345e-05, "loss": 8.7024, "step": 129600 }, { "epoch": 0.6472571100402007, "grad_norm": 0.09831690788269043, "learning_rate": 1.0608776189641793e-05, "loss": 8.702, "step": 129610 }, { "epoch": 0.6473070488651402, "grad_norm": 0.0940280631184578, "learning_rate": 1.0607274274700242e-05, "loss": 8.6957, "step": 129620 }, { "epoch": 0.6473569876900797, "grad_norm": 0.09738873690366745, "learning_rate": 1.0605772359758692e-05, "loss": 8.6943, "step": 129630 }, { "epoch": 0.6474069265150191, "grad_norm": 0.09439673274755478, "learning_rate": 1.0604270444817142e-05, "loss": 8.6919, "step": 129640 }, { "epoch": 0.6474568653399585, "grad_norm": 0.08990797400474548, "learning_rate": 1.0602768529875592e-05, "loss": 8.6936, "step": 129650 }, { "epoch": 0.647506804164898, "grad_norm": 0.09267980605363846, "learning_rate": 1.0601266614934041e-05, "loss": 8.6866, "step": 129660 }, { "epoch": 0.6475567429898375, "grad_norm": 0.09749335050582886, "learning_rate": 1.0599764699992491e-05, "loss": 8.6843, "step": 129670 }, { "epoch": 0.6476066818147769, "grad_norm": 0.09197567403316498, "learning_rate": 1.059826278505094e-05, "loss": 8.6853, "step": 129680 }, { "epoch": 0.6476566206397163, "grad_norm": 0.08990548551082611, "learning_rate": 1.059676087010939e-05, "loss": 8.6833, "step": 129690 }, { "epoch": 0.6477065594646558, "grad_norm": 0.09163473546504974, "learning_rate": 1.059525895516784e-05, "loss": 8.6803, "step": 129700 }, { "epoch": 0.6477564982895953, "grad_norm": 0.08452335745096207, "learning_rate": 1.059375704022629e-05, "loss": 8.6832, "step": 129710 }, { "epoch": 0.6478064371145347, "grad_norm": 0.09139879792928696, "learning_rate": 1.0592255125284739e-05, "loss": 8.6855, "step": 129720 }, { "epoch": 0.6478563759394741, "grad_norm": 0.09565936774015427, "learning_rate": 1.0590753210343187e-05, "loss": 8.6953, "step": 129730 }, { "epoch": 0.6479063147644136, "grad_norm": 0.09335167706012726, "learning_rate": 1.0589251295401637e-05, "loss": 8.6954, "step": 129740 }, { "epoch": 0.647956253589353, "grad_norm": 0.09695900231599808, "learning_rate": 1.0587749380460087e-05, "loss": 8.6963, "step": 129750 }, { "epoch": 0.6480061924142925, "grad_norm": 0.09421364217996597, "learning_rate": 1.0586247465518538e-05, "loss": 8.7094, "step": 129760 }, { "epoch": 0.6480561312392319, "grad_norm": 0.09024903178215027, "learning_rate": 1.0584745550576986e-05, "loss": 8.6867, "step": 129770 }, { "epoch": 0.6481060700641714, "grad_norm": 0.09890251606702805, "learning_rate": 1.0583243635635435e-05, "loss": 8.6979, "step": 129780 }, { "epoch": 0.6481560088891108, "grad_norm": 0.09448616206645966, "learning_rate": 1.0581741720693885e-05, "loss": 8.6953, "step": 129790 }, { "epoch": 0.6482059477140503, "grad_norm": 0.08602569997310638, "learning_rate": 1.0580239805752335e-05, "loss": 8.6916, "step": 129800 }, { "epoch": 0.6482558865389897, "grad_norm": 0.08742332458496094, "learning_rate": 1.0578737890810785e-05, "loss": 8.6816, "step": 129810 }, { "epoch": 0.6483058253639292, "grad_norm": 0.09227047860622406, "learning_rate": 1.0577235975869234e-05, "loss": 8.7025, "step": 129820 }, { "epoch": 0.6483557641888686, "grad_norm": 0.09513872116804123, "learning_rate": 1.0575734060927682e-05, "loss": 8.6642, "step": 129830 }, { "epoch": 0.6484057030138081, "grad_norm": 0.08885648846626282, "learning_rate": 1.0574232145986132e-05, "loss": 8.6826, "step": 129840 }, { "epoch": 0.6484556418387475, "grad_norm": 0.09655725210905075, "learning_rate": 1.0572730231044582e-05, "loss": 8.6796, "step": 129850 }, { "epoch": 0.648505580663687, "grad_norm": 0.0928000807762146, "learning_rate": 1.0571228316103033e-05, "loss": 8.6778, "step": 129860 }, { "epoch": 0.6485555194886264, "grad_norm": 0.09117357432842255, "learning_rate": 1.0569726401161483e-05, "loss": 8.6699, "step": 129870 }, { "epoch": 0.6486054583135659, "grad_norm": 0.09002866595983505, "learning_rate": 1.056822448621993e-05, "loss": 8.686, "step": 129880 }, { "epoch": 0.6486553971385053, "grad_norm": 0.09259200841188431, "learning_rate": 1.056672257127838e-05, "loss": 8.6844, "step": 129890 }, { "epoch": 0.6487053359634448, "grad_norm": 0.0892748162150383, "learning_rate": 1.056522065633683e-05, "loss": 8.6899, "step": 129900 }, { "epoch": 0.6487552747883842, "grad_norm": 0.0917670950293541, "learning_rate": 1.056371874139528e-05, "loss": 8.6836, "step": 129910 }, { "epoch": 0.6488052136133237, "grad_norm": 0.09188613295555115, "learning_rate": 1.056221682645373e-05, "loss": 8.7191, "step": 129920 }, { "epoch": 0.6488551524382631, "grad_norm": 0.09064072370529175, "learning_rate": 1.0560714911512177e-05, "loss": 8.6913, "step": 129930 }, { "epoch": 0.6489050912632026, "grad_norm": 0.09041624516248703, "learning_rate": 1.0559212996570627e-05, "loss": 8.6797, "step": 129940 }, { "epoch": 0.648955030088142, "grad_norm": 0.09263037890195847, "learning_rate": 1.0557711081629077e-05, "loss": 8.6839, "step": 129950 }, { "epoch": 0.6490049689130815, "grad_norm": 0.0885956883430481, "learning_rate": 1.0556209166687528e-05, "loss": 8.6841, "step": 129960 }, { "epoch": 0.6490549077380209, "grad_norm": 0.08921640366315842, "learning_rate": 1.0554707251745978e-05, "loss": 8.6874, "step": 129970 }, { "epoch": 0.6491048465629604, "grad_norm": 0.09531452506780624, "learning_rate": 1.0553205336804425e-05, "loss": 8.6952, "step": 129980 }, { "epoch": 0.6491547853878998, "grad_norm": 0.09411237388849258, "learning_rate": 1.0551703421862875e-05, "loss": 8.6968, "step": 129990 }, { "epoch": 0.6492047242128393, "grad_norm": 0.09411599487066269, "learning_rate": 1.0550201506921325e-05, "loss": 8.6859, "step": 130000 }, { "epoch": 0.6492546630377787, "grad_norm": 0.09481288492679596, "learning_rate": 1.0548699591979775e-05, "loss": 8.695, "step": 130010 }, { "epoch": 0.6493046018627182, "grad_norm": 0.09503282606601715, "learning_rate": 1.0547197677038225e-05, "loss": 8.6801, "step": 130020 }, { "epoch": 0.6493545406876576, "grad_norm": 0.09125657379627228, "learning_rate": 1.0545695762096674e-05, "loss": 8.6882, "step": 130030 }, { "epoch": 0.649404479512597, "grad_norm": 0.09231728315353394, "learning_rate": 1.0544193847155122e-05, "loss": 8.6794, "step": 130040 }, { "epoch": 0.6494544183375365, "grad_norm": 0.09627514332532883, "learning_rate": 1.0542691932213572e-05, "loss": 8.6888, "step": 130050 }, { "epoch": 0.649504357162476, "grad_norm": 0.0894688069820404, "learning_rate": 1.0541190017272023e-05, "loss": 8.6832, "step": 130060 }, { "epoch": 0.6495542959874154, "grad_norm": 0.09295932203531265, "learning_rate": 1.0539688102330473e-05, "loss": 8.6835, "step": 130070 }, { "epoch": 0.6496042348123549, "grad_norm": 0.09354085475206375, "learning_rate": 1.0538186187388921e-05, "loss": 8.6896, "step": 130080 }, { "epoch": 0.6496541736372943, "grad_norm": 0.09849437326192856, "learning_rate": 1.053668427244737e-05, "loss": 8.67, "step": 130090 }, { "epoch": 0.6497041124622338, "grad_norm": 0.09284999966621399, "learning_rate": 1.053518235750582e-05, "loss": 8.6605, "step": 130100 }, { "epoch": 0.6497540512871732, "grad_norm": 0.0957464724779129, "learning_rate": 1.053368044256427e-05, "loss": 8.6809, "step": 130110 }, { "epoch": 0.6498039901121127, "grad_norm": 0.09025140851736069, "learning_rate": 1.053217852762272e-05, "loss": 8.6906, "step": 130120 }, { "epoch": 0.6498539289370521, "grad_norm": 0.0904148668050766, "learning_rate": 1.0530676612681169e-05, "loss": 8.6876, "step": 130130 }, { "epoch": 0.6499038677619916, "grad_norm": 0.08794094622135162, "learning_rate": 1.0529174697739617e-05, "loss": 8.6838, "step": 130140 }, { "epoch": 0.649953806586931, "grad_norm": 0.09247595816850662, "learning_rate": 1.0527672782798067e-05, "loss": 8.68, "step": 130150 }, { "epoch": 0.6500037454118704, "grad_norm": 0.09172753244638443, "learning_rate": 1.0526170867856518e-05, "loss": 8.6877, "step": 130160 }, { "epoch": 0.6500536842368099, "grad_norm": 0.09523764252662659, "learning_rate": 1.0524668952914968e-05, "loss": 8.6913, "step": 130170 }, { "epoch": 0.6501036230617494, "grad_norm": 0.08924904465675354, "learning_rate": 1.0523167037973416e-05, "loss": 8.681, "step": 130180 }, { "epoch": 0.6501535618866888, "grad_norm": 0.08906625956296921, "learning_rate": 1.0521665123031866e-05, "loss": 8.675, "step": 130190 }, { "epoch": 0.6502035007116282, "grad_norm": 0.09328590333461761, "learning_rate": 1.0520163208090315e-05, "loss": 8.6944, "step": 130200 }, { "epoch": 0.6502534395365677, "grad_norm": 0.09101434797048569, "learning_rate": 1.0518661293148765e-05, "loss": 8.6864, "step": 130210 }, { "epoch": 0.6503033783615072, "grad_norm": 0.09618061780929565, "learning_rate": 1.0517159378207215e-05, "loss": 8.6875, "step": 130220 }, { "epoch": 0.6503533171864466, "grad_norm": 0.0945608913898468, "learning_rate": 1.0515657463265664e-05, "loss": 8.689, "step": 130230 }, { "epoch": 0.650403256011386, "grad_norm": 0.0871725007891655, "learning_rate": 1.0514155548324114e-05, "loss": 8.6919, "step": 130240 }, { "epoch": 0.6504531948363255, "grad_norm": 0.09153078496456146, "learning_rate": 1.0512653633382562e-05, "loss": 8.6813, "step": 130250 }, { "epoch": 0.650503133661265, "grad_norm": 0.09095324575901031, "learning_rate": 1.0511151718441013e-05, "loss": 8.6822, "step": 130260 }, { "epoch": 0.6505530724862044, "grad_norm": 0.09055155515670776, "learning_rate": 1.0509649803499463e-05, "loss": 8.676, "step": 130270 }, { "epoch": 0.6506030113111438, "grad_norm": 0.08829725533723831, "learning_rate": 1.0508147888557911e-05, "loss": 8.6923, "step": 130280 }, { "epoch": 0.6506529501360833, "grad_norm": 0.09116893261671066, "learning_rate": 1.0506645973616361e-05, "loss": 8.6698, "step": 130290 }, { "epoch": 0.6507028889610228, "grad_norm": 0.08940432965755463, "learning_rate": 1.050514405867481e-05, "loss": 8.6763, "step": 130300 }, { "epoch": 0.6507528277859622, "grad_norm": 0.09223427623510361, "learning_rate": 1.050364214373326e-05, "loss": 8.6929, "step": 130310 }, { "epoch": 0.6508027666109016, "grad_norm": 0.09368874877691269, "learning_rate": 1.050214022879171e-05, "loss": 8.6783, "step": 130320 }, { "epoch": 0.6508527054358411, "grad_norm": 0.09133809059858322, "learning_rate": 1.0500638313850159e-05, "loss": 8.6861, "step": 130330 }, { "epoch": 0.6509026442607806, "grad_norm": 0.09096644818782806, "learning_rate": 1.0499136398908609e-05, "loss": 8.6741, "step": 130340 }, { "epoch": 0.65095258308572, "grad_norm": 0.09941566735506058, "learning_rate": 1.0497634483967059e-05, "loss": 8.6934, "step": 130350 }, { "epoch": 0.6510025219106594, "grad_norm": 0.09109698235988617, "learning_rate": 1.0496132569025508e-05, "loss": 8.6798, "step": 130360 }, { "epoch": 0.6510524607355989, "grad_norm": 0.09425774961709976, "learning_rate": 1.0494630654083958e-05, "loss": 8.6717, "step": 130370 }, { "epoch": 0.6511023995605384, "grad_norm": 0.09574547410011292, "learning_rate": 1.0493128739142406e-05, "loss": 8.6847, "step": 130380 }, { "epoch": 0.6511523383854778, "grad_norm": 0.08903608471155167, "learning_rate": 1.0491626824200856e-05, "loss": 8.6854, "step": 130390 }, { "epoch": 0.6512022772104172, "grad_norm": 0.10021170973777771, "learning_rate": 1.0490124909259307e-05, "loss": 8.6747, "step": 130400 }, { "epoch": 0.6512522160353567, "grad_norm": 0.09193012118339539, "learning_rate": 1.0488622994317755e-05, "loss": 8.6764, "step": 130410 }, { "epoch": 0.6513021548602962, "grad_norm": 0.09240026026964188, "learning_rate": 1.0487121079376205e-05, "loss": 8.7007, "step": 130420 }, { "epoch": 0.6513520936852356, "grad_norm": 0.0931842103600502, "learning_rate": 1.0485619164434654e-05, "loss": 8.6803, "step": 130430 }, { "epoch": 0.651402032510175, "grad_norm": 0.0900469720363617, "learning_rate": 1.0484117249493104e-05, "loss": 8.6859, "step": 130440 }, { "epoch": 0.6514519713351145, "grad_norm": 0.08829685300588608, "learning_rate": 1.0482615334551554e-05, "loss": 8.679, "step": 130450 }, { "epoch": 0.651501910160054, "grad_norm": 0.08664678037166595, "learning_rate": 1.0481113419610003e-05, "loss": 8.6902, "step": 130460 }, { "epoch": 0.6515518489849934, "grad_norm": 0.08698765188455582, "learning_rate": 1.0479611504668453e-05, "loss": 8.686, "step": 130470 }, { "epoch": 0.6516017878099328, "grad_norm": 0.09792865067720413, "learning_rate": 1.0478109589726901e-05, "loss": 8.6769, "step": 130480 }, { "epoch": 0.6516517266348723, "grad_norm": 0.08901020884513855, "learning_rate": 1.0476607674785351e-05, "loss": 8.6788, "step": 130490 }, { "epoch": 0.6517016654598118, "grad_norm": 0.0966918021440506, "learning_rate": 1.0475105759843802e-05, "loss": 8.6681, "step": 130500 }, { "epoch": 0.6517516042847512, "grad_norm": 0.0899975523352623, "learning_rate": 1.0473603844902252e-05, "loss": 8.7017, "step": 130510 }, { "epoch": 0.6518015431096906, "grad_norm": 0.09501145780086517, "learning_rate": 1.04721019299607e-05, "loss": 8.698, "step": 130520 }, { "epoch": 0.65185148193463, "grad_norm": 0.09583033621311188, "learning_rate": 1.0470600015019149e-05, "loss": 8.6757, "step": 130530 }, { "epoch": 0.6519014207595695, "grad_norm": 0.09208723902702332, "learning_rate": 1.0469098100077599e-05, "loss": 8.6855, "step": 130540 }, { "epoch": 0.651951359584509, "grad_norm": 0.09319637715816498, "learning_rate": 1.0467596185136049e-05, "loss": 8.6914, "step": 130550 }, { "epoch": 0.6520012984094484, "grad_norm": 0.09361417591571808, "learning_rate": 1.04660942701945e-05, "loss": 8.683, "step": 130560 }, { "epoch": 0.6520512372343878, "grad_norm": 0.0895400270819664, "learning_rate": 1.0464592355252948e-05, "loss": 8.6841, "step": 130570 }, { "epoch": 0.6521011760593273, "grad_norm": 0.09408006817102432, "learning_rate": 1.0463090440311396e-05, "loss": 8.6757, "step": 130580 }, { "epoch": 0.6521511148842668, "grad_norm": 0.09309608489274979, "learning_rate": 1.0461588525369846e-05, "loss": 8.6885, "step": 130590 }, { "epoch": 0.6522010537092062, "grad_norm": 0.1002884954214096, "learning_rate": 1.0460086610428297e-05, "loss": 8.6925, "step": 130600 }, { "epoch": 0.6522509925341456, "grad_norm": 0.09665568172931671, "learning_rate": 1.0458584695486747e-05, "loss": 8.6746, "step": 130610 }, { "epoch": 0.6523009313590851, "grad_norm": 0.09286937117576599, "learning_rate": 1.0457082780545195e-05, "loss": 8.6669, "step": 130620 }, { "epoch": 0.6523508701840246, "grad_norm": 0.094349205493927, "learning_rate": 1.0455580865603644e-05, "loss": 8.674, "step": 130630 }, { "epoch": 0.652400809008964, "grad_norm": 0.0887681245803833, "learning_rate": 1.0454078950662094e-05, "loss": 8.6897, "step": 130640 }, { "epoch": 0.6524507478339034, "grad_norm": 0.09428498893976212, "learning_rate": 1.0452577035720544e-05, "loss": 8.6805, "step": 130650 }, { "epoch": 0.6525006866588429, "grad_norm": 0.09223032742738724, "learning_rate": 1.0451075120778994e-05, "loss": 8.685, "step": 130660 }, { "epoch": 0.6525506254837824, "grad_norm": 0.09503738582134247, "learning_rate": 1.0449573205837445e-05, "loss": 8.6903, "step": 130670 }, { "epoch": 0.6526005643087218, "grad_norm": 0.08950851857662201, "learning_rate": 1.0448071290895891e-05, "loss": 8.6922, "step": 130680 }, { "epoch": 0.6526505031336612, "grad_norm": 0.08932535350322723, "learning_rate": 1.0446569375954341e-05, "loss": 8.6843, "step": 130690 }, { "epoch": 0.6527004419586007, "grad_norm": 0.093544140458107, "learning_rate": 1.0445067461012792e-05, "loss": 8.6965, "step": 130700 }, { "epoch": 0.6527503807835402, "grad_norm": 0.09570880979299545, "learning_rate": 1.0443565546071242e-05, "loss": 8.6699, "step": 130710 }, { "epoch": 0.6528003196084796, "grad_norm": 0.095237135887146, "learning_rate": 1.0442063631129692e-05, "loss": 8.6778, "step": 130720 }, { "epoch": 0.652850258433419, "grad_norm": 0.10413196682929993, "learning_rate": 1.0440561716188139e-05, "loss": 8.6738, "step": 130730 }, { "epoch": 0.6529001972583585, "grad_norm": 0.08957455307245255, "learning_rate": 1.0439059801246589e-05, "loss": 8.6821, "step": 130740 }, { "epoch": 0.652950136083298, "grad_norm": 0.1010318323969841, "learning_rate": 1.043755788630504e-05, "loss": 8.6882, "step": 130750 }, { "epoch": 0.6530000749082374, "grad_norm": 0.09466128796339035, "learning_rate": 1.043605597136349e-05, "loss": 8.6749, "step": 130760 }, { "epoch": 0.6530500137331768, "grad_norm": 0.0850997343659401, "learning_rate": 1.043455405642194e-05, "loss": 8.6898, "step": 130770 }, { "epoch": 0.6530999525581163, "grad_norm": 0.09206338971853256, "learning_rate": 1.0433052141480386e-05, "loss": 8.6861, "step": 130780 }, { "epoch": 0.6531498913830558, "grad_norm": 0.09347966313362122, "learning_rate": 1.0431550226538836e-05, "loss": 8.7026, "step": 130790 }, { "epoch": 0.6531998302079952, "grad_norm": 0.08403073996305466, "learning_rate": 1.0430048311597287e-05, "loss": 8.6844, "step": 130800 }, { "epoch": 0.6532497690329346, "grad_norm": 0.08621867746114731, "learning_rate": 1.0428546396655737e-05, "loss": 8.6709, "step": 130810 }, { "epoch": 0.6532997078578741, "grad_norm": 0.08873015642166138, "learning_rate": 1.0427044481714187e-05, "loss": 8.6875, "step": 130820 }, { "epoch": 0.6533496466828136, "grad_norm": 0.08667482435703278, "learning_rate": 1.0425542566772636e-05, "loss": 8.679, "step": 130830 }, { "epoch": 0.653399585507753, "grad_norm": 0.09268020838499069, "learning_rate": 1.0424040651831084e-05, "loss": 8.6803, "step": 130840 }, { "epoch": 0.6534495243326924, "grad_norm": 0.1045554131269455, "learning_rate": 1.0422538736889534e-05, "loss": 8.6686, "step": 130850 }, { "epoch": 0.6534994631576319, "grad_norm": 0.08979924023151398, "learning_rate": 1.0421036821947984e-05, "loss": 8.6894, "step": 130860 }, { "epoch": 0.6535494019825714, "grad_norm": 0.09369828552007675, "learning_rate": 1.0419534907006435e-05, "loss": 8.6807, "step": 130870 }, { "epoch": 0.6535993408075108, "grad_norm": 0.09341225028038025, "learning_rate": 1.0418032992064883e-05, "loss": 8.6915, "step": 130880 }, { "epoch": 0.6536492796324502, "grad_norm": 0.09005949646234512, "learning_rate": 1.0416531077123331e-05, "loss": 8.6985, "step": 130890 }, { "epoch": 0.6536992184573897, "grad_norm": 0.08998000621795654, "learning_rate": 1.0415029162181782e-05, "loss": 8.6858, "step": 130900 }, { "epoch": 0.6537491572823292, "grad_norm": 0.09265704452991486, "learning_rate": 1.0413527247240232e-05, "loss": 8.667, "step": 130910 }, { "epoch": 0.6537990961072686, "grad_norm": 0.09195978194475174, "learning_rate": 1.0412025332298682e-05, "loss": 8.6813, "step": 130920 }, { "epoch": 0.653849034932208, "grad_norm": 0.09529465436935425, "learning_rate": 1.041052341735713e-05, "loss": 8.6916, "step": 130930 }, { "epoch": 0.6538989737571475, "grad_norm": 0.0947846919298172, "learning_rate": 1.0409021502415579e-05, "loss": 8.6761, "step": 130940 }, { "epoch": 0.653948912582087, "grad_norm": 0.08519770950078964, "learning_rate": 1.040751958747403e-05, "loss": 8.6598, "step": 130950 }, { "epoch": 0.6539988514070264, "grad_norm": 0.0898909941315651, "learning_rate": 1.040601767253248e-05, "loss": 8.6744, "step": 130960 }, { "epoch": 0.6540487902319658, "grad_norm": 0.09745825827121735, "learning_rate": 1.040451575759093e-05, "loss": 8.6766, "step": 130970 }, { "epoch": 0.6540987290569052, "grad_norm": 0.09335152804851532, "learning_rate": 1.0403013842649378e-05, "loss": 8.6872, "step": 130980 }, { "epoch": 0.6541486678818448, "grad_norm": 0.09373294562101364, "learning_rate": 1.0401511927707828e-05, "loss": 8.6794, "step": 130990 }, { "epoch": 0.6541986067067842, "grad_norm": 0.09135058522224426, "learning_rate": 1.0400010012766277e-05, "loss": 8.6976, "step": 131000 }, { "epoch": 0.6542485455317236, "grad_norm": 0.09245934337377548, "learning_rate": 1.0398508097824727e-05, "loss": 8.6859, "step": 131010 }, { "epoch": 0.654298484356663, "grad_norm": 0.08991315215826035, "learning_rate": 1.0397006182883177e-05, "loss": 8.6771, "step": 131020 }, { "epoch": 0.6543484231816026, "grad_norm": 0.09018691629171371, "learning_rate": 1.0395504267941626e-05, "loss": 8.6944, "step": 131030 }, { "epoch": 0.654398362006542, "grad_norm": 0.09176444262266159, "learning_rate": 1.0394002353000076e-05, "loss": 8.6827, "step": 131040 }, { "epoch": 0.6544483008314814, "grad_norm": 0.09148535877466202, "learning_rate": 1.0392500438058524e-05, "loss": 8.68, "step": 131050 }, { "epoch": 0.6544982396564208, "grad_norm": 0.09757846593856812, "learning_rate": 1.0390998523116974e-05, "loss": 8.6791, "step": 131060 }, { "epoch": 0.6545481784813604, "grad_norm": 0.0926646739244461, "learning_rate": 1.0389496608175425e-05, "loss": 8.6906, "step": 131070 }, { "epoch": 0.6545981173062998, "grad_norm": 0.09516071528196335, "learning_rate": 1.0387994693233873e-05, "loss": 8.674, "step": 131080 }, { "epoch": 0.6546480561312392, "grad_norm": 0.09518009424209595, "learning_rate": 1.0386492778292323e-05, "loss": 8.68, "step": 131090 }, { "epoch": 0.6546979949561786, "grad_norm": 0.09322939813137054, "learning_rate": 1.0384990863350772e-05, "loss": 8.677, "step": 131100 }, { "epoch": 0.6547479337811182, "grad_norm": 0.0915323942899704, "learning_rate": 1.0383488948409222e-05, "loss": 8.6732, "step": 131110 }, { "epoch": 0.6547978726060576, "grad_norm": 0.0918693020939827, "learning_rate": 1.0381987033467672e-05, "loss": 8.6658, "step": 131120 }, { "epoch": 0.654847811430997, "grad_norm": 0.09240451455116272, "learning_rate": 1.038048511852612e-05, "loss": 8.6933, "step": 131130 }, { "epoch": 0.6548977502559364, "grad_norm": 0.09553481638431549, "learning_rate": 1.037898320358457e-05, "loss": 8.6893, "step": 131140 }, { "epoch": 0.654947689080876, "grad_norm": 0.09170468896627426, "learning_rate": 1.0377481288643021e-05, "loss": 8.6906, "step": 131150 }, { "epoch": 0.6549976279058154, "grad_norm": 0.09421038627624512, "learning_rate": 1.037597937370147e-05, "loss": 8.6669, "step": 131160 }, { "epoch": 0.6550475667307548, "grad_norm": 0.08922944962978363, "learning_rate": 1.037447745875992e-05, "loss": 8.6888, "step": 131170 }, { "epoch": 0.6550975055556942, "grad_norm": 0.09422342479228973, "learning_rate": 1.0372975543818368e-05, "loss": 8.6767, "step": 131180 }, { "epoch": 0.6551474443806338, "grad_norm": 0.09131404012441635, "learning_rate": 1.0371473628876818e-05, "loss": 8.6735, "step": 131190 }, { "epoch": 0.6551973832055732, "grad_norm": 0.08546370267868042, "learning_rate": 1.0369971713935268e-05, "loss": 8.6832, "step": 131200 }, { "epoch": 0.6552473220305126, "grad_norm": 0.09401803463697433, "learning_rate": 1.0368469798993717e-05, "loss": 8.68, "step": 131210 }, { "epoch": 0.655297260855452, "grad_norm": 0.09075015783309937, "learning_rate": 1.0366967884052167e-05, "loss": 8.6723, "step": 131220 }, { "epoch": 0.6553471996803916, "grad_norm": 0.09611096978187561, "learning_rate": 1.0365465969110616e-05, "loss": 8.6929, "step": 131230 }, { "epoch": 0.655397138505331, "grad_norm": 0.09546373039484024, "learning_rate": 1.0363964054169066e-05, "loss": 8.6781, "step": 131240 }, { "epoch": 0.6554470773302704, "grad_norm": 0.09710092097520828, "learning_rate": 1.0362462139227516e-05, "loss": 8.6739, "step": 131250 }, { "epoch": 0.6554970161552098, "grad_norm": 0.09392837435007095, "learning_rate": 1.0360960224285964e-05, "loss": 8.6824, "step": 131260 }, { "epoch": 0.6555469549801494, "grad_norm": 0.09128575772047043, "learning_rate": 1.0359458309344415e-05, "loss": 8.6771, "step": 131270 }, { "epoch": 0.6555968938050888, "grad_norm": 0.09115424007177353, "learning_rate": 1.0357956394402863e-05, "loss": 8.6769, "step": 131280 }, { "epoch": 0.6556468326300282, "grad_norm": 0.09360767900943756, "learning_rate": 1.0356454479461313e-05, "loss": 8.6999, "step": 131290 }, { "epoch": 0.6556967714549676, "grad_norm": 0.09261327236890793, "learning_rate": 1.0354952564519763e-05, "loss": 8.6671, "step": 131300 }, { "epoch": 0.6557467102799072, "grad_norm": 0.09055551886558533, "learning_rate": 1.0353450649578214e-05, "loss": 8.6813, "step": 131310 }, { "epoch": 0.6557966491048466, "grad_norm": 0.0952824130654335, "learning_rate": 1.0351948734636662e-05, "loss": 8.682, "step": 131320 }, { "epoch": 0.655846587929786, "grad_norm": 0.09080826491117477, "learning_rate": 1.035044681969511e-05, "loss": 8.6825, "step": 131330 }, { "epoch": 0.6558965267547254, "grad_norm": 0.0927935540676117, "learning_rate": 1.034894490475356e-05, "loss": 8.6942, "step": 131340 }, { "epoch": 0.655946465579665, "grad_norm": 0.09244295954704285, "learning_rate": 1.0347442989812011e-05, "loss": 8.6901, "step": 131350 }, { "epoch": 0.6559964044046044, "grad_norm": 0.09503054618835449, "learning_rate": 1.0345941074870461e-05, "loss": 8.6633, "step": 131360 }, { "epoch": 0.6560463432295438, "grad_norm": 0.09178107976913452, "learning_rate": 1.034443915992891e-05, "loss": 8.6971, "step": 131370 }, { "epoch": 0.6560962820544832, "grad_norm": 0.09662067145109177, "learning_rate": 1.0342937244987358e-05, "loss": 8.675, "step": 131380 }, { "epoch": 0.6561462208794228, "grad_norm": 0.09024110436439514, "learning_rate": 1.0341435330045808e-05, "loss": 8.6675, "step": 131390 }, { "epoch": 0.6561961597043622, "grad_norm": 0.09327442944049835, "learning_rate": 1.0339933415104258e-05, "loss": 8.6674, "step": 131400 }, { "epoch": 0.6562460985293016, "grad_norm": 0.09160305559635162, "learning_rate": 1.0338431500162709e-05, "loss": 8.6841, "step": 131410 }, { "epoch": 0.656296037354241, "grad_norm": 0.09517280757427216, "learning_rate": 1.0336929585221157e-05, "loss": 8.6751, "step": 131420 }, { "epoch": 0.6563459761791806, "grad_norm": 0.09651865065097809, "learning_rate": 1.0335427670279606e-05, "loss": 8.6741, "step": 131430 }, { "epoch": 0.65639591500412, "grad_norm": 0.09696118533611298, "learning_rate": 1.0333925755338056e-05, "loss": 8.6646, "step": 131440 }, { "epoch": 0.6564458538290594, "grad_norm": 0.09164685755968094, "learning_rate": 1.0332423840396506e-05, "loss": 8.6919, "step": 131450 }, { "epoch": 0.6564957926539988, "grad_norm": 0.09211573749780655, "learning_rate": 1.0330921925454956e-05, "loss": 8.6674, "step": 131460 }, { "epoch": 0.6565457314789384, "grad_norm": 0.09793127328157425, "learning_rate": 1.0329420010513406e-05, "loss": 8.6897, "step": 131470 }, { "epoch": 0.6565956703038778, "grad_norm": 0.09273996949195862, "learning_rate": 1.0327918095571853e-05, "loss": 8.6733, "step": 131480 }, { "epoch": 0.6566456091288172, "grad_norm": 0.08516659587621689, "learning_rate": 1.0326416180630303e-05, "loss": 8.6978, "step": 131490 }, { "epoch": 0.6566955479537566, "grad_norm": 0.09348917752504349, "learning_rate": 1.0324914265688753e-05, "loss": 8.6678, "step": 131500 }, { "epoch": 0.6567454867786962, "grad_norm": 0.09061364084482193, "learning_rate": 1.0323412350747204e-05, "loss": 8.688, "step": 131510 }, { "epoch": 0.6567954256036356, "grad_norm": 0.09217441827058792, "learning_rate": 1.0321910435805654e-05, "loss": 8.6757, "step": 131520 }, { "epoch": 0.656845364428575, "grad_norm": 0.09263032674789429, "learning_rate": 1.03204085208641e-05, "loss": 8.682, "step": 131530 }, { "epoch": 0.6568953032535144, "grad_norm": 0.09283848851919174, "learning_rate": 1.031890660592255e-05, "loss": 8.6898, "step": 131540 }, { "epoch": 0.6569452420784538, "grad_norm": 0.09214113652706146, "learning_rate": 1.0317404690981001e-05, "loss": 8.6754, "step": 131550 }, { "epoch": 0.6569951809033934, "grad_norm": 0.09393681585788727, "learning_rate": 1.0315902776039451e-05, "loss": 8.6623, "step": 131560 }, { "epoch": 0.6570451197283328, "grad_norm": 0.08560718595981598, "learning_rate": 1.0314400861097901e-05, "loss": 8.6695, "step": 131570 }, { "epoch": 0.6570950585532722, "grad_norm": 0.09654083102941513, "learning_rate": 1.0312898946156348e-05, "loss": 8.6811, "step": 131580 }, { "epoch": 0.6571449973782116, "grad_norm": 0.09015607833862305, "learning_rate": 1.0311397031214798e-05, "loss": 8.6836, "step": 131590 }, { "epoch": 0.6571949362031512, "grad_norm": 0.08412002772092819, "learning_rate": 1.0309895116273248e-05, "loss": 8.6766, "step": 131600 }, { "epoch": 0.6572448750280906, "grad_norm": 0.09406512975692749, "learning_rate": 1.0308393201331699e-05, "loss": 8.6751, "step": 131610 }, { "epoch": 0.65729481385303, "grad_norm": 0.09097196906805038, "learning_rate": 1.0306891286390149e-05, "loss": 8.6757, "step": 131620 }, { "epoch": 0.6573447526779694, "grad_norm": 0.09862224012613297, "learning_rate": 1.0305389371448597e-05, "loss": 8.6832, "step": 131630 }, { "epoch": 0.657394691502909, "grad_norm": 0.09050843119621277, "learning_rate": 1.0303887456507046e-05, "loss": 8.6709, "step": 131640 }, { "epoch": 0.6574446303278484, "grad_norm": 0.09337736666202545, "learning_rate": 1.0302385541565496e-05, "loss": 8.6862, "step": 131650 }, { "epoch": 0.6574945691527878, "grad_norm": 0.08687882125377655, "learning_rate": 1.0300883626623946e-05, "loss": 8.684, "step": 131660 }, { "epoch": 0.6575445079777272, "grad_norm": 0.09270884841680527, "learning_rate": 1.0299381711682396e-05, "loss": 8.6805, "step": 131670 }, { "epoch": 0.6575944468026668, "grad_norm": 0.08802757412195206, "learning_rate": 1.0297879796740845e-05, "loss": 8.6734, "step": 131680 }, { "epoch": 0.6576443856276062, "grad_norm": 0.09502147138118744, "learning_rate": 1.0296377881799293e-05, "loss": 8.6866, "step": 131690 }, { "epoch": 0.6576943244525456, "grad_norm": 0.08749578148126602, "learning_rate": 1.0294875966857743e-05, "loss": 8.664, "step": 131700 }, { "epoch": 0.657744263277485, "grad_norm": 0.09266682714223862, "learning_rate": 1.0293374051916194e-05, "loss": 8.6682, "step": 131710 }, { "epoch": 0.6577942021024246, "grad_norm": 0.08939957618713379, "learning_rate": 1.0291872136974644e-05, "loss": 8.6629, "step": 131720 }, { "epoch": 0.657844140927364, "grad_norm": 0.09014017879962921, "learning_rate": 1.0290370222033092e-05, "loss": 8.6807, "step": 131730 }, { "epoch": 0.6578940797523034, "grad_norm": 0.0930858924984932, "learning_rate": 1.028886830709154e-05, "loss": 8.673, "step": 131740 }, { "epoch": 0.6579440185772428, "grad_norm": 0.09012752026319504, "learning_rate": 1.0287366392149991e-05, "loss": 8.6753, "step": 131750 }, { "epoch": 0.6579939574021824, "grad_norm": 0.09860425442457199, "learning_rate": 1.0285864477208441e-05, "loss": 8.6818, "step": 131760 }, { "epoch": 0.6580438962271218, "grad_norm": 0.08776333183050156, "learning_rate": 1.0284362562266891e-05, "loss": 8.6717, "step": 131770 }, { "epoch": 0.6580938350520612, "grad_norm": 0.09285827726125717, "learning_rate": 1.028286064732534e-05, "loss": 8.6907, "step": 131780 }, { "epoch": 0.6581437738770006, "grad_norm": 0.09714893996715546, "learning_rate": 1.0281358732383788e-05, "loss": 8.6782, "step": 131790 }, { "epoch": 0.6581937127019402, "grad_norm": 0.09962324798107147, "learning_rate": 1.0279856817442238e-05, "loss": 8.6517, "step": 131800 }, { "epoch": 0.6582436515268796, "grad_norm": 0.0935245156288147, "learning_rate": 1.0278354902500689e-05, "loss": 8.6855, "step": 131810 }, { "epoch": 0.658293590351819, "grad_norm": 0.09169341623783112, "learning_rate": 1.0276852987559139e-05, "loss": 8.6795, "step": 131820 }, { "epoch": 0.6583435291767584, "grad_norm": 0.09153547137975693, "learning_rate": 1.0275351072617587e-05, "loss": 8.6665, "step": 131830 }, { "epoch": 0.658393468001698, "grad_norm": 0.0904143676161766, "learning_rate": 1.0273849157676037e-05, "loss": 8.6895, "step": 131840 }, { "epoch": 0.6584434068266374, "grad_norm": 0.09252644330263138, "learning_rate": 1.0272347242734486e-05, "loss": 8.681, "step": 131850 }, { "epoch": 0.6584933456515768, "grad_norm": 0.08688996732234955, "learning_rate": 1.0270845327792936e-05, "loss": 8.6822, "step": 131860 }, { "epoch": 0.6585432844765162, "grad_norm": 0.09266286343336105, "learning_rate": 1.0269343412851386e-05, "loss": 8.6785, "step": 131870 }, { "epoch": 0.6585932233014558, "grad_norm": 0.09570097178220749, "learning_rate": 1.0267841497909835e-05, "loss": 8.6812, "step": 131880 }, { "epoch": 0.6586431621263952, "grad_norm": 0.08836355060338974, "learning_rate": 1.0266339582968285e-05, "loss": 8.6714, "step": 131890 }, { "epoch": 0.6586931009513346, "grad_norm": 0.09223395586013794, "learning_rate": 1.0264837668026733e-05, "loss": 8.6717, "step": 131900 }, { "epoch": 0.658743039776274, "grad_norm": 0.09718294441699982, "learning_rate": 1.0263335753085184e-05, "loss": 8.6782, "step": 131910 }, { "epoch": 0.6587929786012136, "grad_norm": 0.09103352576494217, "learning_rate": 1.0261833838143634e-05, "loss": 8.6824, "step": 131920 }, { "epoch": 0.658842917426153, "grad_norm": 0.09198730438947678, "learning_rate": 1.0260331923202084e-05, "loss": 8.6817, "step": 131930 }, { "epoch": 0.6588928562510924, "grad_norm": 0.09134362637996674, "learning_rate": 1.0258830008260532e-05, "loss": 8.6747, "step": 131940 }, { "epoch": 0.6589427950760318, "grad_norm": 0.09106438606977463, "learning_rate": 1.0257328093318981e-05, "loss": 8.6674, "step": 131950 }, { "epoch": 0.6589927339009714, "grad_norm": 0.08696383237838745, "learning_rate": 1.0255826178377431e-05, "loss": 8.6701, "step": 131960 }, { "epoch": 0.6590426727259108, "grad_norm": 0.0889415293931961, "learning_rate": 1.0254324263435881e-05, "loss": 8.6741, "step": 131970 }, { "epoch": 0.6590926115508502, "grad_norm": 0.09402123838663101, "learning_rate": 1.0252822348494331e-05, "loss": 8.6776, "step": 131980 }, { "epoch": 0.6591425503757896, "grad_norm": 0.0896662175655365, "learning_rate": 1.025132043355278e-05, "loss": 8.6766, "step": 131990 }, { "epoch": 0.6591924892007291, "grad_norm": 0.09504416584968567, "learning_rate": 1.024981851861123e-05, "loss": 8.6715, "step": 132000 }, { "epoch": 0.6592424280256686, "grad_norm": 0.0929967388510704, "learning_rate": 1.0248316603669679e-05, "loss": 8.6769, "step": 132010 }, { "epoch": 0.659292366850608, "grad_norm": 0.09154746681451797, "learning_rate": 1.0246814688728129e-05, "loss": 8.6683, "step": 132020 }, { "epoch": 0.6593423056755474, "grad_norm": 0.09038006514310837, "learning_rate": 1.0245312773786579e-05, "loss": 8.6774, "step": 132030 }, { "epoch": 0.659392244500487, "grad_norm": 0.08670198172330856, "learning_rate": 1.0243810858845027e-05, "loss": 8.6843, "step": 132040 }, { "epoch": 0.6594421833254264, "grad_norm": 0.08812010288238525, "learning_rate": 1.0242308943903478e-05, "loss": 8.6761, "step": 132050 }, { "epoch": 0.6594921221503658, "grad_norm": 0.08771144598722458, "learning_rate": 1.0240807028961926e-05, "loss": 8.6834, "step": 132060 }, { "epoch": 0.6595420609753052, "grad_norm": 0.08678391575813293, "learning_rate": 1.0239305114020376e-05, "loss": 8.6703, "step": 132070 }, { "epoch": 0.6595919998002447, "grad_norm": 0.09273753315210342, "learning_rate": 1.0237803199078826e-05, "loss": 8.6742, "step": 132080 }, { "epoch": 0.6596419386251842, "grad_norm": 0.09024372696876526, "learning_rate": 1.0236301284137275e-05, "loss": 8.6711, "step": 132090 }, { "epoch": 0.6596918774501236, "grad_norm": 0.09069012850522995, "learning_rate": 1.0234799369195725e-05, "loss": 8.6687, "step": 132100 }, { "epoch": 0.659741816275063, "grad_norm": 0.0902455523610115, "learning_rate": 1.0233297454254174e-05, "loss": 8.667, "step": 132110 }, { "epoch": 0.6597917551000025, "grad_norm": 0.08929167687892914, "learning_rate": 1.0231795539312624e-05, "loss": 8.6656, "step": 132120 }, { "epoch": 0.659841693924942, "grad_norm": 0.08881199359893799, "learning_rate": 1.0230293624371074e-05, "loss": 8.6793, "step": 132130 }, { "epoch": 0.6598916327498814, "grad_norm": 0.09602124989032745, "learning_rate": 1.0228791709429522e-05, "loss": 8.6794, "step": 132140 }, { "epoch": 0.6599415715748208, "grad_norm": 0.09030324965715408, "learning_rate": 1.0227289794487973e-05, "loss": 8.6628, "step": 132150 }, { "epoch": 0.6599915103997603, "grad_norm": 0.09483480453491211, "learning_rate": 1.0225787879546423e-05, "loss": 8.668, "step": 132160 }, { "epoch": 0.6600414492246998, "grad_norm": 0.09794318675994873, "learning_rate": 1.0224285964604871e-05, "loss": 8.6631, "step": 132170 }, { "epoch": 0.6600913880496392, "grad_norm": 0.09133700281381607, "learning_rate": 1.0222784049663321e-05, "loss": 8.6633, "step": 132180 }, { "epoch": 0.6601413268745786, "grad_norm": 0.09112256020307541, "learning_rate": 1.022128213472177e-05, "loss": 8.6794, "step": 132190 }, { "epoch": 0.6601912656995181, "grad_norm": 0.09767073392868042, "learning_rate": 1.021978021978022e-05, "loss": 8.6609, "step": 132200 }, { "epoch": 0.6602412045244576, "grad_norm": 0.09043072909116745, "learning_rate": 1.021827830483867e-05, "loss": 8.6865, "step": 132210 }, { "epoch": 0.660291143349397, "grad_norm": 0.08856266736984253, "learning_rate": 1.0216776389897119e-05, "loss": 8.6811, "step": 132220 }, { "epoch": 0.6603410821743364, "grad_norm": 0.09094087034463882, "learning_rate": 1.0215274474955569e-05, "loss": 8.6836, "step": 132230 }, { "epoch": 0.6603910209992759, "grad_norm": 0.09180308133363724, "learning_rate": 1.0213772560014017e-05, "loss": 8.677, "step": 132240 }, { "epoch": 0.6604409598242154, "grad_norm": 0.09490694850683212, "learning_rate": 1.0212270645072468e-05, "loss": 8.6798, "step": 132250 }, { "epoch": 0.6604908986491548, "grad_norm": 0.09207696467638016, "learning_rate": 1.0210768730130918e-05, "loss": 8.6676, "step": 132260 }, { "epoch": 0.6605408374740942, "grad_norm": 0.08791854977607727, "learning_rate": 1.0209266815189366e-05, "loss": 8.6882, "step": 132270 }, { "epoch": 0.6605907762990337, "grad_norm": 0.09454667568206787, "learning_rate": 1.0207764900247816e-05, "loss": 8.6748, "step": 132280 }, { "epoch": 0.6606407151239732, "grad_norm": 0.08923207223415375, "learning_rate": 1.0206262985306265e-05, "loss": 8.6915, "step": 132290 }, { "epoch": 0.6606906539489126, "grad_norm": 0.08957415819168091, "learning_rate": 1.0204761070364715e-05, "loss": 8.6757, "step": 132300 }, { "epoch": 0.660740592773852, "grad_norm": 0.09061232954263687, "learning_rate": 1.0203259155423165e-05, "loss": 8.675, "step": 132310 }, { "epoch": 0.6607905315987915, "grad_norm": 0.0913691371679306, "learning_rate": 1.0201757240481616e-05, "loss": 8.6773, "step": 132320 }, { "epoch": 0.660840470423731, "grad_norm": 0.08861444145441055, "learning_rate": 1.0200255325540064e-05, "loss": 8.6856, "step": 132330 }, { "epoch": 0.6608904092486704, "grad_norm": 0.08731677383184433, "learning_rate": 1.0198753410598512e-05, "loss": 8.6783, "step": 132340 }, { "epoch": 0.6609403480736098, "grad_norm": 0.09219668805599213, "learning_rate": 1.0197251495656963e-05, "loss": 8.6718, "step": 132350 }, { "epoch": 0.6609902868985493, "grad_norm": 0.08887942135334015, "learning_rate": 1.0195749580715413e-05, "loss": 8.6755, "step": 132360 }, { "epoch": 0.6610402257234888, "grad_norm": 0.09542329609394073, "learning_rate": 1.0194247665773863e-05, "loss": 8.6684, "step": 132370 }, { "epoch": 0.6610901645484282, "grad_norm": 0.0939764529466629, "learning_rate": 1.0192745750832312e-05, "loss": 8.6675, "step": 132380 }, { "epoch": 0.6611401033733676, "grad_norm": 0.09207078814506531, "learning_rate": 1.019124383589076e-05, "loss": 8.6756, "step": 132390 }, { "epoch": 0.6611900421983071, "grad_norm": 0.09722395241260529, "learning_rate": 1.018974192094921e-05, "loss": 8.6686, "step": 132400 }, { "epoch": 0.6612399810232465, "grad_norm": 0.10081882029771805, "learning_rate": 1.018824000600766e-05, "loss": 8.6651, "step": 132410 }, { "epoch": 0.661289919848186, "grad_norm": 0.09268742799758911, "learning_rate": 1.018673809106611e-05, "loss": 8.6535, "step": 132420 }, { "epoch": 0.6613398586731254, "grad_norm": 0.08973268419504166, "learning_rate": 1.0185236176124559e-05, "loss": 8.6679, "step": 132430 }, { "epoch": 0.6613897974980649, "grad_norm": 0.08811484277248383, "learning_rate": 1.0183734261183007e-05, "loss": 8.6566, "step": 132440 }, { "epoch": 0.6614397363230043, "grad_norm": 0.09202300012111664, "learning_rate": 1.0182232346241458e-05, "loss": 8.6779, "step": 132450 }, { "epoch": 0.6614896751479438, "grad_norm": 0.08758775144815445, "learning_rate": 1.0180730431299908e-05, "loss": 8.6919, "step": 132460 }, { "epoch": 0.6615396139728832, "grad_norm": 0.08985673636198044, "learning_rate": 1.0179228516358358e-05, "loss": 8.678, "step": 132470 }, { "epoch": 0.6615895527978227, "grad_norm": 0.09474221616983414, "learning_rate": 1.0177726601416808e-05, "loss": 8.6547, "step": 132480 }, { "epoch": 0.6616394916227621, "grad_norm": 0.09064290672540665, "learning_rate": 1.0176224686475255e-05, "loss": 8.6735, "step": 132490 }, { "epoch": 0.6616894304477016, "grad_norm": 0.09543025493621826, "learning_rate": 1.0174722771533705e-05, "loss": 8.6677, "step": 132500 }, { "epoch": 0.661739369272641, "grad_norm": 0.08833234757184982, "learning_rate": 1.0173220856592155e-05, "loss": 8.6784, "step": 132510 }, { "epoch": 0.6617893080975804, "grad_norm": 0.09526517242193222, "learning_rate": 1.0171718941650606e-05, "loss": 8.6595, "step": 132520 }, { "epoch": 0.6618392469225199, "grad_norm": 0.09490874409675598, "learning_rate": 1.0170217026709056e-05, "loss": 8.6708, "step": 132530 }, { "epoch": 0.6618891857474594, "grad_norm": 0.0921601727604866, "learning_rate": 1.0168715111767502e-05, "loss": 8.6814, "step": 132540 }, { "epoch": 0.6619391245723988, "grad_norm": 0.09030061960220337, "learning_rate": 1.0167213196825953e-05, "loss": 8.6737, "step": 132550 }, { "epoch": 0.6619890633973382, "grad_norm": 0.09003777801990509, "learning_rate": 1.0165711281884403e-05, "loss": 8.6747, "step": 132560 }, { "epoch": 0.6620390022222777, "grad_norm": 0.09126746654510498, "learning_rate": 1.0164209366942853e-05, "loss": 8.6858, "step": 132570 }, { "epoch": 0.6620889410472172, "grad_norm": 0.08809841424226761, "learning_rate": 1.0162707452001303e-05, "loss": 8.6722, "step": 132580 }, { "epoch": 0.6621388798721566, "grad_norm": 0.09087785333395004, "learning_rate": 1.016120553705975e-05, "loss": 8.6795, "step": 132590 }, { "epoch": 0.662188818697096, "grad_norm": 0.09890290349721909, "learning_rate": 1.01597036221182e-05, "loss": 8.6719, "step": 132600 }, { "epoch": 0.6622387575220355, "grad_norm": 0.08723946660757065, "learning_rate": 1.015820170717665e-05, "loss": 8.6765, "step": 132610 }, { "epoch": 0.662288696346975, "grad_norm": 0.09441614151000977, "learning_rate": 1.01566997922351e-05, "loss": 8.6709, "step": 132620 }, { "epoch": 0.6623386351719144, "grad_norm": 0.09669451415538788, "learning_rate": 1.015519787729355e-05, "loss": 8.6856, "step": 132630 }, { "epoch": 0.6623885739968538, "grad_norm": 0.0917108878493309, "learning_rate": 1.0153695962352e-05, "loss": 8.6664, "step": 132640 }, { "epoch": 0.6624385128217933, "grad_norm": 0.0893852636218071, "learning_rate": 1.0152194047410448e-05, "loss": 8.6694, "step": 132650 }, { "epoch": 0.6624884516467328, "grad_norm": 0.09049538522958755, "learning_rate": 1.0150692132468898e-05, "loss": 8.6595, "step": 132660 }, { "epoch": 0.6625383904716722, "grad_norm": 0.0896064043045044, "learning_rate": 1.0149190217527348e-05, "loss": 8.6785, "step": 132670 }, { "epoch": 0.6625883292966116, "grad_norm": 0.09562838077545166, "learning_rate": 1.0147688302585798e-05, "loss": 8.6778, "step": 132680 }, { "epoch": 0.6626382681215511, "grad_norm": 0.08565637469291687, "learning_rate": 1.0146186387644247e-05, "loss": 8.6761, "step": 132690 }, { "epoch": 0.6626882069464906, "grad_norm": 0.08810480684041977, "learning_rate": 1.0144684472702695e-05, "loss": 8.6872, "step": 132700 }, { "epoch": 0.66273814577143, "grad_norm": 0.09386193752288818, "learning_rate": 1.0143182557761145e-05, "loss": 8.6697, "step": 132710 }, { "epoch": 0.6627880845963694, "grad_norm": 0.08664627373218536, "learning_rate": 1.0141680642819596e-05, "loss": 8.6757, "step": 132720 }, { "epoch": 0.6628380234213089, "grad_norm": 0.09146647155284882, "learning_rate": 1.0140178727878046e-05, "loss": 8.6781, "step": 132730 }, { "epoch": 0.6628879622462484, "grad_norm": 0.09313146770000458, "learning_rate": 1.0138676812936494e-05, "loss": 8.6754, "step": 132740 }, { "epoch": 0.6629379010711878, "grad_norm": 0.08888036757707596, "learning_rate": 1.0137174897994943e-05, "loss": 8.6916, "step": 132750 }, { "epoch": 0.6629878398961272, "grad_norm": 0.09621097892522812, "learning_rate": 1.0135672983053393e-05, "loss": 8.6865, "step": 132760 }, { "epoch": 0.6630377787210667, "grad_norm": 0.09131909161806107, "learning_rate": 1.0134171068111843e-05, "loss": 8.6703, "step": 132770 }, { "epoch": 0.6630877175460062, "grad_norm": 0.09367981553077698, "learning_rate": 1.0132669153170293e-05, "loss": 8.6669, "step": 132780 }, { "epoch": 0.6631376563709456, "grad_norm": 0.09454252570867538, "learning_rate": 1.0131167238228742e-05, "loss": 8.6777, "step": 132790 }, { "epoch": 0.663187595195885, "grad_norm": 0.09207130968570709, "learning_rate": 1.0129665323287192e-05, "loss": 8.6612, "step": 132800 }, { "epoch": 0.6632375340208245, "grad_norm": 0.08936728537082672, "learning_rate": 1.012816340834564e-05, "loss": 8.6755, "step": 132810 }, { "epoch": 0.663287472845764, "grad_norm": 0.09480737149715424, "learning_rate": 1.012666149340409e-05, "loss": 8.6621, "step": 132820 }, { "epoch": 0.6633374116707034, "grad_norm": 0.08680365234613419, "learning_rate": 1.012515957846254e-05, "loss": 8.6732, "step": 132830 }, { "epoch": 0.6633873504956428, "grad_norm": 0.09133225679397583, "learning_rate": 1.012365766352099e-05, "loss": 8.6831, "step": 132840 }, { "epoch": 0.6634372893205823, "grad_norm": 0.08942453563213348, "learning_rate": 1.012215574857944e-05, "loss": 8.6746, "step": 132850 }, { "epoch": 0.6634872281455217, "grad_norm": 0.09342201054096222, "learning_rate": 1.0120653833637888e-05, "loss": 8.6761, "step": 132860 }, { "epoch": 0.6635371669704612, "grad_norm": 0.09316600114107132, "learning_rate": 1.0119151918696338e-05, "loss": 8.6868, "step": 132870 }, { "epoch": 0.6635871057954006, "grad_norm": 0.08866464346647263, "learning_rate": 1.0117650003754788e-05, "loss": 8.6818, "step": 132880 }, { "epoch": 0.6636370446203401, "grad_norm": 0.09229955822229385, "learning_rate": 1.0116148088813237e-05, "loss": 8.6467, "step": 132890 }, { "epoch": 0.6636869834452795, "grad_norm": 0.09364652633666992, "learning_rate": 1.0114646173871687e-05, "loss": 8.6595, "step": 132900 }, { "epoch": 0.663736922270219, "grad_norm": 0.08931564539670944, "learning_rate": 1.0113144258930135e-05, "loss": 8.6613, "step": 132910 }, { "epoch": 0.6637868610951584, "grad_norm": 0.09411066770553589, "learning_rate": 1.0111642343988586e-05, "loss": 8.6725, "step": 132920 }, { "epoch": 0.6638367999200979, "grad_norm": 0.08930312842130661, "learning_rate": 1.0110140429047036e-05, "loss": 8.6819, "step": 132930 }, { "epoch": 0.6638867387450373, "grad_norm": 0.10185947269201279, "learning_rate": 1.0108638514105484e-05, "loss": 8.6582, "step": 132940 }, { "epoch": 0.6639366775699768, "grad_norm": 0.09503977745771408, "learning_rate": 1.0107136599163934e-05, "loss": 8.6604, "step": 132950 }, { "epoch": 0.6639866163949162, "grad_norm": 0.08833954483270645, "learning_rate": 1.0105634684222385e-05, "loss": 8.6738, "step": 132960 }, { "epoch": 0.6640365552198557, "grad_norm": 0.09546960145235062, "learning_rate": 1.0104132769280833e-05, "loss": 8.6512, "step": 132970 }, { "epoch": 0.6640864940447951, "grad_norm": 0.089764803647995, "learning_rate": 1.0102630854339283e-05, "loss": 8.6673, "step": 132980 }, { "epoch": 0.6641364328697346, "grad_norm": 0.09377101808786392, "learning_rate": 1.0101128939397732e-05, "loss": 8.6686, "step": 132990 }, { "epoch": 0.664186371694674, "grad_norm": 0.0922766849398613, "learning_rate": 1.0099627024456182e-05, "loss": 8.6927, "step": 133000 }, { "epoch": 0.6642363105196135, "grad_norm": 0.09250890463590622, "learning_rate": 1.0098125109514632e-05, "loss": 8.6572, "step": 133010 }, { "epoch": 0.6642862493445529, "grad_norm": 0.09117847681045532, "learning_rate": 1.009662319457308e-05, "loss": 8.67, "step": 133020 }, { "epoch": 0.6643361881694924, "grad_norm": 0.09532193839550018, "learning_rate": 1.009512127963153e-05, "loss": 8.6688, "step": 133030 }, { "epoch": 0.6643861269944318, "grad_norm": 0.093431755900383, "learning_rate": 1.009361936468998e-05, "loss": 8.6796, "step": 133040 }, { "epoch": 0.6644360658193713, "grad_norm": 0.09252150356769562, "learning_rate": 1.009211744974843e-05, "loss": 8.6743, "step": 133050 }, { "epoch": 0.6644860046443107, "grad_norm": 0.10001608729362488, "learning_rate": 1.009061553480688e-05, "loss": 8.6727, "step": 133060 }, { "epoch": 0.6645359434692502, "grad_norm": 0.09084002673625946, "learning_rate": 1.0089113619865328e-05, "loss": 8.6693, "step": 133070 }, { "epoch": 0.6645858822941896, "grad_norm": 0.09234880656003952, "learning_rate": 1.0087611704923778e-05, "loss": 8.6681, "step": 133080 }, { "epoch": 0.6646358211191291, "grad_norm": 0.09040534496307373, "learning_rate": 1.0086109789982227e-05, "loss": 8.659, "step": 133090 }, { "epoch": 0.6646857599440685, "grad_norm": 0.0903211459517479, "learning_rate": 1.0084607875040677e-05, "loss": 8.6726, "step": 133100 }, { "epoch": 0.664735698769008, "grad_norm": 0.08797595649957657, "learning_rate": 1.0083105960099127e-05, "loss": 8.676, "step": 133110 }, { "epoch": 0.6647856375939474, "grad_norm": 0.0916309654712677, "learning_rate": 1.0081604045157577e-05, "loss": 8.676, "step": 133120 }, { "epoch": 0.6648355764188869, "grad_norm": 0.08980787545442581, "learning_rate": 1.0080102130216026e-05, "loss": 8.6746, "step": 133130 }, { "epoch": 0.6648855152438263, "grad_norm": 0.0977618619799614, "learning_rate": 1.0078600215274474e-05, "loss": 8.6623, "step": 133140 }, { "epoch": 0.6649354540687658, "grad_norm": 0.08751973509788513, "learning_rate": 1.0077098300332924e-05, "loss": 8.6612, "step": 133150 }, { "epoch": 0.6649853928937052, "grad_norm": 0.09779629111289978, "learning_rate": 1.0075596385391375e-05, "loss": 8.6694, "step": 133160 }, { "epoch": 0.6650353317186447, "grad_norm": 0.0898473933339119, "learning_rate": 1.0074094470449825e-05, "loss": 8.6637, "step": 133170 }, { "epoch": 0.6650852705435841, "grad_norm": 0.08798861503601074, "learning_rate": 1.0072592555508273e-05, "loss": 8.6467, "step": 133180 }, { "epoch": 0.6651352093685236, "grad_norm": 0.09523648023605347, "learning_rate": 1.0071090640566722e-05, "loss": 8.6672, "step": 133190 }, { "epoch": 0.665185148193463, "grad_norm": 0.08886650949716568, "learning_rate": 1.0069588725625172e-05, "loss": 8.6718, "step": 133200 }, { "epoch": 0.6652350870184025, "grad_norm": 0.09928663820028305, "learning_rate": 1.0068086810683622e-05, "loss": 8.6745, "step": 133210 }, { "epoch": 0.6652850258433419, "grad_norm": 0.09062928706407547, "learning_rate": 1.0066584895742072e-05, "loss": 8.6682, "step": 133220 }, { "epoch": 0.6653349646682813, "grad_norm": 0.09397498518228531, "learning_rate": 1.006508298080052e-05, "loss": 8.6596, "step": 133230 }, { "epoch": 0.6653849034932208, "grad_norm": 0.08882281184196472, "learning_rate": 1.006358106585897e-05, "loss": 8.6688, "step": 133240 }, { "epoch": 0.6654348423181603, "grad_norm": 0.09210222959518433, "learning_rate": 1.006207915091742e-05, "loss": 8.663, "step": 133250 }, { "epoch": 0.6654847811430997, "grad_norm": 0.08811397850513458, "learning_rate": 1.006057723597587e-05, "loss": 8.6646, "step": 133260 }, { "epoch": 0.6655347199680391, "grad_norm": 0.08988697081804276, "learning_rate": 1.005907532103432e-05, "loss": 8.673, "step": 133270 }, { "epoch": 0.6655846587929786, "grad_norm": 0.09060483425855637, "learning_rate": 1.005757340609277e-05, "loss": 8.691, "step": 133280 }, { "epoch": 0.6656345976179181, "grad_norm": 0.09499598294496536, "learning_rate": 1.0056071491151217e-05, "loss": 8.6645, "step": 133290 }, { "epoch": 0.6656845364428575, "grad_norm": 0.09347851574420929, "learning_rate": 1.0054569576209667e-05, "loss": 8.6652, "step": 133300 }, { "epoch": 0.6657344752677969, "grad_norm": 0.09178951382637024, "learning_rate": 1.0053067661268117e-05, "loss": 8.6673, "step": 133310 }, { "epoch": 0.6657844140927364, "grad_norm": 0.08929334580898285, "learning_rate": 1.0051565746326567e-05, "loss": 8.6643, "step": 133320 }, { "epoch": 0.6658343529176759, "grad_norm": 0.08914398401975632, "learning_rate": 1.0050063831385017e-05, "loss": 8.6549, "step": 133330 }, { "epoch": 0.6658842917426153, "grad_norm": 0.09610702097415924, "learning_rate": 1.0048561916443464e-05, "loss": 8.6626, "step": 133340 }, { "epoch": 0.6659342305675547, "grad_norm": 0.09578754752874374, "learning_rate": 1.0047060001501914e-05, "loss": 8.6582, "step": 133350 }, { "epoch": 0.6659841693924942, "grad_norm": 0.09582225978374481, "learning_rate": 1.0045558086560365e-05, "loss": 8.6685, "step": 133360 }, { "epoch": 0.6660341082174337, "grad_norm": 0.08942214399576187, "learning_rate": 1.0044056171618815e-05, "loss": 8.6746, "step": 133370 }, { "epoch": 0.6660840470423731, "grad_norm": 0.08953560143709183, "learning_rate": 1.0042554256677265e-05, "loss": 8.6745, "step": 133380 }, { "epoch": 0.6661339858673125, "grad_norm": 0.0939660593867302, "learning_rate": 1.0041052341735712e-05, "loss": 8.6744, "step": 133390 }, { "epoch": 0.666183924692252, "grad_norm": 0.09279930591583252, "learning_rate": 1.0039550426794162e-05, "loss": 8.6558, "step": 133400 }, { "epoch": 0.6662338635171915, "grad_norm": 0.08562085032463074, "learning_rate": 1.0038048511852612e-05, "loss": 8.6662, "step": 133410 }, { "epoch": 0.6662838023421309, "grad_norm": 0.09029558300971985, "learning_rate": 1.0036546596911062e-05, "loss": 8.6574, "step": 133420 }, { "epoch": 0.6663337411670703, "grad_norm": 0.08761605620384216, "learning_rate": 1.0035044681969512e-05, "loss": 8.6631, "step": 133430 }, { "epoch": 0.6663836799920098, "grad_norm": 0.09495789557695389, "learning_rate": 1.0033542767027961e-05, "loss": 8.66, "step": 133440 }, { "epoch": 0.6664336188169493, "grad_norm": 0.09556636214256287, "learning_rate": 1.003204085208641e-05, "loss": 8.682, "step": 133450 }, { "epoch": 0.6664835576418887, "grad_norm": 0.09460949152708054, "learning_rate": 1.003053893714486e-05, "loss": 8.6886, "step": 133460 }, { "epoch": 0.6665334964668281, "grad_norm": 0.0882893055677414, "learning_rate": 1.002903702220331e-05, "loss": 8.668, "step": 133470 }, { "epoch": 0.6665834352917676, "grad_norm": 0.09346653521060944, "learning_rate": 1.002753510726176e-05, "loss": 8.6562, "step": 133480 }, { "epoch": 0.6666333741167071, "grad_norm": 0.09014210850000381, "learning_rate": 1.0026033192320208e-05, "loss": 8.6711, "step": 133490 }, { "epoch": 0.6666833129416465, "grad_norm": 0.09201137721538544, "learning_rate": 1.0024531277378657e-05, "loss": 8.6789, "step": 133500 }, { "epoch": 0.6667332517665859, "grad_norm": 0.09237250685691833, "learning_rate": 1.0023029362437107e-05, "loss": 8.6558, "step": 133510 }, { "epoch": 0.6667831905915254, "grad_norm": 0.09040234982967377, "learning_rate": 1.0021527447495557e-05, "loss": 8.6683, "step": 133520 }, { "epoch": 0.6668331294164648, "grad_norm": 0.10480639338493347, "learning_rate": 1.0020025532554007e-05, "loss": 8.6698, "step": 133530 }, { "epoch": 0.6668830682414043, "grad_norm": 0.09072550386190414, "learning_rate": 1.0018523617612456e-05, "loss": 8.668, "step": 133540 }, { "epoch": 0.6669330070663437, "grad_norm": 0.09083649516105652, "learning_rate": 1.0017021702670904e-05, "loss": 8.6629, "step": 133550 }, { "epoch": 0.6669829458912832, "grad_norm": 0.08946739882230759, "learning_rate": 1.0015519787729355e-05, "loss": 8.6661, "step": 133560 }, { "epoch": 0.6670328847162226, "grad_norm": 0.09026233851909637, "learning_rate": 1.0014017872787805e-05, "loss": 8.6714, "step": 133570 }, { "epoch": 0.6670828235411621, "grad_norm": 0.09073863178491592, "learning_rate": 1.0012515957846255e-05, "loss": 8.6753, "step": 133580 }, { "epoch": 0.6671327623661015, "grad_norm": 0.08761871606111526, "learning_rate": 1.0011014042904703e-05, "loss": 8.6689, "step": 133590 }, { "epoch": 0.667182701191041, "grad_norm": 0.09583648294210434, "learning_rate": 1.0009512127963154e-05, "loss": 8.6691, "step": 133600 }, { "epoch": 0.6672326400159804, "grad_norm": 0.09313435852527618, "learning_rate": 1.0008010213021602e-05, "loss": 8.6623, "step": 133610 }, { "epoch": 0.6672825788409199, "grad_norm": 0.08645382523536682, "learning_rate": 1.0006508298080052e-05, "loss": 8.6729, "step": 133620 }, { "epoch": 0.6673325176658593, "grad_norm": 0.09160168468952179, "learning_rate": 1.0005006383138502e-05, "loss": 8.6709, "step": 133630 }, { "epoch": 0.6673824564907987, "grad_norm": 0.0923621729016304, "learning_rate": 1.0003504468196951e-05, "loss": 8.6641, "step": 133640 }, { "epoch": 0.6674323953157382, "grad_norm": 0.09282460808753967, "learning_rate": 1.0002002553255401e-05, "loss": 8.6736, "step": 133650 }, { "epoch": 0.6674823341406777, "grad_norm": 0.09043533354997635, "learning_rate": 1.000050063831385e-05, "loss": 8.6669, "step": 133660 }, { "epoch": 0.6675322729656171, "grad_norm": 0.09773357957601547, "learning_rate": 9.9989987233723e-06, "loss": 8.6597, "step": 133670 }, { "epoch": 0.6675822117905565, "grad_norm": 0.09863118827342987, "learning_rate": 9.99749680843075e-06, "loss": 8.683, "step": 133680 }, { "epoch": 0.667632150615496, "grad_norm": 0.10197389125823975, "learning_rate": 9.995994893489198e-06, "loss": 8.6554, "step": 133690 }, { "epoch": 0.6676820894404355, "grad_norm": 0.09325721859931946, "learning_rate": 9.994492978547649e-06, "loss": 8.6614, "step": 133700 }, { "epoch": 0.6677320282653749, "grad_norm": 0.09466391056776047, "learning_rate": 9.992991063606097e-06, "loss": 8.6618, "step": 133710 }, { "epoch": 0.6677819670903143, "grad_norm": 0.09134448319673538, "learning_rate": 9.991489148664547e-06, "loss": 8.6812, "step": 133720 }, { "epoch": 0.6678319059152538, "grad_norm": 0.09619180858135223, "learning_rate": 9.989987233722997e-06, "loss": 8.6641, "step": 133730 }, { "epoch": 0.6678818447401933, "grad_norm": 0.096577949821949, "learning_rate": 9.988485318781446e-06, "loss": 8.6647, "step": 133740 }, { "epoch": 0.6679317835651327, "grad_norm": 0.08971861004829407, "learning_rate": 9.986983403839896e-06, "loss": 8.6594, "step": 133750 }, { "epoch": 0.6679817223900721, "grad_norm": 0.08666064590215683, "learning_rate": 9.985481488898346e-06, "loss": 8.6657, "step": 133760 }, { "epoch": 0.6680316612150116, "grad_norm": 0.09941823035478592, "learning_rate": 9.983979573956795e-06, "loss": 8.6705, "step": 133770 }, { "epoch": 0.6680816000399511, "grad_norm": 0.09281333535909653, "learning_rate": 9.982477659015245e-06, "loss": 8.6561, "step": 133780 }, { "epoch": 0.6681315388648905, "grad_norm": 0.09609360247850418, "learning_rate": 9.980975744073693e-06, "loss": 8.6606, "step": 133790 }, { "epoch": 0.6681814776898299, "grad_norm": 0.09355857223272324, "learning_rate": 9.979473829132144e-06, "loss": 8.6826, "step": 133800 }, { "epoch": 0.6682314165147694, "grad_norm": 0.09512100368738174, "learning_rate": 9.977971914190594e-06, "loss": 8.662, "step": 133810 }, { "epoch": 0.6682813553397089, "grad_norm": 0.09662336111068726, "learning_rate": 9.976469999249042e-06, "loss": 8.6586, "step": 133820 }, { "epoch": 0.6683312941646483, "grad_norm": 0.09392707049846649, "learning_rate": 9.974968084307492e-06, "loss": 8.6483, "step": 133830 }, { "epoch": 0.6683812329895877, "grad_norm": 0.09166334569454193, "learning_rate": 9.973466169365941e-06, "loss": 8.6611, "step": 133840 }, { "epoch": 0.6684311718145272, "grad_norm": 0.09149006009101868, "learning_rate": 9.971964254424391e-06, "loss": 8.653, "step": 133850 }, { "epoch": 0.6684811106394667, "grad_norm": 0.09438381344079971, "learning_rate": 9.970462339482841e-06, "loss": 8.6613, "step": 133860 }, { "epoch": 0.6685310494644061, "grad_norm": 0.08881976455450058, "learning_rate": 9.96896042454129e-06, "loss": 8.656, "step": 133870 }, { "epoch": 0.6685809882893455, "grad_norm": 0.09529709070920944, "learning_rate": 9.96745850959974e-06, "loss": 8.6681, "step": 133880 }, { "epoch": 0.668630927114285, "grad_norm": 0.09204521775245667, "learning_rate": 9.965956594658188e-06, "loss": 8.6722, "step": 133890 }, { "epoch": 0.6686808659392245, "grad_norm": 0.09275458008050919, "learning_rate": 9.964454679716639e-06, "loss": 8.6581, "step": 133900 }, { "epoch": 0.6687308047641639, "grad_norm": 0.09179842472076416, "learning_rate": 9.962952764775089e-06, "loss": 8.6784, "step": 133910 }, { "epoch": 0.6687807435891033, "grad_norm": 0.09390482306480408, "learning_rate": 9.961450849833539e-06, "loss": 8.6541, "step": 133920 }, { "epoch": 0.6688306824140428, "grad_norm": 0.09328165650367737, "learning_rate": 9.959948934891988e-06, "loss": 8.6696, "step": 133930 }, { "epoch": 0.6688806212389823, "grad_norm": 0.09216855466365814, "learning_rate": 9.958447019950436e-06, "loss": 8.6686, "step": 133940 }, { "epoch": 0.6689305600639217, "grad_norm": 0.09541112184524536, "learning_rate": 9.956945105008886e-06, "loss": 8.6747, "step": 133950 }, { "epoch": 0.6689804988888611, "grad_norm": 0.0967298373579979, "learning_rate": 9.955443190067336e-06, "loss": 8.6533, "step": 133960 }, { "epoch": 0.6690304377138006, "grad_norm": 0.09267246723175049, "learning_rate": 9.953941275125787e-06, "loss": 8.6666, "step": 133970 }, { "epoch": 0.6690803765387401, "grad_norm": 0.09133971482515335, "learning_rate": 9.952439360184235e-06, "loss": 8.6595, "step": 133980 }, { "epoch": 0.6691303153636795, "grad_norm": 0.0926765725016594, "learning_rate": 9.950937445242685e-06, "loss": 8.6628, "step": 133990 }, { "epoch": 0.6691802541886189, "grad_norm": 0.09417456388473511, "learning_rate": 9.949435530301134e-06, "loss": 8.6531, "step": 134000 }, { "epoch": 0.6692301930135583, "grad_norm": 0.09054078161716461, "learning_rate": 9.947933615359584e-06, "loss": 8.6442, "step": 134010 }, { "epoch": 0.6692801318384979, "grad_norm": 0.09623897820711136, "learning_rate": 9.946431700418034e-06, "loss": 8.653, "step": 134020 }, { "epoch": 0.6693300706634373, "grad_norm": 0.08746648579835892, "learning_rate": 9.944929785476483e-06, "loss": 8.6438, "step": 134030 }, { "epoch": 0.6693800094883767, "grad_norm": 0.09206461906433105, "learning_rate": 9.943427870534933e-06, "loss": 8.6584, "step": 134040 }, { "epoch": 0.6694299483133161, "grad_norm": 0.08908689767122269, "learning_rate": 9.941925955593381e-06, "loss": 8.6869, "step": 134050 }, { "epoch": 0.6694798871382557, "grad_norm": 0.09292318671941757, "learning_rate": 9.940424040651831e-06, "loss": 8.6826, "step": 134060 }, { "epoch": 0.6695298259631951, "grad_norm": 0.09243188053369522, "learning_rate": 9.938922125710282e-06, "loss": 8.6561, "step": 134070 }, { "epoch": 0.6695797647881345, "grad_norm": 0.09017886966466904, "learning_rate": 9.937420210768732e-06, "loss": 8.6697, "step": 134080 }, { "epoch": 0.669629703613074, "grad_norm": 0.09152711927890778, "learning_rate": 9.93591829582718e-06, "loss": 8.6725, "step": 134090 }, { "epoch": 0.6696796424380135, "grad_norm": 0.08486898243427277, "learning_rate": 9.934416380885629e-06, "loss": 8.6743, "step": 134100 }, { "epoch": 0.6697295812629529, "grad_norm": 0.09803474694490433, "learning_rate": 9.932914465944079e-06, "loss": 8.6401, "step": 134110 }, { "epoch": 0.6697795200878923, "grad_norm": 0.08933472633361816, "learning_rate": 9.931412551002529e-06, "loss": 8.6808, "step": 134120 }, { "epoch": 0.6698294589128317, "grad_norm": 0.09698875993490219, "learning_rate": 9.92991063606098e-06, "loss": 8.6689, "step": 134130 }, { "epoch": 0.6698793977377713, "grad_norm": 0.08952021598815918, "learning_rate": 9.928408721119428e-06, "loss": 8.6691, "step": 134140 }, { "epoch": 0.6699293365627107, "grad_norm": 0.09210740774869919, "learning_rate": 9.926906806177876e-06, "loss": 8.6496, "step": 134150 }, { "epoch": 0.6699792753876501, "grad_norm": 0.09553104639053345, "learning_rate": 9.925404891236326e-06, "loss": 8.6525, "step": 134160 }, { "epoch": 0.6700292142125895, "grad_norm": 0.09282413125038147, "learning_rate": 9.923902976294777e-06, "loss": 8.6403, "step": 134170 }, { "epoch": 0.6700791530375291, "grad_norm": 0.08609344810247421, "learning_rate": 9.922401061353227e-06, "loss": 8.6725, "step": 134180 }, { "epoch": 0.6701290918624685, "grad_norm": 0.09846524894237518, "learning_rate": 9.920899146411675e-06, "loss": 8.6636, "step": 134190 }, { "epoch": 0.6701790306874079, "grad_norm": 0.09294971078634262, "learning_rate": 9.919397231470124e-06, "loss": 8.6753, "step": 134200 }, { "epoch": 0.6702289695123473, "grad_norm": 0.08816470950841904, "learning_rate": 9.917895316528574e-06, "loss": 8.6784, "step": 134210 }, { "epoch": 0.6702789083372869, "grad_norm": 0.08824525773525238, "learning_rate": 9.916393401587024e-06, "loss": 8.6515, "step": 134220 }, { "epoch": 0.6703288471622263, "grad_norm": 0.09112218022346497, "learning_rate": 9.914891486645474e-06, "loss": 8.6579, "step": 134230 }, { "epoch": 0.6703787859871657, "grad_norm": 0.09652066230773926, "learning_rate": 9.913389571703924e-06, "loss": 8.6431, "step": 134240 }, { "epoch": 0.6704287248121051, "grad_norm": 0.09298358857631683, "learning_rate": 9.911887656762371e-06, "loss": 8.6533, "step": 134250 }, { "epoch": 0.6704786636370447, "grad_norm": 0.09158873558044434, "learning_rate": 9.910385741820821e-06, "loss": 8.6822, "step": 134260 }, { "epoch": 0.6705286024619841, "grad_norm": 0.09415313601493835, "learning_rate": 9.908883826879272e-06, "loss": 8.6575, "step": 134270 }, { "epoch": 0.6705785412869235, "grad_norm": 0.09670121222734451, "learning_rate": 9.907381911937722e-06, "loss": 8.6453, "step": 134280 }, { "epoch": 0.6706284801118629, "grad_norm": 0.08960152417421341, "learning_rate": 9.905879996996172e-06, "loss": 8.6497, "step": 134290 }, { "epoch": 0.6706784189368025, "grad_norm": 0.09629824757575989, "learning_rate": 9.904378082054619e-06, "loss": 8.6611, "step": 134300 }, { "epoch": 0.6707283577617419, "grad_norm": 0.09402245283126831, "learning_rate": 9.902876167113069e-06, "loss": 8.6787, "step": 134310 }, { "epoch": 0.6707782965866813, "grad_norm": 0.09469646960496902, "learning_rate": 9.901374252171519e-06, "loss": 8.6602, "step": 134320 }, { "epoch": 0.6708282354116207, "grad_norm": 0.09018561244010925, "learning_rate": 9.89987233722997e-06, "loss": 8.6598, "step": 134330 }, { "epoch": 0.6708781742365603, "grad_norm": 0.0924052819609642, "learning_rate": 9.89837042228842e-06, "loss": 8.6522, "step": 134340 }, { "epoch": 0.6709281130614997, "grad_norm": 0.09291255474090576, "learning_rate": 9.896868507346866e-06, "loss": 8.6561, "step": 134350 }, { "epoch": 0.6709780518864391, "grad_norm": 0.09238367527723312, "learning_rate": 9.895366592405316e-06, "loss": 8.6519, "step": 134360 }, { "epoch": 0.6710279907113785, "grad_norm": 0.10505057871341705, "learning_rate": 9.893864677463767e-06, "loss": 8.6528, "step": 134370 }, { "epoch": 0.6710779295363181, "grad_norm": 0.09222082048654556, "learning_rate": 9.892362762522217e-06, "loss": 8.6566, "step": 134380 }, { "epoch": 0.6711278683612575, "grad_norm": 0.09739550203084946, "learning_rate": 9.890860847580667e-06, "loss": 8.6633, "step": 134390 }, { "epoch": 0.6711778071861969, "grad_norm": 0.09582085907459259, "learning_rate": 9.889358932639115e-06, "loss": 8.6723, "step": 134400 }, { "epoch": 0.6712277460111363, "grad_norm": 0.09395405650138855, "learning_rate": 9.887857017697564e-06, "loss": 8.6626, "step": 134410 }, { "epoch": 0.6712776848360759, "grad_norm": 0.09649001806974411, "learning_rate": 9.886355102756014e-06, "loss": 8.6701, "step": 134420 }, { "epoch": 0.6713276236610153, "grad_norm": 0.09423769265413284, "learning_rate": 9.884853187814464e-06, "loss": 8.6763, "step": 134430 }, { "epoch": 0.6713775624859547, "grad_norm": 0.09237778931856155, "learning_rate": 9.883351272872914e-06, "loss": 8.6705, "step": 134440 }, { "epoch": 0.6714275013108941, "grad_norm": 0.08992139250040054, "learning_rate": 9.881849357931363e-06, "loss": 8.6781, "step": 134450 }, { "epoch": 0.6714774401358337, "grad_norm": 0.09282557666301727, "learning_rate": 9.880347442989811e-06, "loss": 8.6552, "step": 134460 }, { "epoch": 0.6715273789607731, "grad_norm": 0.0941321849822998, "learning_rate": 9.878845528048262e-06, "loss": 8.6635, "step": 134470 }, { "epoch": 0.6715773177857125, "grad_norm": 0.09401953965425491, "learning_rate": 9.877343613106712e-06, "loss": 8.6542, "step": 134480 }, { "epoch": 0.6716272566106519, "grad_norm": 0.0891314148902893, "learning_rate": 9.875841698165162e-06, "loss": 8.6772, "step": 134490 }, { "epoch": 0.6716771954355913, "grad_norm": 0.09066881239414215, "learning_rate": 9.87433978322361e-06, "loss": 8.6553, "step": 134500 }, { "epoch": 0.6717271342605309, "grad_norm": 0.09230073541402817, "learning_rate": 9.872837868282059e-06, "loss": 8.6493, "step": 134510 }, { "epoch": 0.6717770730854703, "grad_norm": 0.09482811391353607, "learning_rate": 9.871335953340509e-06, "loss": 8.656, "step": 134520 }, { "epoch": 0.6718270119104097, "grad_norm": 0.0987572893500328, "learning_rate": 9.86983403839896e-06, "loss": 8.6604, "step": 134530 }, { "epoch": 0.6718769507353491, "grad_norm": 0.10041922330856323, "learning_rate": 9.86833212345741e-06, "loss": 8.6664, "step": 134540 }, { "epoch": 0.6719268895602887, "grad_norm": 0.09294810891151428, "learning_rate": 9.866830208515858e-06, "loss": 8.6552, "step": 134550 }, { "epoch": 0.6719768283852281, "grad_norm": 0.0915827602148056, "learning_rate": 9.865328293574308e-06, "loss": 8.668, "step": 134560 }, { "epoch": 0.6720267672101675, "grad_norm": 0.09003446251153946, "learning_rate": 9.863826378632757e-06, "loss": 8.6777, "step": 134570 }, { "epoch": 0.6720767060351069, "grad_norm": 0.09368298947811127, "learning_rate": 9.862324463691207e-06, "loss": 8.6627, "step": 134580 }, { "epoch": 0.6721266448600465, "grad_norm": 0.09174887835979462, "learning_rate": 9.860822548749657e-06, "loss": 8.6605, "step": 134590 }, { "epoch": 0.6721765836849859, "grad_norm": 0.09435159713029861, "learning_rate": 9.859320633808105e-06, "loss": 8.6576, "step": 134600 }, { "epoch": 0.6722265225099253, "grad_norm": 0.09179011732339859, "learning_rate": 9.857818718866556e-06, "loss": 8.6462, "step": 134610 }, { "epoch": 0.6722764613348647, "grad_norm": 0.09385015815496445, "learning_rate": 9.856316803925004e-06, "loss": 8.6683, "step": 134620 }, { "epoch": 0.6723264001598043, "grad_norm": 0.09193267673254013, "learning_rate": 9.854814888983454e-06, "loss": 8.6526, "step": 134630 }, { "epoch": 0.6723763389847437, "grad_norm": 0.09351233392953873, "learning_rate": 9.853312974041904e-06, "loss": 8.6577, "step": 134640 }, { "epoch": 0.6724262778096831, "grad_norm": 0.09000106900930405, "learning_rate": 9.851811059100353e-06, "loss": 8.6783, "step": 134650 }, { "epoch": 0.6724762166346225, "grad_norm": 0.09067153185606003, "learning_rate": 9.850309144158803e-06, "loss": 8.6576, "step": 134660 }, { "epoch": 0.6725261554595621, "grad_norm": 0.0920468345284462, "learning_rate": 9.848807229217252e-06, "loss": 8.6682, "step": 134670 }, { "epoch": 0.6725760942845015, "grad_norm": 0.09809605777263641, "learning_rate": 9.847305314275702e-06, "loss": 8.6612, "step": 134680 }, { "epoch": 0.6726260331094409, "grad_norm": 0.1000465601682663, "learning_rate": 9.845803399334152e-06, "loss": 8.6516, "step": 134690 }, { "epoch": 0.6726759719343803, "grad_norm": 0.08977311849594116, "learning_rate": 9.8443014843926e-06, "loss": 8.6702, "step": 134700 }, { "epoch": 0.6727259107593199, "grad_norm": 0.09617611020803452, "learning_rate": 9.84279956945105e-06, "loss": 8.655, "step": 134710 }, { "epoch": 0.6727758495842593, "grad_norm": 0.08458354324102402, "learning_rate": 9.841297654509499e-06, "loss": 8.6569, "step": 134720 }, { "epoch": 0.6728257884091987, "grad_norm": 0.09260948747396469, "learning_rate": 9.83979573956795e-06, "loss": 8.6509, "step": 134730 }, { "epoch": 0.6728757272341381, "grad_norm": 0.09007035940885544, "learning_rate": 9.8382938246264e-06, "loss": 8.6548, "step": 134740 }, { "epoch": 0.6729256660590777, "grad_norm": 0.09251650422811508, "learning_rate": 9.836791909684848e-06, "loss": 8.6643, "step": 134750 }, { "epoch": 0.6729756048840171, "grad_norm": 0.09078550338745117, "learning_rate": 9.835289994743298e-06, "loss": 8.6623, "step": 134760 }, { "epoch": 0.6730255437089565, "grad_norm": 0.09649539738893509, "learning_rate": 9.833788079801748e-06, "loss": 8.6652, "step": 134770 }, { "epoch": 0.6730754825338959, "grad_norm": 0.09006690233945847, "learning_rate": 9.832286164860197e-06, "loss": 8.6559, "step": 134780 }, { "epoch": 0.6731254213588355, "grad_norm": 0.09151316434144974, "learning_rate": 9.830784249918647e-06, "loss": 8.656, "step": 134790 }, { "epoch": 0.6731753601837749, "grad_norm": 0.08936766535043716, "learning_rate": 9.829282334977095e-06, "loss": 8.6594, "step": 134800 }, { "epoch": 0.6732252990087143, "grad_norm": 0.09173012524843216, "learning_rate": 9.827780420035546e-06, "loss": 8.6488, "step": 134810 }, { "epoch": 0.6732752378336537, "grad_norm": 0.09206842631101608, "learning_rate": 9.826278505093996e-06, "loss": 8.6666, "step": 134820 }, { "epoch": 0.6733251766585933, "grad_norm": 0.08679156005382538, "learning_rate": 9.824776590152444e-06, "loss": 8.6556, "step": 134830 }, { "epoch": 0.6733751154835327, "grad_norm": 0.08848954737186432, "learning_rate": 9.823274675210894e-06, "loss": 8.6569, "step": 134840 }, { "epoch": 0.6734250543084721, "grad_norm": 0.09259720891714096, "learning_rate": 9.821772760269343e-06, "loss": 8.665, "step": 134850 }, { "epoch": 0.6734749931334115, "grad_norm": 0.0927811861038208, "learning_rate": 9.820270845327793e-06, "loss": 8.6641, "step": 134860 }, { "epoch": 0.6735249319583511, "grad_norm": 0.09524352103471756, "learning_rate": 9.818768930386243e-06, "loss": 8.6755, "step": 134870 }, { "epoch": 0.6735748707832905, "grad_norm": 0.09407173842191696, "learning_rate": 9.817267015444692e-06, "loss": 8.6591, "step": 134880 }, { "epoch": 0.6736248096082299, "grad_norm": 0.0950080081820488, "learning_rate": 9.815765100503142e-06, "loss": 8.6511, "step": 134890 }, { "epoch": 0.6736747484331693, "grad_norm": 0.09280173480510712, "learning_rate": 9.81426318556159e-06, "loss": 8.6471, "step": 134900 }, { "epoch": 0.6737246872581089, "grad_norm": 0.0905299037694931, "learning_rate": 9.81276127062004e-06, "loss": 8.6516, "step": 134910 }, { "epoch": 0.6737746260830483, "grad_norm": 0.09357500821352005, "learning_rate": 9.81125935567849e-06, "loss": 8.668, "step": 134920 }, { "epoch": 0.6738245649079877, "grad_norm": 0.09710787981748581, "learning_rate": 9.809757440736941e-06, "loss": 8.6576, "step": 134930 }, { "epoch": 0.6738745037329271, "grad_norm": 0.08756748586893082, "learning_rate": 9.80825552579539e-06, "loss": 8.6721, "step": 134940 }, { "epoch": 0.6739244425578667, "grad_norm": 0.08993040025234222, "learning_rate": 9.806753610853838e-06, "loss": 8.6743, "step": 134950 }, { "epoch": 0.6739743813828061, "grad_norm": 0.0880391076207161, "learning_rate": 9.805251695912288e-06, "loss": 8.6597, "step": 134960 }, { "epoch": 0.6740243202077455, "grad_norm": 0.08911752700805664, "learning_rate": 9.803749780970738e-06, "loss": 8.6669, "step": 134970 }, { "epoch": 0.6740742590326849, "grad_norm": 0.09046872705221176, "learning_rate": 9.802247866029188e-06, "loss": 8.6558, "step": 134980 }, { "epoch": 0.6741241978576245, "grad_norm": 0.09054312109947205, "learning_rate": 9.800745951087637e-06, "loss": 8.6595, "step": 134990 }, { "epoch": 0.6741741366825639, "grad_norm": 0.09408404678106308, "learning_rate": 9.799244036146085e-06, "loss": 8.6439, "step": 135000 }, { "epoch": 0.6742240755075033, "grad_norm": 0.08936314284801483, "learning_rate": 9.797742121204536e-06, "loss": 8.6506, "step": 135010 }, { "epoch": 0.6742740143324427, "grad_norm": 0.09435812383890152, "learning_rate": 9.796240206262986e-06, "loss": 8.6472, "step": 135020 }, { "epoch": 0.6743239531573822, "grad_norm": 0.09421943128108978, "learning_rate": 9.794738291321436e-06, "loss": 8.6749, "step": 135030 }, { "epoch": 0.6743738919823217, "grad_norm": 0.08769576996564865, "learning_rate": 9.793236376379884e-06, "loss": 8.6708, "step": 135040 }, { "epoch": 0.6744238308072611, "grad_norm": 0.09175736457109451, "learning_rate": 9.791734461438333e-06, "loss": 8.6579, "step": 135050 }, { "epoch": 0.6744737696322005, "grad_norm": 0.08920390158891678, "learning_rate": 9.790232546496783e-06, "loss": 8.6663, "step": 135060 }, { "epoch": 0.67452370845714, "grad_norm": 0.09278859943151474, "learning_rate": 9.788730631555233e-06, "loss": 8.6719, "step": 135070 }, { "epoch": 0.6745736472820795, "grad_norm": 0.090042345225811, "learning_rate": 9.787228716613683e-06, "loss": 8.6571, "step": 135080 }, { "epoch": 0.6746235861070189, "grad_norm": 0.09226316958665848, "learning_rate": 9.785726801672134e-06, "loss": 8.6539, "step": 135090 }, { "epoch": 0.6746735249319583, "grad_norm": 0.09357789903879166, "learning_rate": 9.78422488673058e-06, "loss": 8.6673, "step": 135100 }, { "epoch": 0.6747234637568978, "grad_norm": 0.09317333251237869, "learning_rate": 9.78272297178903e-06, "loss": 8.6531, "step": 135110 }, { "epoch": 0.6747734025818373, "grad_norm": 0.09152083098888397, "learning_rate": 9.78122105684748e-06, "loss": 8.6667, "step": 135120 }, { "epoch": 0.6748233414067767, "grad_norm": 0.0933714434504509, "learning_rate": 9.779719141905931e-06, "loss": 8.6673, "step": 135130 }, { "epoch": 0.6748732802317161, "grad_norm": 0.09301735460758209, "learning_rate": 9.778217226964381e-06, "loss": 8.6561, "step": 135140 }, { "epoch": 0.6749232190566556, "grad_norm": 0.0876002088189125, "learning_rate": 9.776715312022828e-06, "loss": 8.6553, "step": 135150 }, { "epoch": 0.6749731578815951, "grad_norm": 0.09405035525560379, "learning_rate": 9.775213397081278e-06, "loss": 8.6746, "step": 135160 }, { "epoch": 0.6750230967065345, "grad_norm": 0.09140407294034958, "learning_rate": 9.773711482139728e-06, "loss": 8.6653, "step": 135170 }, { "epoch": 0.6750730355314739, "grad_norm": 0.08978252112865448, "learning_rate": 9.772209567198178e-06, "loss": 8.6645, "step": 135180 }, { "epoch": 0.6751229743564134, "grad_norm": 0.0867505818605423, "learning_rate": 9.770707652256629e-06, "loss": 8.6649, "step": 135190 }, { "epoch": 0.6751729131813529, "grad_norm": 0.08967334032058716, "learning_rate": 9.769205737315075e-06, "loss": 8.6698, "step": 135200 }, { "epoch": 0.6752228520062923, "grad_norm": 0.09012636542320251, "learning_rate": 9.767703822373526e-06, "loss": 8.6562, "step": 135210 }, { "epoch": 0.6752727908312317, "grad_norm": 0.0897894874215126, "learning_rate": 9.766201907431976e-06, "loss": 8.6699, "step": 135220 }, { "epoch": 0.6753227296561712, "grad_norm": 0.09098558127880096, "learning_rate": 9.764699992490426e-06, "loss": 8.6482, "step": 135230 }, { "epoch": 0.6753726684811107, "grad_norm": 0.09297525882720947, "learning_rate": 9.763198077548876e-06, "loss": 8.6506, "step": 135240 }, { "epoch": 0.6754226073060501, "grad_norm": 0.09668441861867905, "learning_rate": 9.761696162607325e-06, "loss": 8.6461, "step": 135250 }, { "epoch": 0.6754725461309895, "grad_norm": 0.09092225879430771, "learning_rate": 9.760194247665773e-06, "loss": 8.6631, "step": 135260 }, { "epoch": 0.675522484955929, "grad_norm": 0.09417429566383362, "learning_rate": 9.758692332724223e-06, "loss": 8.6589, "step": 135270 }, { "epoch": 0.6755724237808685, "grad_norm": 0.08669702708721161, "learning_rate": 9.757190417782673e-06, "loss": 8.6646, "step": 135280 }, { "epoch": 0.6756223626058079, "grad_norm": 0.08657342940568924, "learning_rate": 9.755688502841124e-06, "loss": 8.6499, "step": 135290 }, { "epoch": 0.6756723014307473, "grad_norm": 0.09297830611467361, "learning_rate": 9.754186587899572e-06, "loss": 8.6483, "step": 135300 }, { "epoch": 0.6757222402556868, "grad_norm": 0.09091796725988388, "learning_rate": 9.75268467295802e-06, "loss": 8.6687, "step": 135310 }, { "epoch": 0.6757721790806263, "grad_norm": 0.09649965912103653, "learning_rate": 9.75118275801647e-06, "loss": 8.6546, "step": 135320 }, { "epoch": 0.6758221179055657, "grad_norm": 0.09378094226121902, "learning_rate": 9.749680843074921e-06, "loss": 8.6654, "step": 135330 }, { "epoch": 0.6758720567305051, "grad_norm": 0.09173280000686646, "learning_rate": 9.748178928133371e-06, "loss": 8.6516, "step": 135340 }, { "epoch": 0.6759219955554446, "grad_norm": 0.096292644739151, "learning_rate": 9.74667701319182e-06, "loss": 8.6403, "step": 135350 }, { "epoch": 0.675971934380384, "grad_norm": 0.09011993557214737, "learning_rate": 9.745175098250268e-06, "loss": 8.6538, "step": 135360 }, { "epoch": 0.6760218732053235, "grad_norm": 0.09252528101205826, "learning_rate": 9.743673183308718e-06, "loss": 8.6566, "step": 135370 }, { "epoch": 0.6760718120302629, "grad_norm": 0.09614871442317963, "learning_rate": 9.742171268367168e-06, "loss": 8.6462, "step": 135380 }, { "epoch": 0.6761217508552024, "grad_norm": 0.09508166462182999, "learning_rate": 9.740669353425619e-06, "loss": 8.6597, "step": 135390 }, { "epoch": 0.6761716896801419, "grad_norm": 0.09378086030483246, "learning_rate": 9.739167438484067e-06, "loss": 8.6674, "step": 135400 }, { "epoch": 0.6762216285050813, "grad_norm": 0.08668700605630875, "learning_rate": 9.737665523542517e-06, "loss": 8.6488, "step": 135410 }, { "epoch": 0.6762715673300207, "grad_norm": 0.09363897889852524, "learning_rate": 9.736163608600966e-06, "loss": 8.6639, "step": 135420 }, { "epoch": 0.6763215061549602, "grad_norm": 0.0928199365735054, "learning_rate": 9.734661693659416e-06, "loss": 8.6574, "step": 135430 }, { "epoch": 0.6763714449798996, "grad_norm": 0.09100401401519775, "learning_rate": 9.733159778717866e-06, "loss": 8.6541, "step": 135440 }, { "epoch": 0.6764213838048391, "grad_norm": 0.08921308815479279, "learning_rate": 9.731657863776315e-06, "loss": 8.6332, "step": 135450 }, { "epoch": 0.6764713226297785, "grad_norm": 0.0926818996667862, "learning_rate": 9.730155948834765e-06, "loss": 8.6395, "step": 135460 }, { "epoch": 0.676521261454718, "grad_norm": 0.08852003514766693, "learning_rate": 9.728654033893213e-06, "loss": 8.6492, "step": 135470 }, { "epoch": 0.6765712002796574, "grad_norm": 0.08820042759180069, "learning_rate": 9.727152118951664e-06, "loss": 8.6549, "step": 135480 }, { "epoch": 0.6766211391045969, "grad_norm": 0.09674821048974991, "learning_rate": 9.725650204010114e-06, "loss": 8.6498, "step": 135490 }, { "epoch": 0.6766710779295363, "grad_norm": 0.0904177725315094, "learning_rate": 9.724148289068562e-06, "loss": 8.6776, "step": 135500 }, { "epoch": 0.6767210167544757, "grad_norm": 0.09356199204921722, "learning_rate": 9.722646374127012e-06, "loss": 8.6619, "step": 135510 }, { "epoch": 0.6767709555794152, "grad_norm": 0.09460005164146423, "learning_rate": 9.72114445918546e-06, "loss": 8.6478, "step": 135520 }, { "epoch": 0.6768208944043547, "grad_norm": 0.09244546294212341, "learning_rate": 9.719642544243911e-06, "loss": 8.6778, "step": 135530 }, { "epoch": 0.6768708332292941, "grad_norm": 0.09258837252855301, "learning_rate": 9.718140629302361e-06, "loss": 8.6654, "step": 135540 }, { "epoch": 0.6769207720542335, "grad_norm": 0.08935635536909103, "learning_rate": 9.71663871436081e-06, "loss": 8.6699, "step": 135550 }, { "epoch": 0.676970710879173, "grad_norm": 0.10672944039106369, "learning_rate": 9.71513679941926e-06, "loss": 8.6493, "step": 135560 }, { "epoch": 0.6770206497041125, "grad_norm": 0.09256184101104736, "learning_rate": 9.71363488447771e-06, "loss": 8.6716, "step": 135570 }, { "epoch": 0.6770705885290519, "grad_norm": 0.09515663981437683, "learning_rate": 9.712132969536159e-06, "loss": 8.6385, "step": 135580 }, { "epoch": 0.6771205273539913, "grad_norm": 0.09241361916065216, "learning_rate": 9.710631054594609e-06, "loss": 8.6506, "step": 135590 }, { "epoch": 0.6771704661789308, "grad_norm": 0.08679981529712677, "learning_rate": 9.709129139653057e-06, "loss": 8.6791, "step": 135600 }, { "epoch": 0.6772204050038703, "grad_norm": 0.08532244712114334, "learning_rate": 9.707627224711507e-06, "loss": 8.671, "step": 135610 }, { "epoch": 0.6772703438288097, "grad_norm": 0.08982454240322113, "learning_rate": 9.706125309769958e-06, "loss": 8.6516, "step": 135620 }, { "epoch": 0.6773202826537491, "grad_norm": 0.10029303282499313, "learning_rate": 9.704623394828406e-06, "loss": 8.6435, "step": 135630 }, { "epoch": 0.6773702214786886, "grad_norm": 0.09623725712299347, "learning_rate": 9.703121479886856e-06, "loss": 8.6662, "step": 135640 }, { "epoch": 0.6774201603036281, "grad_norm": 0.08999323099851608, "learning_rate": 9.701619564945305e-06, "loss": 8.6676, "step": 135650 }, { "epoch": 0.6774700991285675, "grad_norm": 0.09232403337955475, "learning_rate": 9.700117650003755e-06, "loss": 8.6596, "step": 135660 }, { "epoch": 0.6775200379535069, "grad_norm": 0.08921338617801666, "learning_rate": 9.698615735062205e-06, "loss": 8.6421, "step": 135670 }, { "epoch": 0.6775699767784464, "grad_norm": 0.09304777532815933, "learning_rate": 9.697113820120654e-06, "loss": 8.6406, "step": 135680 }, { "epoch": 0.6776199156033859, "grad_norm": 0.08728094398975372, "learning_rate": 9.695611905179104e-06, "loss": 8.6578, "step": 135690 }, { "epoch": 0.6776698544283253, "grad_norm": 0.09113137423992157, "learning_rate": 9.694109990237552e-06, "loss": 8.6582, "step": 135700 }, { "epoch": 0.6777197932532647, "grad_norm": 0.09369809180498123, "learning_rate": 9.692608075296002e-06, "loss": 8.6454, "step": 135710 }, { "epoch": 0.6777697320782042, "grad_norm": 0.0949154943227768, "learning_rate": 9.691106160354453e-06, "loss": 8.646, "step": 135720 }, { "epoch": 0.6778196709031437, "grad_norm": 0.09159260988235474, "learning_rate": 9.689604245412903e-06, "loss": 8.6647, "step": 135730 }, { "epoch": 0.6778696097280831, "grad_norm": 0.08925732225179672, "learning_rate": 9.688102330471351e-06, "loss": 8.6396, "step": 135740 }, { "epoch": 0.6779195485530225, "grad_norm": 0.10024773329496384, "learning_rate": 9.6866004155298e-06, "loss": 8.6457, "step": 135750 }, { "epoch": 0.677969487377962, "grad_norm": 0.08866682648658752, "learning_rate": 9.68509850058825e-06, "loss": 8.662, "step": 135760 }, { "epoch": 0.6780194262029015, "grad_norm": 0.09140399843454361, "learning_rate": 9.6835965856467e-06, "loss": 8.6452, "step": 135770 }, { "epoch": 0.6780693650278409, "grad_norm": 0.09286113828420639, "learning_rate": 9.68209467070515e-06, "loss": 8.6551, "step": 135780 }, { "epoch": 0.6781193038527803, "grad_norm": 0.09333377331495285, "learning_rate": 9.680592755763599e-06, "loss": 8.6632, "step": 135790 }, { "epoch": 0.6781692426777198, "grad_norm": 0.09056348353624344, "learning_rate": 9.679090840822047e-06, "loss": 8.6587, "step": 135800 }, { "epoch": 0.6782191815026593, "grad_norm": 0.09084860980510712, "learning_rate": 9.677588925880497e-06, "loss": 8.664, "step": 135810 }, { "epoch": 0.6782691203275987, "grad_norm": 0.09165407717227936, "learning_rate": 9.676087010938948e-06, "loss": 8.6647, "step": 135820 }, { "epoch": 0.6783190591525381, "grad_norm": 0.08495034277439117, "learning_rate": 9.674585095997398e-06, "loss": 8.6464, "step": 135830 }, { "epoch": 0.6783689979774776, "grad_norm": 0.09273646771907806, "learning_rate": 9.673083181055846e-06, "loss": 8.6369, "step": 135840 }, { "epoch": 0.678418936802417, "grad_norm": 0.09448003023862839, "learning_rate": 9.671581266114295e-06, "loss": 8.6468, "step": 135850 }, { "epoch": 0.6784688756273565, "grad_norm": 0.08844837546348572, "learning_rate": 9.670079351172745e-06, "loss": 8.6591, "step": 135860 }, { "epoch": 0.6785188144522959, "grad_norm": 0.09456723183393478, "learning_rate": 9.668577436231195e-06, "loss": 8.6537, "step": 135870 }, { "epoch": 0.6785687532772354, "grad_norm": 0.0897020697593689, "learning_rate": 9.667075521289645e-06, "loss": 8.6545, "step": 135880 }, { "epoch": 0.6786186921021748, "grad_norm": 0.09378873556852341, "learning_rate": 9.665573606348095e-06, "loss": 8.6497, "step": 135890 }, { "epoch": 0.6786686309271143, "grad_norm": 0.09122031182050705, "learning_rate": 9.664071691406542e-06, "loss": 8.6599, "step": 135900 }, { "epoch": 0.6787185697520537, "grad_norm": 0.0962730273604393, "learning_rate": 9.662569776464992e-06, "loss": 8.6579, "step": 135910 }, { "epoch": 0.6787685085769932, "grad_norm": 0.0921352207660675, "learning_rate": 9.661067861523443e-06, "loss": 8.6433, "step": 135920 }, { "epoch": 0.6788184474019326, "grad_norm": 0.09395617246627808, "learning_rate": 9.659565946581893e-06, "loss": 8.6507, "step": 135930 }, { "epoch": 0.6788683862268721, "grad_norm": 0.08777940273284912, "learning_rate": 9.658064031640343e-06, "loss": 8.6626, "step": 135940 }, { "epoch": 0.6789183250518115, "grad_norm": 0.08930377662181854, "learning_rate": 9.65656211669879e-06, "loss": 8.6573, "step": 135950 }, { "epoch": 0.678968263876751, "grad_norm": 0.09742255508899689, "learning_rate": 9.65506020175724e-06, "loss": 8.6381, "step": 135960 }, { "epoch": 0.6790182027016904, "grad_norm": 0.09233038872480392, "learning_rate": 9.65355828681569e-06, "loss": 8.6535, "step": 135970 }, { "epoch": 0.6790681415266299, "grad_norm": 0.08856892585754395, "learning_rate": 9.65205637187414e-06, "loss": 8.6476, "step": 135980 }, { "epoch": 0.6791180803515693, "grad_norm": 0.09412689507007599, "learning_rate": 9.65055445693259e-06, "loss": 8.6462, "step": 135990 }, { "epoch": 0.6791680191765088, "grad_norm": 0.09102386981248856, "learning_rate": 9.649052541991037e-06, "loss": 8.6614, "step": 136000 }, { "epoch": 0.6792179580014482, "grad_norm": 0.0882454663515091, "learning_rate": 9.647550627049487e-06, "loss": 8.6684, "step": 136010 }, { "epoch": 0.6792678968263877, "grad_norm": 0.09218038618564606, "learning_rate": 9.646048712107938e-06, "loss": 8.6619, "step": 136020 }, { "epoch": 0.6793178356513271, "grad_norm": 0.09152485430240631, "learning_rate": 9.644546797166388e-06, "loss": 8.6552, "step": 136030 }, { "epoch": 0.6793677744762666, "grad_norm": 0.08703062683343887, "learning_rate": 9.643044882224838e-06, "loss": 8.6545, "step": 136040 }, { "epoch": 0.679417713301206, "grad_norm": 0.09743312001228333, "learning_rate": 9.641542967283286e-06, "loss": 8.6342, "step": 136050 }, { "epoch": 0.6794676521261455, "grad_norm": 0.09214362502098083, "learning_rate": 9.640041052341735e-06, "loss": 8.6579, "step": 136060 }, { "epoch": 0.6795175909510849, "grad_norm": 0.0931442603468895, "learning_rate": 9.638539137400185e-06, "loss": 8.6461, "step": 136070 }, { "epoch": 0.6795675297760244, "grad_norm": 0.09083132445812225, "learning_rate": 9.637037222458635e-06, "loss": 8.6503, "step": 136080 }, { "epoch": 0.6796174686009638, "grad_norm": 0.0905727818608284, "learning_rate": 9.635535307517085e-06, "loss": 8.6579, "step": 136090 }, { "epoch": 0.6796674074259033, "grad_norm": 0.09426701813936234, "learning_rate": 9.634033392575534e-06, "loss": 8.6577, "step": 136100 }, { "epoch": 0.6797173462508427, "grad_norm": 0.0878579169511795, "learning_rate": 9.632531477633982e-06, "loss": 8.6501, "step": 136110 }, { "epoch": 0.6797672850757822, "grad_norm": 0.0943881943821907, "learning_rate": 9.631029562692433e-06, "loss": 8.6596, "step": 136120 }, { "epoch": 0.6798172239007216, "grad_norm": 0.0909527838230133, "learning_rate": 9.629527647750883e-06, "loss": 8.6589, "step": 136130 }, { "epoch": 0.6798671627256611, "grad_norm": 0.09404680877923965, "learning_rate": 9.628025732809333e-06, "loss": 8.6575, "step": 136140 }, { "epoch": 0.6799171015506005, "grad_norm": 0.09321132302284241, "learning_rate": 9.626523817867781e-06, "loss": 8.6519, "step": 136150 }, { "epoch": 0.67996704037554, "grad_norm": 0.09817219525575638, "learning_rate": 9.62502190292623e-06, "loss": 8.6566, "step": 136160 }, { "epoch": 0.6800169792004794, "grad_norm": 0.09167230874300003, "learning_rate": 9.62351998798468e-06, "loss": 8.6576, "step": 136170 }, { "epoch": 0.6800669180254189, "grad_norm": 0.09567620605230331, "learning_rate": 9.62201807304313e-06, "loss": 8.6495, "step": 136180 }, { "epoch": 0.6801168568503583, "grad_norm": 0.0942583829164505, "learning_rate": 9.62051615810158e-06, "loss": 8.664, "step": 136190 }, { "epoch": 0.6801667956752978, "grad_norm": 0.08843056857585907, "learning_rate": 9.619014243160029e-06, "loss": 8.6652, "step": 136200 }, { "epoch": 0.6802167345002372, "grad_norm": 0.08930647373199463, "learning_rate": 9.617512328218479e-06, "loss": 8.6719, "step": 136210 }, { "epoch": 0.6802666733251767, "grad_norm": 0.09439358115196228, "learning_rate": 9.616010413276928e-06, "loss": 8.6578, "step": 136220 }, { "epoch": 0.6803166121501161, "grad_norm": 0.08645504713058472, "learning_rate": 9.614508498335378e-06, "loss": 8.661, "step": 136230 }, { "epoch": 0.6803665509750556, "grad_norm": 0.0910206213593483, "learning_rate": 9.613006583393828e-06, "loss": 8.6595, "step": 136240 }, { "epoch": 0.680416489799995, "grad_norm": 0.09109845012426376, "learning_rate": 9.611504668452278e-06, "loss": 8.6505, "step": 136250 }, { "epoch": 0.6804664286249344, "grad_norm": 0.09436528384685516, "learning_rate": 9.610002753510727e-06, "loss": 8.6472, "step": 136260 }, { "epoch": 0.6805163674498739, "grad_norm": 0.09152334183454514, "learning_rate": 9.608500838569175e-06, "loss": 8.6531, "step": 136270 }, { "epoch": 0.6805663062748134, "grad_norm": 0.09130854159593582, "learning_rate": 9.606998923627625e-06, "loss": 8.6697, "step": 136280 }, { "epoch": 0.6806162450997528, "grad_norm": 0.08787249028682709, "learning_rate": 9.605497008686075e-06, "loss": 8.6474, "step": 136290 }, { "epoch": 0.6806661839246922, "grad_norm": 0.09564139693975449, "learning_rate": 9.603995093744526e-06, "loss": 8.6475, "step": 136300 }, { "epoch": 0.6807161227496317, "grad_norm": 0.08936180919408798, "learning_rate": 9.602493178802974e-06, "loss": 8.6639, "step": 136310 }, { "epoch": 0.6807660615745712, "grad_norm": 0.09855446964502335, "learning_rate": 9.600991263861423e-06, "loss": 8.673, "step": 136320 }, { "epoch": 0.6808160003995106, "grad_norm": 0.08871414512395859, "learning_rate": 9.599489348919873e-06, "loss": 8.6471, "step": 136330 }, { "epoch": 0.68086593922445, "grad_norm": 0.09087369590997696, "learning_rate": 9.597987433978323e-06, "loss": 8.6577, "step": 136340 }, { "epoch": 0.6809158780493895, "grad_norm": 0.09292785823345184, "learning_rate": 9.596485519036773e-06, "loss": 8.6358, "step": 136350 }, { "epoch": 0.680965816874329, "grad_norm": 0.09038669615983963, "learning_rate": 9.594983604095222e-06, "loss": 8.6422, "step": 136360 }, { "epoch": 0.6810157556992684, "grad_norm": 0.09672816097736359, "learning_rate": 9.593481689153672e-06, "loss": 8.6498, "step": 136370 }, { "epoch": 0.6810656945242078, "grad_norm": 0.09116402268409729, "learning_rate": 9.59197977421212e-06, "loss": 8.6377, "step": 136380 }, { "epoch": 0.6811156333491473, "grad_norm": 0.09753582626581192, "learning_rate": 9.59047785927057e-06, "loss": 8.6643, "step": 136390 }, { "epoch": 0.6811655721740868, "grad_norm": 0.0920112207531929, "learning_rate": 9.58897594432902e-06, "loss": 8.6488, "step": 136400 }, { "epoch": 0.6812155109990262, "grad_norm": 0.09360447525978088, "learning_rate": 9.587474029387469e-06, "loss": 8.6664, "step": 136410 }, { "epoch": 0.6812654498239656, "grad_norm": 0.08685506880283356, "learning_rate": 9.58597211444592e-06, "loss": 8.6444, "step": 136420 }, { "epoch": 0.6813153886489051, "grad_norm": 0.08979588001966476, "learning_rate": 9.584470199504368e-06, "loss": 8.6541, "step": 136430 }, { "epoch": 0.6813653274738446, "grad_norm": 0.09099925309419632, "learning_rate": 9.582968284562818e-06, "loss": 8.6319, "step": 136440 }, { "epoch": 0.681415266298784, "grad_norm": 0.09031230956315994, "learning_rate": 9.581466369621268e-06, "loss": 8.6513, "step": 136450 }, { "epoch": 0.6814652051237234, "grad_norm": 0.08769738674163818, "learning_rate": 9.579964454679717e-06, "loss": 8.6562, "step": 136460 }, { "epoch": 0.6815151439486629, "grad_norm": 0.08992310613393784, "learning_rate": 9.578462539738167e-06, "loss": 8.6546, "step": 136470 }, { "epoch": 0.6815650827736023, "grad_norm": 0.09138327836990356, "learning_rate": 9.576960624796615e-06, "loss": 8.6545, "step": 136480 }, { "epoch": 0.6816150215985418, "grad_norm": 0.08947155624628067, "learning_rate": 9.575458709855065e-06, "loss": 8.6435, "step": 136490 }, { "epoch": 0.6816649604234812, "grad_norm": 0.09748796373605728, "learning_rate": 9.573956794913516e-06, "loss": 8.6491, "step": 136500 }, { "epoch": 0.6817148992484207, "grad_norm": 0.0930284857749939, "learning_rate": 9.572454879971964e-06, "loss": 8.6502, "step": 136510 }, { "epoch": 0.6817648380733601, "grad_norm": 0.08821598440408707, "learning_rate": 9.570952965030414e-06, "loss": 8.6452, "step": 136520 }, { "epoch": 0.6818147768982996, "grad_norm": 0.09385675936937332, "learning_rate": 9.569451050088864e-06, "loss": 8.6533, "step": 136530 }, { "epoch": 0.681864715723239, "grad_norm": 0.09572584182024002, "learning_rate": 9.567949135147313e-06, "loss": 8.6635, "step": 136540 }, { "epoch": 0.6819146545481785, "grad_norm": 0.09196435660123825, "learning_rate": 9.566447220205763e-06, "loss": 8.6576, "step": 136550 }, { "epoch": 0.6819645933731179, "grad_norm": 0.09118195623159409, "learning_rate": 9.564945305264212e-06, "loss": 8.6366, "step": 136560 }, { "epoch": 0.6820145321980574, "grad_norm": 0.09549646824598312, "learning_rate": 9.563443390322662e-06, "loss": 8.6665, "step": 136570 }, { "epoch": 0.6820644710229968, "grad_norm": 0.09187420457601547, "learning_rate": 9.561941475381112e-06, "loss": 8.6431, "step": 136580 }, { "epoch": 0.6821144098479363, "grad_norm": 0.09515494108200073, "learning_rate": 9.56043956043956e-06, "loss": 8.6565, "step": 136590 }, { "epoch": 0.6821643486728757, "grad_norm": 0.09291938692331314, "learning_rate": 9.55893764549801e-06, "loss": 8.6536, "step": 136600 }, { "epoch": 0.6822142874978152, "grad_norm": 0.09135958552360535, "learning_rate": 9.557435730556459e-06, "loss": 8.6686, "step": 136610 }, { "epoch": 0.6822642263227546, "grad_norm": 0.09079675376415253, "learning_rate": 9.55593381561491e-06, "loss": 8.6562, "step": 136620 }, { "epoch": 0.682314165147694, "grad_norm": 0.08791366219520569, "learning_rate": 9.55443190067336e-06, "loss": 8.6422, "step": 136630 }, { "epoch": 0.6823641039726335, "grad_norm": 0.09008821099996567, "learning_rate": 9.552929985731808e-06, "loss": 8.6484, "step": 136640 }, { "epoch": 0.682414042797573, "grad_norm": 0.09008509665727615, "learning_rate": 9.551428070790258e-06, "loss": 8.6609, "step": 136650 }, { "epoch": 0.6824639816225124, "grad_norm": 0.0959354117512703, "learning_rate": 9.549926155848707e-06, "loss": 8.6284, "step": 136660 }, { "epoch": 0.6825139204474518, "grad_norm": 0.08918027579784393, "learning_rate": 9.548424240907157e-06, "loss": 8.6587, "step": 136670 }, { "epoch": 0.6825638592723913, "grad_norm": 0.08706361055374146, "learning_rate": 9.546922325965607e-06, "loss": 8.6813, "step": 136680 }, { "epoch": 0.6826137980973308, "grad_norm": 0.09403030574321747, "learning_rate": 9.545420411024057e-06, "loss": 8.6521, "step": 136690 }, { "epoch": 0.6826637369222702, "grad_norm": 0.08769270032644272, "learning_rate": 9.543918496082506e-06, "loss": 8.6526, "step": 136700 }, { "epoch": 0.6827136757472096, "grad_norm": 0.08837845176458359, "learning_rate": 9.542416581140954e-06, "loss": 8.647, "step": 136710 }, { "epoch": 0.6827636145721491, "grad_norm": 0.08815280348062515, "learning_rate": 9.540914666199404e-06, "loss": 8.6532, "step": 136720 }, { "epoch": 0.6828135533970886, "grad_norm": 0.09271814674139023, "learning_rate": 9.539412751257854e-06, "loss": 8.6624, "step": 136730 }, { "epoch": 0.682863492222028, "grad_norm": 0.09003327786922455, "learning_rate": 9.537910836316305e-06, "loss": 8.6456, "step": 136740 }, { "epoch": 0.6829134310469674, "grad_norm": 0.09336933493614197, "learning_rate": 9.536408921374753e-06, "loss": 8.6364, "step": 136750 }, { "epoch": 0.6829633698719069, "grad_norm": 0.08961906284093857, "learning_rate": 9.534907006433202e-06, "loss": 8.653, "step": 136760 }, { "epoch": 0.6830133086968464, "grad_norm": 0.08953448385000229, "learning_rate": 9.533405091491652e-06, "loss": 8.6516, "step": 136770 }, { "epoch": 0.6830632475217858, "grad_norm": 0.09705983847379684, "learning_rate": 9.531903176550102e-06, "loss": 8.6433, "step": 136780 }, { "epoch": 0.6831131863467252, "grad_norm": 0.0961066335439682, "learning_rate": 9.530401261608552e-06, "loss": 8.6596, "step": 136790 }, { "epoch": 0.6831631251716647, "grad_norm": 0.09489046037197113, "learning_rate": 9.528899346667e-06, "loss": 8.6587, "step": 136800 }, { "epoch": 0.6832130639966042, "grad_norm": 0.0909392461180687, "learning_rate": 9.527397431725449e-06, "loss": 8.6567, "step": 136810 }, { "epoch": 0.6832630028215436, "grad_norm": 0.09276154637336731, "learning_rate": 9.5258955167839e-06, "loss": 8.6676, "step": 136820 }, { "epoch": 0.683312941646483, "grad_norm": 0.08885812014341354, "learning_rate": 9.52439360184235e-06, "loss": 8.6517, "step": 136830 }, { "epoch": 0.6833628804714225, "grad_norm": 0.0934206172823906, "learning_rate": 9.5228916869008e-06, "loss": 8.6566, "step": 136840 }, { "epoch": 0.683412819296362, "grad_norm": 0.08924253284931183, "learning_rate": 9.52138977195925e-06, "loss": 8.6622, "step": 136850 }, { "epoch": 0.6834627581213014, "grad_norm": 0.0887620598077774, "learning_rate": 9.519887857017697e-06, "loss": 8.6523, "step": 136860 }, { "epoch": 0.6835126969462408, "grad_norm": 0.09116550534963608, "learning_rate": 9.518385942076147e-06, "loss": 8.6463, "step": 136870 }, { "epoch": 0.6835626357711803, "grad_norm": 0.09384380280971527, "learning_rate": 9.516884027134597e-06, "loss": 8.6518, "step": 136880 }, { "epoch": 0.6836125745961198, "grad_norm": 0.08999653905630112, "learning_rate": 9.515382112193047e-06, "loss": 8.6534, "step": 136890 }, { "epoch": 0.6836625134210592, "grad_norm": 0.09472544491291046, "learning_rate": 9.513880197251497e-06, "loss": 8.6418, "step": 136900 }, { "epoch": 0.6837124522459986, "grad_norm": 0.08867818862199783, "learning_rate": 9.512378282309944e-06, "loss": 8.6511, "step": 136910 }, { "epoch": 0.6837623910709381, "grad_norm": 0.09118511527776718, "learning_rate": 9.510876367368394e-06, "loss": 8.6535, "step": 136920 }, { "epoch": 0.6838123298958776, "grad_norm": 0.09027484059333801, "learning_rate": 9.509374452426844e-06, "loss": 8.6652, "step": 136930 }, { "epoch": 0.683862268720817, "grad_norm": 0.09857519716024399, "learning_rate": 9.507872537485295e-06, "loss": 8.6434, "step": 136940 }, { "epoch": 0.6839122075457564, "grad_norm": 0.09504196047782898, "learning_rate": 9.506370622543745e-06, "loss": 8.6584, "step": 136950 }, { "epoch": 0.6839621463706959, "grad_norm": 0.09877480566501617, "learning_rate": 9.504868707602192e-06, "loss": 8.6523, "step": 136960 }, { "epoch": 0.6840120851956354, "grad_norm": 0.09247778356075287, "learning_rate": 9.503366792660642e-06, "loss": 8.6504, "step": 136970 }, { "epoch": 0.6840620240205748, "grad_norm": 0.09239480644464493, "learning_rate": 9.501864877719092e-06, "loss": 8.6505, "step": 136980 }, { "epoch": 0.6841119628455142, "grad_norm": 0.09122372418642044, "learning_rate": 9.500362962777542e-06, "loss": 8.6395, "step": 136990 }, { "epoch": 0.6841619016704537, "grad_norm": 0.09300727397203445, "learning_rate": 9.498861047835992e-06, "loss": 8.6488, "step": 137000 }, { "epoch": 0.6842118404953932, "grad_norm": 0.0911245048046112, "learning_rate": 9.49735913289444e-06, "loss": 8.6665, "step": 137010 }, { "epoch": 0.6842617793203326, "grad_norm": 0.0955081507563591, "learning_rate": 9.49585721795289e-06, "loss": 8.6382, "step": 137020 }, { "epoch": 0.684311718145272, "grad_norm": 0.09284482151269913, "learning_rate": 9.49435530301134e-06, "loss": 8.6343, "step": 137030 }, { "epoch": 0.6843616569702115, "grad_norm": 0.09098115563392639, "learning_rate": 9.49285338806979e-06, "loss": 8.6491, "step": 137040 }, { "epoch": 0.684411595795151, "grad_norm": 0.09565536677837372, "learning_rate": 9.49135147312824e-06, "loss": 8.6573, "step": 137050 }, { "epoch": 0.6844615346200904, "grad_norm": 0.0895000547170639, "learning_rate": 9.489849558186688e-06, "loss": 8.6537, "step": 137060 }, { "epoch": 0.6845114734450298, "grad_norm": 0.09270433336496353, "learning_rate": 9.488347643245137e-06, "loss": 8.6309, "step": 137070 }, { "epoch": 0.6845614122699692, "grad_norm": 0.09027011692523956, "learning_rate": 9.486845728303587e-06, "loss": 8.6541, "step": 137080 }, { "epoch": 0.6846113510949088, "grad_norm": 0.08946502953767776, "learning_rate": 9.485343813362037e-06, "loss": 8.6389, "step": 137090 }, { "epoch": 0.6846612899198482, "grad_norm": 0.09274662286043167, "learning_rate": 9.483841898420487e-06, "loss": 8.6507, "step": 137100 }, { "epoch": 0.6847112287447876, "grad_norm": 0.09150737524032593, "learning_rate": 9.482339983478936e-06, "loss": 8.6462, "step": 137110 }, { "epoch": 0.684761167569727, "grad_norm": 0.09016991406679153, "learning_rate": 9.480838068537384e-06, "loss": 8.662, "step": 137120 }, { "epoch": 0.6848111063946666, "grad_norm": 0.09275011718273163, "learning_rate": 9.479336153595835e-06, "loss": 8.6538, "step": 137130 }, { "epoch": 0.684861045219606, "grad_norm": 0.09211241453886032, "learning_rate": 9.477834238654285e-06, "loss": 8.6423, "step": 137140 }, { "epoch": 0.6849109840445454, "grad_norm": 0.09323053061962128, "learning_rate": 9.476332323712735e-06, "loss": 8.6311, "step": 137150 }, { "epoch": 0.6849609228694848, "grad_norm": 0.0909585952758789, "learning_rate": 9.474830408771183e-06, "loss": 8.6413, "step": 137160 }, { "epoch": 0.6850108616944244, "grad_norm": 0.09803111851215363, "learning_rate": 9.473328493829634e-06, "loss": 8.6599, "step": 137170 }, { "epoch": 0.6850608005193638, "grad_norm": 0.09382762759923935, "learning_rate": 9.471826578888082e-06, "loss": 8.6395, "step": 137180 }, { "epoch": 0.6851107393443032, "grad_norm": 0.09193086624145508, "learning_rate": 9.470324663946532e-06, "loss": 8.6454, "step": 137190 }, { "epoch": 0.6851606781692426, "grad_norm": 0.0866667628288269, "learning_rate": 9.468822749004982e-06, "loss": 8.6586, "step": 137200 }, { "epoch": 0.6852106169941822, "grad_norm": 0.09309227019548416, "learning_rate": 9.467320834063431e-06, "loss": 8.6427, "step": 137210 }, { "epoch": 0.6852605558191216, "grad_norm": 0.091177798807621, "learning_rate": 9.465818919121881e-06, "loss": 8.6524, "step": 137220 }, { "epoch": 0.685310494644061, "grad_norm": 0.09202263504266739, "learning_rate": 9.46431700418033e-06, "loss": 8.6487, "step": 137230 }, { "epoch": 0.6853604334690004, "grad_norm": 0.0925859585404396, "learning_rate": 9.46281508923878e-06, "loss": 8.6511, "step": 137240 }, { "epoch": 0.68541037229394, "grad_norm": 0.09529491513967514, "learning_rate": 9.46131317429723e-06, "loss": 8.6417, "step": 137250 }, { "epoch": 0.6854603111188794, "grad_norm": 0.09451477974653244, "learning_rate": 9.459811259355678e-06, "loss": 8.6519, "step": 137260 }, { "epoch": 0.6855102499438188, "grad_norm": 0.08990958333015442, "learning_rate": 9.458309344414129e-06, "loss": 8.647, "step": 137270 }, { "epoch": 0.6855601887687582, "grad_norm": 0.09311163425445557, "learning_rate": 9.456807429472577e-06, "loss": 8.642, "step": 137280 }, { "epoch": 0.6856101275936978, "grad_norm": 0.09261354058980942, "learning_rate": 9.455305514531027e-06, "loss": 8.6528, "step": 137290 }, { "epoch": 0.6856600664186372, "grad_norm": 0.09306151419878006, "learning_rate": 9.453803599589477e-06, "loss": 8.6436, "step": 137300 }, { "epoch": 0.6857100052435766, "grad_norm": 0.09042923152446747, "learning_rate": 9.452301684647926e-06, "loss": 8.6518, "step": 137310 }, { "epoch": 0.685759944068516, "grad_norm": 0.09723861515522003, "learning_rate": 9.450799769706376e-06, "loss": 8.6463, "step": 137320 }, { "epoch": 0.6858098828934556, "grad_norm": 0.08681852370500565, "learning_rate": 9.449297854764826e-06, "loss": 8.6446, "step": 137330 }, { "epoch": 0.685859821718395, "grad_norm": 0.09019467979669571, "learning_rate": 9.447795939823275e-06, "loss": 8.6482, "step": 137340 }, { "epoch": 0.6859097605433344, "grad_norm": 0.08701999485492706, "learning_rate": 9.446294024881725e-06, "loss": 8.6666, "step": 137350 }, { "epoch": 0.6859596993682738, "grad_norm": 0.09151817858219147, "learning_rate": 9.444792109940173e-06, "loss": 8.6528, "step": 137360 }, { "epoch": 0.6860096381932134, "grad_norm": 0.0913306251168251, "learning_rate": 9.443290194998624e-06, "loss": 8.6475, "step": 137370 }, { "epoch": 0.6860595770181528, "grad_norm": 0.09225058555603027, "learning_rate": 9.441788280057074e-06, "loss": 8.6478, "step": 137380 }, { "epoch": 0.6861095158430922, "grad_norm": 0.09292393922805786, "learning_rate": 9.440286365115522e-06, "loss": 8.6571, "step": 137390 }, { "epoch": 0.6861594546680316, "grad_norm": 0.09331595152616501, "learning_rate": 9.438784450173972e-06, "loss": 8.6414, "step": 137400 }, { "epoch": 0.6862093934929712, "grad_norm": 0.09516488760709763, "learning_rate": 9.437282535232421e-06, "loss": 8.6387, "step": 137410 }, { "epoch": 0.6862593323179106, "grad_norm": 0.09467773884534836, "learning_rate": 9.435780620290871e-06, "loss": 8.6409, "step": 137420 }, { "epoch": 0.68630927114285, "grad_norm": 0.09190910309553146, "learning_rate": 9.434278705349321e-06, "loss": 8.6683, "step": 137430 }, { "epoch": 0.6863592099677894, "grad_norm": 0.09423499554395676, "learning_rate": 9.43277679040777e-06, "loss": 8.6736, "step": 137440 }, { "epoch": 0.686409148792729, "grad_norm": 0.0923381969332695, "learning_rate": 9.43127487546622e-06, "loss": 8.6486, "step": 137450 }, { "epoch": 0.6864590876176684, "grad_norm": 0.08919500559568405, "learning_rate": 9.429772960524668e-06, "loss": 8.6351, "step": 137460 }, { "epoch": 0.6865090264426078, "grad_norm": 0.0964340791106224, "learning_rate": 9.428271045583119e-06, "loss": 8.6507, "step": 137470 }, { "epoch": 0.6865589652675472, "grad_norm": 0.09165029972791672, "learning_rate": 9.426769130641569e-06, "loss": 8.6495, "step": 137480 }, { "epoch": 0.6866089040924866, "grad_norm": 0.09624398499727249, "learning_rate": 9.425267215700019e-06, "loss": 8.6515, "step": 137490 }, { "epoch": 0.6866588429174262, "grad_norm": 0.09847095608711243, "learning_rate": 9.423765300758467e-06, "loss": 8.6554, "step": 137500 }, { "epoch": 0.6867087817423656, "grad_norm": 0.0960099995136261, "learning_rate": 9.422263385816916e-06, "loss": 8.6467, "step": 137510 }, { "epoch": 0.686758720567305, "grad_norm": 0.09143520146608353, "learning_rate": 9.420761470875366e-06, "loss": 8.6458, "step": 137520 }, { "epoch": 0.6868086593922444, "grad_norm": 0.08999507874250412, "learning_rate": 9.419259555933816e-06, "loss": 8.6486, "step": 137530 }, { "epoch": 0.686858598217184, "grad_norm": 0.0936044380068779, "learning_rate": 9.417757640992266e-06, "loss": 8.6722, "step": 137540 }, { "epoch": 0.6869085370421234, "grad_norm": 0.08734313398599625, "learning_rate": 9.416255726050715e-06, "loss": 8.6346, "step": 137550 }, { "epoch": 0.6869584758670628, "grad_norm": 0.09482523053884506, "learning_rate": 9.414753811109163e-06, "loss": 8.6509, "step": 137560 }, { "epoch": 0.6870084146920022, "grad_norm": 0.08811195939779282, "learning_rate": 9.413251896167614e-06, "loss": 8.6456, "step": 137570 }, { "epoch": 0.6870583535169418, "grad_norm": 0.09007152169942856, "learning_rate": 9.411749981226064e-06, "loss": 8.6496, "step": 137580 }, { "epoch": 0.6871082923418812, "grad_norm": 0.09563975781202316, "learning_rate": 9.410248066284514e-06, "loss": 8.6543, "step": 137590 }, { "epoch": 0.6871582311668206, "grad_norm": 0.09100675582885742, "learning_rate": 9.408746151342962e-06, "loss": 8.6472, "step": 137600 }, { "epoch": 0.68720816999176, "grad_norm": 0.09444539994001389, "learning_rate": 9.407244236401411e-06, "loss": 8.652, "step": 137610 }, { "epoch": 0.6872581088166996, "grad_norm": 0.09386273473501205, "learning_rate": 9.405742321459861e-06, "loss": 8.6341, "step": 137620 }, { "epoch": 0.687308047641639, "grad_norm": 0.09169203042984009, "learning_rate": 9.404240406518311e-06, "loss": 8.6365, "step": 137630 }, { "epoch": 0.6873579864665784, "grad_norm": 0.0909794345498085, "learning_rate": 9.402738491576761e-06, "loss": 8.6598, "step": 137640 }, { "epoch": 0.6874079252915178, "grad_norm": 0.08933387696743011, "learning_rate": 9.40123657663521e-06, "loss": 8.6547, "step": 137650 }, { "epoch": 0.6874578641164574, "grad_norm": 0.097355417907238, "learning_rate": 9.399734661693658e-06, "loss": 8.6451, "step": 137660 }, { "epoch": 0.6875078029413968, "grad_norm": 0.08841703087091446, "learning_rate": 9.398232746752109e-06, "loss": 8.6566, "step": 137670 }, { "epoch": 0.6875577417663362, "grad_norm": 0.09026035666465759, "learning_rate": 9.396730831810559e-06, "loss": 8.6522, "step": 137680 }, { "epoch": 0.6876076805912756, "grad_norm": 0.09169178456068039, "learning_rate": 9.395228916869009e-06, "loss": 8.6437, "step": 137690 }, { "epoch": 0.6876576194162152, "grad_norm": 0.08948150277137756, "learning_rate": 9.393727001927459e-06, "loss": 8.6354, "step": 137700 }, { "epoch": 0.6877075582411546, "grad_norm": 0.09596080332994461, "learning_rate": 9.392225086985906e-06, "loss": 8.637, "step": 137710 }, { "epoch": 0.687757497066094, "grad_norm": 0.09012558311223984, "learning_rate": 9.390723172044356e-06, "loss": 8.6362, "step": 137720 }, { "epoch": 0.6878074358910334, "grad_norm": 0.09151880443096161, "learning_rate": 9.389221257102806e-06, "loss": 8.643, "step": 137730 }, { "epoch": 0.687857374715973, "grad_norm": 0.09232999384403229, "learning_rate": 9.387719342161256e-06, "loss": 8.648, "step": 137740 }, { "epoch": 0.6879073135409124, "grad_norm": 0.08922406286001205, "learning_rate": 9.386217427219707e-06, "loss": 8.6526, "step": 137750 }, { "epoch": 0.6879572523658518, "grad_norm": 0.09624180942773819, "learning_rate": 9.384715512278153e-06, "loss": 8.6546, "step": 137760 }, { "epoch": 0.6880071911907912, "grad_norm": 0.09329060465097427, "learning_rate": 9.383213597336604e-06, "loss": 8.652, "step": 137770 }, { "epoch": 0.6880571300157308, "grad_norm": 0.08916980773210526, "learning_rate": 9.381711682395054e-06, "loss": 8.6219, "step": 137780 }, { "epoch": 0.6881070688406702, "grad_norm": 0.08881982415914536, "learning_rate": 9.380209767453504e-06, "loss": 8.6483, "step": 137790 }, { "epoch": 0.6881570076656096, "grad_norm": 0.09591236710548401, "learning_rate": 9.378707852511954e-06, "loss": 8.6547, "step": 137800 }, { "epoch": 0.688206946490549, "grad_norm": 0.0902673676609993, "learning_rate": 9.377205937570401e-06, "loss": 8.6516, "step": 137810 }, { "epoch": 0.6882568853154886, "grad_norm": 0.09228136390447617, "learning_rate": 9.375704022628851e-06, "loss": 8.6433, "step": 137820 }, { "epoch": 0.688306824140428, "grad_norm": 0.09464501589536667, "learning_rate": 9.374202107687301e-06, "loss": 8.6375, "step": 137830 }, { "epoch": 0.6883567629653674, "grad_norm": 0.09438979625701904, "learning_rate": 9.372700192745751e-06, "loss": 8.6417, "step": 137840 }, { "epoch": 0.6884067017903068, "grad_norm": 0.08938003331422806, "learning_rate": 9.371198277804202e-06, "loss": 8.6508, "step": 137850 }, { "epoch": 0.6884566406152464, "grad_norm": 0.09114663302898407, "learning_rate": 9.36969636286265e-06, "loss": 8.6371, "step": 137860 }, { "epoch": 0.6885065794401858, "grad_norm": 0.09571482241153717, "learning_rate": 9.368194447921099e-06, "loss": 8.6337, "step": 137870 }, { "epoch": 0.6885565182651252, "grad_norm": 0.08686354756355286, "learning_rate": 9.366692532979549e-06, "loss": 8.6501, "step": 137880 }, { "epoch": 0.6886064570900646, "grad_norm": 0.09277797490358353, "learning_rate": 9.365190618037999e-06, "loss": 8.6464, "step": 137890 }, { "epoch": 0.6886563959150042, "grad_norm": 0.09338825196027756, "learning_rate": 9.363688703096449e-06, "loss": 8.6573, "step": 137900 }, { "epoch": 0.6887063347399436, "grad_norm": 0.09079349040985107, "learning_rate": 9.362186788154898e-06, "loss": 8.6472, "step": 137910 }, { "epoch": 0.688756273564883, "grad_norm": 0.09190376847982407, "learning_rate": 9.360684873213346e-06, "loss": 8.6445, "step": 137920 }, { "epoch": 0.6888062123898224, "grad_norm": 0.09575271606445312, "learning_rate": 9.359182958271796e-06, "loss": 8.6442, "step": 137930 }, { "epoch": 0.688856151214762, "grad_norm": 0.09022709727287292, "learning_rate": 9.357681043330246e-06, "loss": 8.6403, "step": 137940 }, { "epoch": 0.6889060900397014, "grad_norm": 0.09948229044675827, "learning_rate": 9.356179128388697e-06, "loss": 8.636, "step": 137950 }, { "epoch": 0.6889560288646408, "grad_norm": 0.09341901540756226, "learning_rate": 9.354677213447145e-06, "loss": 8.6431, "step": 137960 }, { "epoch": 0.6890059676895802, "grad_norm": 0.09408657997846603, "learning_rate": 9.353175298505594e-06, "loss": 8.6304, "step": 137970 }, { "epoch": 0.6890559065145198, "grad_norm": 0.09293026477098465, "learning_rate": 9.351673383564044e-06, "loss": 8.6281, "step": 137980 }, { "epoch": 0.6891058453394592, "grad_norm": 0.09260407835245132, "learning_rate": 9.350171468622494e-06, "loss": 8.6426, "step": 137990 }, { "epoch": 0.6891557841643986, "grad_norm": 0.09389134496450424, "learning_rate": 9.348669553680944e-06, "loss": 8.6538, "step": 138000 }, { "epoch": 0.689205722989338, "grad_norm": 0.09285546839237213, "learning_rate": 9.347167638739393e-06, "loss": 8.6568, "step": 138010 }, { "epoch": 0.6892556618142776, "grad_norm": 0.0875903069972992, "learning_rate": 9.345665723797843e-06, "loss": 8.6533, "step": 138020 }, { "epoch": 0.689305600639217, "grad_norm": 0.08949259668588638, "learning_rate": 9.344163808856291e-06, "loss": 8.6302, "step": 138030 }, { "epoch": 0.6893555394641564, "grad_norm": 0.09010973572731018, "learning_rate": 9.342661893914741e-06, "loss": 8.6442, "step": 138040 }, { "epoch": 0.6894054782890958, "grad_norm": 0.09066742658615112, "learning_rate": 9.341159978973192e-06, "loss": 8.6377, "step": 138050 }, { "epoch": 0.6894554171140354, "grad_norm": 0.09108269959688187, "learning_rate": 9.33965806403164e-06, "loss": 8.6662, "step": 138060 }, { "epoch": 0.6895053559389748, "grad_norm": 0.08662552386522293, "learning_rate": 9.33815614909009e-06, "loss": 8.6386, "step": 138070 }, { "epoch": 0.6895552947639142, "grad_norm": 0.08738676458597183, "learning_rate": 9.336654234148539e-06, "loss": 8.6379, "step": 138080 }, { "epoch": 0.6896052335888536, "grad_norm": 0.0980435162782669, "learning_rate": 9.335152319206989e-06, "loss": 8.6349, "step": 138090 }, { "epoch": 0.6896551724137931, "grad_norm": 0.09144052863121033, "learning_rate": 9.333650404265439e-06, "loss": 8.638, "step": 138100 }, { "epoch": 0.6897051112387326, "grad_norm": 0.09114320576190948, "learning_rate": 9.332148489323888e-06, "loss": 8.6406, "step": 138110 }, { "epoch": 0.689755050063672, "grad_norm": 0.09149006754159927, "learning_rate": 9.330646574382338e-06, "loss": 8.6525, "step": 138120 }, { "epoch": 0.6898049888886114, "grad_norm": 0.0991503894329071, "learning_rate": 9.329144659440786e-06, "loss": 8.6523, "step": 138130 }, { "epoch": 0.689854927713551, "grad_norm": 0.09777787327766418, "learning_rate": 9.327642744499236e-06, "loss": 8.6357, "step": 138140 }, { "epoch": 0.6899048665384904, "grad_norm": 0.09248573333024979, "learning_rate": 9.326140829557687e-06, "loss": 8.6435, "step": 138150 }, { "epoch": 0.6899548053634298, "grad_norm": 0.09282256662845612, "learning_rate": 9.324638914616135e-06, "loss": 8.6544, "step": 138160 }, { "epoch": 0.6900047441883692, "grad_norm": 0.08951915800571442, "learning_rate": 9.323136999674585e-06, "loss": 8.6397, "step": 138170 }, { "epoch": 0.6900546830133087, "grad_norm": 0.09212198853492737, "learning_rate": 9.321635084733035e-06, "loss": 8.6415, "step": 138180 }, { "epoch": 0.6901046218382482, "grad_norm": 0.09542210400104523, "learning_rate": 9.320133169791484e-06, "loss": 8.6362, "step": 138190 }, { "epoch": 0.6901545606631876, "grad_norm": 0.08942250162363052, "learning_rate": 9.318631254849934e-06, "loss": 8.6416, "step": 138200 }, { "epoch": 0.690204499488127, "grad_norm": 0.08706214278936386, "learning_rate": 9.317129339908383e-06, "loss": 8.6381, "step": 138210 }, { "epoch": 0.6902544383130665, "grad_norm": 0.09015882015228271, "learning_rate": 9.315627424966833e-06, "loss": 8.6384, "step": 138220 }, { "epoch": 0.690304377138006, "grad_norm": 0.09370545297861099, "learning_rate": 9.314125510025283e-06, "loss": 8.6408, "step": 138230 }, { "epoch": 0.6903543159629454, "grad_norm": 0.08900182694196701, "learning_rate": 9.312623595083731e-06, "loss": 8.6495, "step": 138240 }, { "epoch": 0.6904042547878848, "grad_norm": 0.08960168808698654, "learning_rate": 9.311121680142182e-06, "loss": 8.6513, "step": 138250 }, { "epoch": 0.6904541936128243, "grad_norm": 0.09003665298223495, "learning_rate": 9.30961976520063e-06, "loss": 8.6419, "step": 138260 }, { "epoch": 0.6905041324377638, "grad_norm": 0.09128106385469437, "learning_rate": 9.30811785025908e-06, "loss": 8.6406, "step": 138270 }, { "epoch": 0.6905540712627032, "grad_norm": 0.09446307271718979, "learning_rate": 9.30661593531753e-06, "loss": 8.6382, "step": 138280 }, { "epoch": 0.6906040100876426, "grad_norm": 0.08831710368394852, "learning_rate": 9.305114020375979e-06, "loss": 8.6417, "step": 138290 }, { "epoch": 0.6906539489125821, "grad_norm": 0.08965827524662018, "learning_rate": 9.303612105434429e-06, "loss": 8.633, "step": 138300 }, { "epoch": 0.6907038877375216, "grad_norm": 0.09387178719043732, "learning_rate": 9.302110190492878e-06, "loss": 8.6404, "step": 138310 }, { "epoch": 0.690753826562461, "grad_norm": 0.0937371626496315, "learning_rate": 9.300608275551328e-06, "loss": 8.6382, "step": 138320 }, { "epoch": 0.6908037653874004, "grad_norm": 0.08978761732578278, "learning_rate": 9.299106360609778e-06, "loss": 8.6647, "step": 138330 }, { "epoch": 0.6908537042123399, "grad_norm": 0.09310397505760193, "learning_rate": 9.297604445668228e-06, "loss": 8.6311, "step": 138340 }, { "epoch": 0.6909036430372794, "grad_norm": 0.0897870808839798, "learning_rate": 9.296102530726677e-06, "loss": 8.6466, "step": 138350 }, { "epoch": 0.6909535818622188, "grad_norm": 0.09369547665119171, "learning_rate": 9.294600615785127e-06, "loss": 8.6451, "step": 138360 }, { "epoch": 0.6910035206871582, "grad_norm": 0.09182377904653549, "learning_rate": 9.293098700843575e-06, "loss": 8.636, "step": 138370 }, { "epoch": 0.6910534595120977, "grad_norm": 0.09317004680633545, "learning_rate": 9.291596785902025e-06, "loss": 8.6357, "step": 138380 }, { "epoch": 0.6911033983370372, "grad_norm": 0.09063698351383209, "learning_rate": 9.290094870960476e-06, "loss": 8.6374, "step": 138390 }, { "epoch": 0.6911533371619766, "grad_norm": 0.09114688634872437, "learning_rate": 9.288592956018924e-06, "loss": 8.6364, "step": 138400 }, { "epoch": 0.691203275986916, "grad_norm": 0.08719582855701447, "learning_rate": 9.287091041077374e-06, "loss": 8.6513, "step": 138410 }, { "epoch": 0.6912532148118555, "grad_norm": 0.09610260277986526, "learning_rate": 9.285589126135823e-06, "loss": 8.6279, "step": 138420 }, { "epoch": 0.691303153636795, "grad_norm": 0.08931412547826767, "learning_rate": 9.284087211194273e-06, "loss": 8.6179, "step": 138430 }, { "epoch": 0.6913530924617344, "grad_norm": 0.09096454083919525, "learning_rate": 9.282585296252723e-06, "loss": 8.6538, "step": 138440 }, { "epoch": 0.6914030312866738, "grad_norm": 0.09301277250051498, "learning_rate": 9.281083381311172e-06, "loss": 8.6439, "step": 138450 }, { "epoch": 0.6914529701116132, "grad_norm": 0.09470412135124207, "learning_rate": 9.279581466369622e-06, "loss": 8.6572, "step": 138460 }, { "epoch": 0.6915029089365528, "grad_norm": 0.08895006030797958, "learning_rate": 9.27807955142807e-06, "loss": 8.6397, "step": 138470 }, { "epoch": 0.6915528477614922, "grad_norm": 0.09335384517908096, "learning_rate": 9.27657763648652e-06, "loss": 8.6424, "step": 138480 }, { "epoch": 0.6916027865864316, "grad_norm": 0.08956567943096161, "learning_rate": 9.27507572154497e-06, "loss": 8.6657, "step": 138490 }, { "epoch": 0.691652725411371, "grad_norm": 0.08856837451457977, "learning_rate": 9.273573806603421e-06, "loss": 8.6263, "step": 138500 }, { "epoch": 0.6917026642363105, "grad_norm": 0.09013232588768005, "learning_rate": 9.27207189166187e-06, "loss": 8.6193, "step": 138510 }, { "epoch": 0.69175260306125, "grad_norm": 0.09470102936029434, "learning_rate": 9.270569976720318e-06, "loss": 8.6281, "step": 138520 }, { "epoch": 0.6918025418861894, "grad_norm": 0.08966551721096039, "learning_rate": 9.269068061778768e-06, "loss": 8.6274, "step": 138530 }, { "epoch": 0.6918524807111288, "grad_norm": 0.09647005796432495, "learning_rate": 9.267566146837218e-06, "loss": 8.652, "step": 138540 }, { "epoch": 0.6919024195360683, "grad_norm": 0.08961433917284012, "learning_rate": 9.266064231895668e-06, "loss": 8.634, "step": 138550 }, { "epoch": 0.6919523583610078, "grad_norm": 0.09530511498451233, "learning_rate": 9.264562316954117e-06, "loss": 8.6453, "step": 138560 }, { "epoch": 0.6920022971859472, "grad_norm": 0.09226834028959274, "learning_rate": 9.263060402012565e-06, "loss": 8.6172, "step": 138570 }, { "epoch": 0.6920522360108866, "grad_norm": 0.09644404798746109, "learning_rate": 9.261558487071016e-06, "loss": 8.6318, "step": 138580 }, { "epoch": 0.6921021748358261, "grad_norm": 0.08702649176120758, "learning_rate": 9.260056572129466e-06, "loss": 8.6606, "step": 138590 }, { "epoch": 0.6921521136607656, "grad_norm": 0.08770040422677994, "learning_rate": 9.258554657187916e-06, "loss": 8.6342, "step": 138600 }, { "epoch": 0.692202052485705, "grad_norm": 0.09810072183609009, "learning_rate": 9.257052742246364e-06, "loss": 8.6444, "step": 138610 }, { "epoch": 0.6922519913106444, "grad_norm": 0.09160198271274567, "learning_rate": 9.255550827304813e-06, "loss": 8.6532, "step": 138620 }, { "epoch": 0.6923019301355839, "grad_norm": 0.09566731005907059, "learning_rate": 9.254048912363263e-06, "loss": 8.6305, "step": 138630 }, { "epoch": 0.6923518689605234, "grad_norm": 0.0926600992679596, "learning_rate": 9.252546997421713e-06, "loss": 8.6613, "step": 138640 }, { "epoch": 0.6924018077854628, "grad_norm": 0.0963670089840889, "learning_rate": 9.251045082480163e-06, "loss": 8.6392, "step": 138650 }, { "epoch": 0.6924517466104022, "grad_norm": 0.09352785348892212, "learning_rate": 9.249543167538614e-06, "loss": 8.6547, "step": 138660 }, { "epoch": 0.6925016854353417, "grad_norm": 0.08976289629936218, "learning_rate": 9.24804125259706e-06, "loss": 8.6306, "step": 138670 }, { "epoch": 0.6925516242602812, "grad_norm": 0.0948214903473854, "learning_rate": 9.24653933765551e-06, "loss": 8.6406, "step": 138680 }, { "epoch": 0.6926015630852206, "grad_norm": 0.08709558844566345, "learning_rate": 9.24503742271396e-06, "loss": 8.6603, "step": 138690 }, { "epoch": 0.69265150191016, "grad_norm": 0.09074770659208298, "learning_rate": 9.243535507772411e-06, "loss": 8.6567, "step": 138700 }, { "epoch": 0.6927014407350995, "grad_norm": 0.08999824523925781, "learning_rate": 9.242033592830861e-06, "loss": 8.659, "step": 138710 }, { "epoch": 0.692751379560039, "grad_norm": 0.09150887280702591, "learning_rate": 9.240531677889308e-06, "loss": 8.6724, "step": 138720 }, { "epoch": 0.6928013183849784, "grad_norm": 0.09337803721427917, "learning_rate": 9.239029762947758e-06, "loss": 8.64, "step": 138730 }, { "epoch": 0.6928512572099178, "grad_norm": 0.09013208001852036, "learning_rate": 9.237527848006208e-06, "loss": 8.6463, "step": 138740 }, { "epoch": 0.6929011960348573, "grad_norm": 0.08989313244819641, "learning_rate": 9.236025933064658e-06, "loss": 8.6294, "step": 138750 }, { "epoch": 0.6929511348597968, "grad_norm": 0.09004274755716324, "learning_rate": 9.234524018123109e-06, "loss": 8.6373, "step": 138760 }, { "epoch": 0.6930010736847362, "grad_norm": 0.09396898746490479, "learning_rate": 9.233022103181555e-06, "loss": 8.6434, "step": 138770 }, { "epoch": 0.6930510125096756, "grad_norm": 0.08722883462905884, "learning_rate": 9.231520188240006e-06, "loss": 8.6448, "step": 138780 }, { "epoch": 0.6931009513346151, "grad_norm": 0.0960550382733345, "learning_rate": 9.230018273298456e-06, "loss": 8.6277, "step": 138790 }, { "epoch": 0.6931508901595546, "grad_norm": 0.09497455507516861, "learning_rate": 9.228516358356906e-06, "loss": 8.6294, "step": 138800 }, { "epoch": 0.693200828984494, "grad_norm": 0.086859792470932, "learning_rate": 9.227014443415356e-06, "loss": 8.632, "step": 138810 }, { "epoch": 0.6932507678094334, "grad_norm": 0.08842300623655319, "learning_rate": 9.225512528473805e-06, "loss": 8.6624, "step": 138820 }, { "epoch": 0.6933007066343729, "grad_norm": 0.09223365783691406, "learning_rate": 9.224010613532253e-06, "loss": 8.6479, "step": 138830 }, { "epoch": 0.6933506454593124, "grad_norm": 0.09159765392541885, "learning_rate": 9.222508698590703e-06, "loss": 8.6521, "step": 138840 }, { "epoch": 0.6934005842842518, "grad_norm": 0.0935441255569458, "learning_rate": 9.221006783649153e-06, "loss": 8.6378, "step": 138850 }, { "epoch": 0.6934505231091912, "grad_norm": 0.09290704131126404, "learning_rate": 9.219504868707604e-06, "loss": 8.6148, "step": 138860 }, { "epoch": 0.6935004619341307, "grad_norm": 0.08997565507888794, "learning_rate": 9.218002953766052e-06, "loss": 8.6324, "step": 138870 }, { "epoch": 0.6935504007590702, "grad_norm": 0.09210515767335892, "learning_rate": 9.2165010388245e-06, "loss": 8.6539, "step": 138880 }, { "epoch": 0.6936003395840096, "grad_norm": 0.09159453958272934, "learning_rate": 9.21499912388295e-06, "loss": 8.6455, "step": 138890 }, { "epoch": 0.693650278408949, "grad_norm": 0.09023798257112503, "learning_rate": 9.213497208941401e-06, "loss": 8.6536, "step": 138900 }, { "epoch": 0.6937002172338885, "grad_norm": 0.08913420885801315, "learning_rate": 9.211995293999851e-06, "loss": 8.6457, "step": 138910 }, { "epoch": 0.693750156058828, "grad_norm": 0.09656532853841782, "learning_rate": 9.2104933790583e-06, "loss": 8.6271, "step": 138920 }, { "epoch": 0.6938000948837674, "grad_norm": 0.08836175501346588, "learning_rate": 9.208991464116748e-06, "loss": 8.6521, "step": 138930 }, { "epoch": 0.6938500337087068, "grad_norm": 0.09175171703100204, "learning_rate": 9.207489549175198e-06, "loss": 8.6369, "step": 138940 }, { "epoch": 0.6938999725336463, "grad_norm": 0.09058476984500885, "learning_rate": 9.205987634233648e-06, "loss": 8.637, "step": 138950 }, { "epoch": 0.6939499113585857, "grad_norm": 0.08583276718854904, "learning_rate": 9.204485719292099e-06, "loss": 8.6521, "step": 138960 }, { "epoch": 0.6939998501835252, "grad_norm": 0.09340415894985199, "learning_rate": 9.202983804350547e-06, "loss": 8.645, "step": 138970 }, { "epoch": 0.6940497890084646, "grad_norm": 0.09227906912565231, "learning_rate": 9.201481889408997e-06, "loss": 8.6404, "step": 138980 }, { "epoch": 0.6940997278334041, "grad_norm": 0.09200102090835571, "learning_rate": 9.199979974467446e-06, "loss": 8.6315, "step": 138990 }, { "epoch": 0.6941496666583435, "grad_norm": 0.09255097806453705, "learning_rate": 9.198478059525896e-06, "loss": 8.6211, "step": 139000 }, { "epoch": 0.694199605483283, "grad_norm": 0.09157124161720276, "learning_rate": 9.196976144584346e-06, "loss": 8.648, "step": 139010 }, { "epoch": 0.6942495443082224, "grad_norm": 0.09558318555355072, "learning_rate": 9.195474229642795e-06, "loss": 8.6472, "step": 139020 }, { "epoch": 0.6942994831331619, "grad_norm": 0.09198082983493805, "learning_rate": 9.193972314701245e-06, "loss": 8.6296, "step": 139030 }, { "epoch": 0.6943494219581013, "grad_norm": 0.09510613977909088, "learning_rate": 9.192470399759693e-06, "loss": 8.6449, "step": 139040 }, { "epoch": 0.6943993607830408, "grad_norm": 0.0944787859916687, "learning_rate": 9.190968484818143e-06, "loss": 8.6387, "step": 139050 }, { "epoch": 0.6944492996079802, "grad_norm": 0.09569986164569855, "learning_rate": 9.189466569876594e-06, "loss": 8.6267, "step": 139060 }, { "epoch": 0.6944992384329197, "grad_norm": 0.09105094522237778, "learning_rate": 9.187964654935042e-06, "loss": 8.6455, "step": 139070 }, { "epoch": 0.6945491772578591, "grad_norm": 0.09231606870889664, "learning_rate": 9.186462739993492e-06, "loss": 8.6481, "step": 139080 }, { "epoch": 0.6945991160827986, "grad_norm": 0.09214016050100327, "learning_rate": 9.18496082505194e-06, "loss": 8.637, "step": 139090 }, { "epoch": 0.694649054907738, "grad_norm": 0.09304431825876236, "learning_rate": 9.183458910110391e-06, "loss": 8.6331, "step": 139100 }, { "epoch": 0.6946989937326775, "grad_norm": 0.09022108465433121, "learning_rate": 9.181956995168841e-06, "loss": 8.6322, "step": 139110 }, { "epoch": 0.6947489325576169, "grad_norm": 0.09341921657323837, "learning_rate": 9.18045508022729e-06, "loss": 8.6466, "step": 139120 }, { "epoch": 0.6947988713825564, "grad_norm": 0.09652432799339294, "learning_rate": 9.17895316528574e-06, "loss": 8.6152, "step": 139130 }, { "epoch": 0.6948488102074958, "grad_norm": 0.08961055427789688, "learning_rate": 9.17745125034419e-06, "loss": 8.6514, "step": 139140 }, { "epoch": 0.6948987490324353, "grad_norm": 0.09760846197605133, "learning_rate": 9.175949335402638e-06, "loss": 8.635, "step": 139150 }, { "epoch": 0.6949486878573747, "grad_norm": 0.0923495665192604, "learning_rate": 9.174447420461089e-06, "loss": 8.6445, "step": 139160 }, { "epoch": 0.6949986266823142, "grad_norm": 0.09096313267946243, "learning_rate": 9.172945505519537e-06, "loss": 8.6452, "step": 139170 }, { "epoch": 0.6950485655072536, "grad_norm": 0.09231965243816376, "learning_rate": 9.171443590577987e-06, "loss": 8.6316, "step": 139180 }, { "epoch": 0.6950985043321931, "grad_norm": 0.09298811107873917, "learning_rate": 9.169941675636437e-06, "loss": 8.6394, "step": 139190 }, { "epoch": 0.6951484431571325, "grad_norm": 0.08991467207670212, "learning_rate": 9.168439760694886e-06, "loss": 8.6401, "step": 139200 }, { "epoch": 0.695198381982072, "grad_norm": 0.09446527063846588, "learning_rate": 9.166937845753336e-06, "loss": 8.6471, "step": 139210 }, { "epoch": 0.6952483208070114, "grad_norm": 0.09214930981397629, "learning_rate": 9.165435930811785e-06, "loss": 8.6469, "step": 139220 }, { "epoch": 0.6952982596319509, "grad_norm": 0.09560856968164444, "learning_rate": 9.163934015870235e-06, "loss": 8.6143, "step": 139230 }, { "epoch": 0.6953481984568903, "grad_norm": 0.09127038717269897, "learning_rate": 9.162432100928685e-06, "loss": 8.6509, "step": 139240 }, { "epoch": 0.6953981372818298, "grad_norm": 0.09658581763505936, "learning_rate": 9.160930185987133e-06, "loss": 8.6312, "step": 139250 }, { "epoch": 0.6954480761067692, "grad_norm": 0.08580411970615387, "learning_rate": 9.159428271045584e-06, "loss": 8.6339, "step": 139260 }, { "epoch": 0.6954980149317087, "grad_norm": 0.0958205908536911, "learning_rate": 9.157926356104032e-06, "loss": 8.6357, "step": 139270 }, { "epoch": 0.6955479537566481, "grad_norm": 0.09207163751125336, "learning_rate": 9.156424441162482e-06, "loss": 8.6253, "step": 139280 }, { "epoch": 0.6955978925815876, "grad_norm": 0.09957921504974365, "learning_rate": 9.154922526220932e-06, "loss": 8.6448, "step": 139290 }, { "epoch": 0.695647831406527, "grad_norm": 0.08785407990217209, "learning_rate": 9.153420611279383e-06, "loss": 8.6244, "step": 139300 }, { "epoch": 0.6956977702314665, "grad_norm": 0.08667879551649094, "learning_rate": 9.151918696337831e-06, "loss": 8.6377, "step": 139310 }, { "epoch": 0.6957477090564059, "grad_norm": 0.09369443356990814, "learning_rate": 9.15041678139628e-06, "loss": 8.6415, "step": 139320 }, { "epoch": 0.6957976478813453, "grad_norm": 0.08814938366413116, "learning_rate": 9.14891486645473e-06, "loss": 8.629, "step": 139330 }, { "epoch": 0.6958475867062848, "grad_norm": 0.0961841344833374, "learning_rate": 9.14741295151318e-06, "loss": 8.6497, "step": 139340 }, { "epoch": 0.6958975255312243, "grad_norm": 0.09453871101140976, "learning_rate": 9.14591103657163e-06, "loss": 8.6451, "step": 139350 }, { "epoch": 0.6959474643561637, "grad_norm": 0.09213295578956604, "learning_rate": 9.144409121630079e-06, "loss": 8.6234, "step": 139360 }, { "epoch": 0.6959974031811031, "grad_norm": 0.08923090249300003, "learning_rate": 9.142907206688527e-06, "loss": 8.617, "step": 139370 }, { "epoch": 0.6960473420060426, "grad_norm": 0.09008591622114182, "learning_rate": 9.141405291746977e-06, "loss": 8.6443, "step": 139380 }, { "epoch": 0.6960972808309821, "grad_norm": 0.0908149927854538, "learning_rate": 9.139903376805427e-06, "loss": 8.6455, "step": 139390 }, { "epoch": 0.6961472196559215, "grad_norm": 0.09176421910524368, "learning_rate": 9.138401461863878e-06, "loss": 8.6389, "step": 139400 }, { "epoch": 0.696197158480861, "grad_norm": 0.09254884719848633, "learning_rate": 9.136899546922326e-06, "loss": 8.6449, "step": 139410 }, { "epoch": 0.6962470973058004, "grad_norm": 0.08738537132740021, "learning_rate": 9.135397631980775e-06, "loss": 8.6249, "step": 139420 }, { "epoch": 0.6962970361307398, "grad_norm": 0.09267020225524902, "learning_rate": 9.133895717039225e-06, "loss": 8.6208, "step": 139430 }, { "epoch": 0.6963469749556793, "grad_norm": 0.09037657082080841, "learning_rate": 9.132393802097675e-06, "loss": 8.6347, "step": 139440 }, { "epoch": 0.6963969137806187, "grad_norm": 0.08870069682598114, "learning_rate": 9.130891887156125e-06, "loss": 8.6323, "step": 139450 }, { "epoch": 0.6964468526055582, "grad_norm": 0.08991122990846634, "learning_rate": 9.129389972214575e-06, "loss": 8.6758, "step": 139460 }, { "epoch": 0.6964967914304976, "grad_norm": 0.08251538872718811, "learning_rate": 9.127888057273022e-06, "loss": 8.6402, "step": 139470 }, { "epoch": 0.6965467302554371, "grad_norm": 0.08768171072006226, "learning_rate": 9.126386142331472e-06, "loss": 8.6293, "step": 139480 }, { "epoch": 0.6965966690803765, "grad_norm": 0.09158322215080261, "learning_rate": 9.124884227389922e-06, "loss": 8.6397, "step": 139490 }, { "epoch": 0.696646607905316, "grad_norm": 0.08981853723526001, "learning_rate": 9.123382312448373e-06, "loss": 8.638, "step": 139500 }, { "epoch": 0.6966965467302554, "grad_norm": 0.08998404443264008, "learning_rate": 9.121880397506823e-06, "loss": 8.6505, "step": 139510 }, { "epoch": 0.6967464855551949, "grad_norm": 0.09334992617368698, "learning_rate": 9.12037848256527e-06, "loss": 8.6324, "step": 139520 }, { "epoch": 0.6967964243801343, "grad_norm": 0.09155179560184479, "learning_rate": 9.11887656762372e-06, "loss": 8.6379, "step": 139530 }, { "epoch": 0.6968463632050738, "grad_norm": 0.09096579253673553, "learning_rate": 9.11737465268217e-06, "loss": 8.6277, "step": 139540 }, { "epoch": 0.6968963020300132, "grad_norm": 0.0905652865767479, "learning_rate": 9.11587273774062e-06, "loss": 8.6074, "step": 139550 }, { "epoch": 0.6969462408549527, "grad_norm": 0.09262055158615112, "learning_rate": 9.11437082279907e-06, "loss": 8.6542, "step": 139560 }, { "epoch": 0.6969961796798921, "grad_norm": 0.09247998148202896, "learning_rate": 9.112868907857517e-06, "loss": 8.6331, "step": 139570 }, { "epoch": 0.6970461185048316, "grad_norm": 0.09279992431402206, "learning_rate": 9.111366992915967e-06, "loss": 8.6329, "step": 139580 }, { "epoch": 0.697096057329771, "grad_norm": 0.09361322969198227, "learning_rate": 9.109865077974417e-06, "loss": 8.6413, "step": 139590 }, { "epoch": 0.6971459961547105, "grad_norm": 0.09208791702985764, "learning_rate": 9.108363163032868e-06, "loss": 8.6303, "step": 139600 }, { "epoch": 0.6971959349796499, "grad_norm": 0.09434138238430023, "learning_rate": 9.106861248091318e-06, "loss": 8.626, "step": 139610 }, { "epoch": 0.6972458738045894, "grad_norm": 0.09279754012823105, "learning_rate": 9.105359333149766e-06, "loss": 8.627, "step": 139620 }, { "epoch": 0.6972958126295288, "grad_norm": 0.08741941303014755, "learning_rate": 9.103857418208215e-06, "loss": 8.6508, "step": 139630 }, { "epoch": 0.6973457514544683, "grad_norm": 0.09483351558446884, "learning_rate": 9.102355503266665e-06, "loss": 8.6326, "step": 139640 }, { "epoch": 0.6973956902794077, "grad_norm": 0.09228503704071045, "learning_rate": 9.100853588325115e-06, "loss": 8.6424, "step": 139650 }, { "epoch": 0.6974456291043472, "grad_norm": 0.08870236575603485, "learning_rate": 9.099351673383565e-06, "loss": 8.6325, "step": 139660 }, { "epoch": 0.6974955679292866, "grad_norm": 0.09134595096111298, "learning_rate": 9.097849758442014e-06, "loss": 8.6322, "step": 139670 }, { "epoch": 0.6975455067542261, "grad_norm": 0.09109502285718918, "learning_rate": 9.096347843500462e-06, "loss": 8.636, "step": 139680 }, { "epoch": 0.6975954455791655, "grad_norm": 0.09011939913034439, "learning_rate": 9.094845928558912e-06, "loss": 8.6288, "step": 139690 }, { "epoch": 0.697645384404105, "grad_norm": 0.09101170301437378, "learning_rate": 9.093344013617363e-06, "loss": 8.6427, "step": 139700 }, { "epoch": 0.6976953232290444, "grad_norm": 0.09532671421766281, "learning_rate": 9.091842098675813e-06, "loss": 8.6448, "step": 139710 }, { "epoch": 0.6977452620539839, "grad_norm": 0.08789071440696716, "learning_rate": 9.090340183734261e-06, "loss": 8.6285, "step": 139720 }, { "epoch": 0.6977952008789233, "grad_norm": 0.08861180394887924, "learning_rate": 9.08883826879271e-06, "loss": 8.6353, "step": 139730 }, { "epoch": 0.6978451397038627, "grad_norm": 0.0869184210896492, "learning_rate": 9.08733635385116e-06, "loss": 8.6337, "step": 139740 }, { "epoch": 0.6978950785288022, "grad_norm": 0.09149453043937683, "learning_rate": 9.08583443890961e-06, "loss": 8.631, "step": 139750 }, { "epoch": 0.6979450173537417, "grad_norm": 0.09057032316923141, "learning_rate": 9.08433252396806e-06, "loss": 8.6373, "step": 139760 }, { "epoch": 0.6979949561786811, "grad_norm": 0.08867466449737549, "learning_rate": 9.082830609026509e-06, "loss": 8.6174, "step": 139770 }, { "epoch": 0.6980448950036205, "grad_norm": 0.09577593952417374, "learning_rate": 9.081328694084959e-06, "loss": 8.6414, "step": 139780 }, { "epoch": 0.69809483382856, "grad_norm": 0.08875248581171036, "learning_rate": 9.079826779143407e-06, "loss": 8.6312, "step": 139790 }, { "epoch": 0.6981447726534995, "grad_norm": 0.09105167537927628, "learning_rate": 9.078324864201858e-06, "loss": 8.6298, "step": 139800 }, { "epoch": 0.6981947114784389, "grad_norm": 0.09408155083656311, "learning_rate": 9.076822949260308e-06, "loss": 8.6485, "step": 139810 }, { "epoch": 0.6982446503033783, "grad_norm": 0.09340722113847733, "learning_rate": 9.075321034318756e-06, "loss": 8.6215, "step": 139820 }, { "epoch": 0.6982945891283178, "grad_norm": 0.10109424591064453, "learning_rate": 9.073819119377206e-06, "loss": 8.631, "step": 139830 }, { "epoch": 0.6983445279532573, "grad_norm": 0.0897102952003479, "learning_rate": 9.072317204435655e-06, "loss": 8.6321, "step": 139840 }, { "epoch": 0.6983944667781967, "grad_norm": 0.09369130432605743, "learning_rate": 9.070815289494105e-06, "loss": 8.633, "step": 139850 }, { "epoch": 0.6984444056031361, "grad_norm": 0.09355512261390686, "learning_rate": 9.069313374552555e-06, "loss": 8.6267, "step": 139860 }, { "epoch": 0.6984943444280756, "grad_norm": 0.09666591137647629, "learning_rate": 9.067811459611004e-06, "loss": 8.6556, "step": 139870 }, { "epoch": 0.6985442832530151, "grad_norm": 0.09669771045446396, "learning_rate": 9.066309544669454e-06, "loss": 8.6225, "step": 139880 }, { "epoch": 0.6985942220779545, "grad_norm": 0.0945110023021698, "learning_rate": 9.064807629727902e-06, "loss": 8.6422, "step": 139890 }, { "epoch": 0.6986441609028939, "grad_norm": 0.09108574688434601, "learning_rate": 9.063305714786353e-06, "loss": 8.6459, "step": 139900 }, { "epoch": 0.6986940997278334, "grad_norm": 0.09179191291332245, "learning_rate": 9.061803799844803e-06, "loss": 8.6282, "step": 139910 }, { "epoch": 0.6987440385527729, "grad_norm": 0.08860944956541061, "learning_rate": 9.060301884903251e-06, "loss": 8.6106, "step": 139920 }, { "epoch": 0.6987939773777123, "grad_norm": 0.09390242397785187, "learning_rate": 9.058799969961701e-06, "loss": 8.622, "step": 139930 }, { "epoch": 0.6988439162026517, "grad_norm": 0.08761897683143616, "learning_rate": 9.057298055020152e-06, "loss": 8.6207, "step": 139940 }, { "epoch": 0.6988938550275912, "grad_norm": 0.0982297733426094, "learning_rate": 9.0557961400786e-06, "loss": 8.6384, "step": 139950 }, { "epoch": 0.6989437938525307, "grad_norm": 0.09506106376647949, "learning_rate": 9.05429422513705e-06, "loss": 8.6514, "step": 139960 }, { "epoch": 0.6989937326774701, "grad_norm": 0.09104983508586884, "learning_rate": 9.052792310195499e-06, "loss": 8.629, "step": 139970 }, { "epoch": 0.6990436715024095, "grad_norm": 0.08770845830440521, "learning_rate": 9.051290395253949e-06, "loss": 8.6221, "step": 139980 }, { "epoch": 0.699093610327349, "grad_norm": 0.09654659777879715, "learning_rate": 9.0497884803124e-06, "loss": 8.6329, "step": 139990 }, { "epoch": 0.6991435491522885, "grad_norm": 0.0931318923830986, "learning_rate": 9.048286565370848e-06, "loss": 8.6416, "step": 140000 }, { "epoch": 0.6991934879772279, "grad_norm": 0.09113478660583496, "learning_rate": 9.046784650429298e-06, "loss": 8.6452, "step": 140010 }, { "epoch": 0.6992434268021673, "grad_norm": 0.09566029906272888, "learning_rate": 9.045282735487746e-06, "loss": 8.6487, "step": 140020 }, { "epoch": 0.6992933656271068, "grad_norm": 0.08825311809778214, "learning_rate": 9.043780820546197e-06, "loss": 8.6336, "step": 140030 }, { "epoch": 0.6993433044520463, "grad_norm": 0.09055044502019882, "learning_rate": 9.042278905604647e-06, "loss": 8.6425, "step": 140040 }, { "epoch": 0.6993932432769857, "grad_norm": 0.08617035299539566, "learning_rate": 9.040776990663095e-06, "loss": 8.6324, "step": 140050 }, { "epoch": 0.6994431821019251, "grad_norm": 0.09692193567752838, "learning_rate": 9.039275075721545e-06, "loss": 8.6194, "step": 140060 }, { "epoch": 0.6994931209268646, "grad_norm": 0.09942319244146347, "learning_rate": 9.037773160779994e-06, "loss": 8.6313, "step": 140070 }, { "epoch": 0.6995430597518041, "grad_norm": 0.0971224308013916, "learning_rate": 9.036271245838444e-06, "loss": 8.6337, "step": 140080 }, { "epoch": 0.6995929985767435, "grad_norm": 0.09974560886621475, "learning_rate": 9.034769330896894e-06, "loss": 8.6208, "step": 140090 }, { "epoch": 0.6996429374016829, "grad_norm": 0.09148532152175903, "learning_rate": 9.033267415955344e-06, "loss": 8.6361, "step": 140100 }, { "epoch": 0.6996928762266224, "grad_norm": 0.09783138334751129, "learning_rate": 9.031765501013793e-06, "loss": 8.6281, "step": 140110 }, { "epoch": 0.6997428150515619, "grad_norm": 0.09642284363508224, "learning_rate": 9.030263586072241e-06, "loss": 8.6295, "step": 140120 }, { "epoch": 0.6997927538765013, "grad_norm": 0.088691346347332, "learning_rate": 9.028761671130692e-06, "loss": 8.6492, "step": 140130 }, { "epoch": 0.6998426927014407, "grad_norm": 0.09170258790254593, "learning_rate": 9.027259756189142e-06, "loss": 8.6167, "step": 140140 }, { "epoch": 0.6998926315263801, "grad_norm": 0.09011033177375793, "learning_rate": 9.025757841247592e-06, "loss": 8.6359, "step": 140150 }, { "epoch": 0.6999425703513197, "grad_norm": 0.09351085126399994, "learning_rate": 9.02425592630604e-06, "loss": 8.6323, "step": 140160 }, { "epoch": 0.6999925091762591, "grad_norm": 0.09307374805212021, "learning_rate": 9.022754011364489e-06, "loss": 8.6389, "step": 140170 }, { "epoch": 0.7000424480011985, "grad_norm": 0.09782139211893082, "learning_rate": 9.021252096422939e-06, "loss": 8.6427, "step": 140180 }, { "epoch": 0.700092386826138, "grad_norm": 0.09016304463148117, "learning_rate": 9.01975018148139e-06, "loss": 8.6376, "step": 140190 }, { "epoch": 0.7001423256510775, "grad_norm": 0.0927334874868393, "learning_rate": 9.01824826653984e-06, "loss": 8.6239, "step": 140200 }, { "epoch": 0.7001922644760169, "grad_norm": 0.09416099637746811, "learning_rate": 9.016746351598288e-06, "loss": 8.6107, "step": 140210 }, { "epoch": 0.7002422033009563, "grad_norm": 0.09217461943626404, "learning_rate": 9.015244436656736e-06, "loss": 8.6225, "step": 140220 }, { "epoch": 0.7002921421258957, "grad_norm": 0.08289490640163422, "learning_rate": 9.013742521715187e-06, "loss": 8.6443, "step": 140230 }, { "epoch": 0.7003420809508353, "grad_norm": 0.08873941749334335, "learning_rate": 9.012240606773637e-06, "loss": 8.6334, "step": 140240 }, { "epoch": 0.7003920197757747, "grad_norm": 0.09045631438493729, "learning_rate": 9.010738691832087e-06, "loss": 8.631, "step": 140250 }, { "epoch": 0.7004419586007141, "grad_norm": 0.08844128996133804, "learning_rate": 9.009236776890537e-06, "loss": 8.6378, "step": 140260 }, { "epoch": 0.7004918974256535, "grad_norm": 0.09297400712966919, "learning_rate": 9.007734861948984e-06, "loss": 8.6214, "step": 140270 }, { "epoch": 0.7005418362505931, "grad_norm": 0.09032850712537766, "learning_rate": 9.006232947007434e-06, "loss": 8.6302, "step": 140280 }, { "epoch": 0.7005917750755325, "grad_norm": 0.09603878855705261, "learning_rate": 9.004731032065884e-06, "loss": 8.6308, "step": 140290 }, { "epoch": 0.7006417139004719, "grad_norm": 0.09244571626186371, "learning_rate": 9.003229117124334e-06, "loss": 8.6455, "step": 140300 }, { "epoch": 0.7006916527254113, "grad_norm": 0.09039461612701416, "learning_rate": 9.001727202182785e-06, "loss": 8.6357, "step": 140310 }, { "epoch": 0.7007415915503509, "grad_norm": 0.09459111839532852, "learning_rate": 9.000225287241231e-06, "loss": 8.6303, "step": 140320 }, { "epoch": 0.7007915303752903, "grad_norm": 0.09677302837371826, "learning_rate": 8.998723372299682e-06, "loss": 8.6394, "step": 140330 }, { "epoch": 0.7008414692002297, "grad_norm": 0.08825703710317612, "learning_rate": 8.997221457358132e-06, "loss": 8.6318, "step": 140340 }, { "epoch": 0.7008914080251691, "grad_norm": 0.09611725807189941, "learning_rate": 8.995719542416582e-06, "loss": 8.6359, "step": 140350 }, { "epoch": 0.7009413468501087, "grad_norm": 0.09157848358154297, "learning_rate": 8.994217627475032e-06, "loss": 8.6286, "step": 140360 }, { "epoch": 0.7009912856750481, "grad_norm": 0.08757877349853516, "learning_rate": 8.992715712533479e-06, "loss": 8.639, "step": 140370 }, { "epoch": 0.7010412244999875, "grad_norm": 0.09328636527061462, "learning_rate": 8.991213797591929e-06, "loss": 8.6316, "step": 140380 }, { "epoch": 0.7010911633249269, "grad_norm": 0.08965921401977539, "learning_rate": 8.98971188265038e-06, "loss": 8.624, "step": 140390 }, { "epoch": 0.7011411021498665, "grad_norm": 0.0938333123922348, "learning_rate": 8.98820996770883e-06, "loss": 8.6466, "step": 140400 }, { "epoch": 0.7011910409748059, "grad_norm": 0.08751453459262848, "learning_rate": 8.98670805276728e-06, "loss": 8.6313, "step": 140410 }, { "epoch": 0.7012409797997453, "grad_norm": 0.09409631788730621, "learning_rate": 8.985206137825728e-06, "loss": 8.6354, "step": 140420 }, { "epoch": 0.7012909186246847, "grad_norm": 0.09224843233823776, "learning_rate": 8.983704222884177e-06, "loss": 8.646, "step": 140430 }, { "epoch": 0.7013408574496242, "grad_norm": 0.08712205290794373, "learning_rate": 8.982202307942627e-06, "loss": 8.6293, "step": 140440 }, { "epoch": 0.7013907962745637, "grad_norm": 0.08894158154726028, "learning_rate": 8.980700393001077e-06, "loss": 8.6525, "step": 140450 }, { "epoch": 0.7014407350995031, "grad_norm": 0.09181137382984161, "learning_rate": 8.979198478059527e-06, "loss": 8.6246, "step": 140460 }, { "epoch": 0.7014906739244425, "grad_norm": 0.08507677912712097, "learning_rate": 8.977696563117976e-06, "loss": 8.6357, "step": 140470 }, { "epoch": 0.701540612749382, "grad_norm": 0.09141664952039719, "learning_rate": 8.976194648176424e-06, "loss": 8.6319, "step": 140480 }, { "epoch": 0.7015905515743215, "grad_norm": 0.09606952220201492, "learning_rate": 8.974692733234874e-06, "loss": 8.6253, "step": 140490 }, { "epoch": 0.7016404903992609, "grad_norm": 0.0922846719622612, "learning_rate": 8.973190818293324e-06, "loss": 8.6208, "step": 140500 }, { "epoch": 0.7016904292242003, "grad_norm": 0.0884494036436081, "learning_rate": 8.971688903351775e-06, "loss": 8.6359, "step": 140510 }, { "epoch": 0.7017403680491398, "grad_norm": 0.09224381297826767, "learning_rate": 8.970186988410223e-06, "loss": 8.6228, "step": 140520 }, { "epoch": 0.7017903068740793, "grad_norm": 0.09082509577274323, "learning_rate": 8.968685073468672e-06, "loss": 8.6334, "step": 140530 }, { "epoch": 0.7018402456990187, "grad_norm": 0.0936235710978508, "learning_rate": 8.967183158527122e-06, "loss": 8.6158, "step": 140540 }, { "epoch": 0.7018901845239581, "grad_norm": 0.08928533643484116, "learning_rate": 8.965681243585572e-06, "loss": 8.6382, "step": 140550 }, { "epoch": 0.7019401233488975, "grad_norm": 0.08770529180765152, "learning_rate": 8.964179328644022e-06, "loss": 8.6297, "step": 140560 }, { "epoch": 0.7019900621738371, "grad_norm": 0.08795421570539474, "learning_rate": 8.96267741370247e-06, "loss": 8.6302, "step": 140570 }, { "epoch": 0.7020400009987765, "grad_norm": 0.09204691648483276, "learning_rate": 8.961175498760919e-06, "loss": 8.6417, "step": 140580 }, { "epoch": 0.7020899398237159, "grad_norm": 0.09336613863706589, "learning_rate": 8.95967358381937e-06, "loss": 8.6493, "step": 140590 }, { "epoch": 0.7021398786486553, "grad_norm": 0.09377633780241013, "learning_rate": 8.95817166887782e-06, "loss": 8.6375, "step": 140600 }, { "epoch": 0.7021898174735949, "grad_norm": 0.0860079750418663, "learning_rate": 8.95666975393627e-06, "loss": 8.6426, "step": 140610 }, { "epoch": 0.7022397562985343, "grad_norm": 0.0926608294248581, "learning_rate": 8.95516783899472e-06, "loss": 8.6271, "step": 140620 }, { "epoch": 0.7022896951234737, "grad_norm": 0.09390340000391006, "learning_rate": 8.953665924053168e-06, "loss": 8.6388, "step": 140630 }, { "epoch": 0.7023396339484131, "grad_norm": 0.09352290630340576, "learning_rate": 8.952164009111617e-06, "loss": 8.6309, "step": 140640 }, { "epoch": 0.7023895727733527, "grad_norm": 0.09155908972024918, "learning_rate": 8.950662094170067e-06, "loss": 8.6282, "step": 140650 }, { "epoch": 0.7024395115982921, "grad_norm": 0.08889105916023254, "learning_rate": 8.949160179228517e-06, "loss": 8.6313, "step": 140660 }, { "epoch": 0.7024894504232315, "grad_norm": 0.0867091491818428, "learning_rate": 8.947658264286967e-06, "loss": 8.6344, "step": 140670 }, { "epoch": 0.7025393892481709, "grad_norm": 0.08940424025058746, "learning_rate": 8.946156349345416e-06, "loss": 8.641, "step": 140680 }, { "epoch": 0.7025893280731105, "grad_norm": 0.09525980055332184, "learning_rate": 8.944654434403864e-06, "loss": 8.6463, "step": 140690 }, { "epoch": 0.7026392668980499, "grad_norm": 0.08965422958135605, "learning_rate": 8.943152519462314e-06, "loss": 8.6191, "step": 140700 }, { "epoch": 0.7026892057229893, "grad_norm": 0.09086750447750092, "learning_rate": 8.941650604520765e-06, "loss": 8.6479, "step": 140710 }, { "epoch": 0.7027391445479287, "grad_norm": 0.08906187117099762, "learning_rate": 8.940148689579215e-06, "loss": 8.6292, "step": 140720 }, { "epoch": 0.7027890833728683, "grad_norm": 0.09022345393896103, "learning_rate": 8.938646774637663e-06, "loss": 8.6404, "step": 140730 }, { "epoch": 0.7028390221978077, "grad_norm": 0.08862177282571793, "learning_rate": 8.937144859696112e-06, "loss": 8.63, "step": 140740 }, { "epoch": 0.7028889610227471, "grad_norm": 0.09106519818305969, "learning_rate": 8.935642944754562e-06, "loss": 8.6177, "step": 140750 }, { "epoch": 0.7029388998476865, "grad_norm": 0.09161339700222015, "learning_rate": 8.934141029813012e-06, "loss": 8.6267, "step": 140760 }, { "epoch": 0.7029888386726261, "grad_norm": 0.0945722833275795, "learning_rate": 8.932639114871462e-06, "loss": 8.6188, "step": 140770 }, { "epoch": 0.7030387774975655, "grad_norm": 0.0907975360751152, "learning_rate": 8.93113719992991e-06, "loss": 8.6323, "step": 140780 }, { "epoch": 0.7030887163225049, "grad_norm": 0.08957850933074951, "learning_rate": 8.929635284988361e-06, "loss": 8.6396, "step": 140790 }, { "epoch": 0.7031386551474443, "grad_norm": 0.0977301150560379, "learning_rate": 8.92813337004681e-06, "loss": 8.6227, "step": 140800 }, { "epoch": 0.7031885939723839, "grad_norm": 0.09034769982099533, "learning_rate": 8.92663145510526e-06, "loss": 8.6368, "step": 140810 }, { "epoch": 0.7032385327973233, "grad_norm": 0.0880945548415184, "learning_rate": 8.92512954016371e-06, "loss": 8.6343, "step": 140820 }, { "epoch": 0.7032884716222627, "grad_norm": 0.09109944850206375, "learning_rate": 8.923627625222158e-06, "loss": 8.6357, "step": 140830 }, { "epoch": 0.7033384104472021, "grad_norm": 0.0931500494480133, "learning_rate": 8.922125710280608e-06, "loss": 8.6303, "step": 140840 }, { "epoch": 0.7033883492721417, "grad_norm": 0.09611831605434418, "learning_rate": 8.920623795339057e-06, "loss": 8.62, "step": 140850 }, { "epoch": 0.7034382880970811, "grad_norm": 0.08877561241388321, "learning_rate": 8.919121880397507e-06, "loss": 8.6357, "step": 140860 }, { "epoch": 0.7034882269220205, "grad_norm": 0.0883457213640213, "learning_rate": 8.917619965455957e-06, "loss": 8.6247, "step": 140870 }, { "epoch": 0.7035381657469599, "grad_norm": 0.08729522675275803, "learning_rate": 8.916118050514406e-06, "loss": 8.6459, "step": 140880 }, { "epoch": 0.7035881045718995, "grad_norm": 0.09188755601644516, "learning_rate": 8.914616135572856e-06, "loss": 8.6239, "step": 140890 }, { "epoch": 0.7036380433968389, "grad_norm": 0.09601496160030365, "learning_rate": 8.913114220631304e-06, "loss": 8.6126, "step": 140900 }, { "epoch": 0.7036879822217783, "grad_norm": 0.0922279953956604, "learning_rate": 8.911612305689755e-06, "loss": 8.6379, "step": 140910 }, { "epoch": 0.7037379210467177, "grad_norm": 0.08827944844961166, "learning_rate": 8.910110390748205e-06, "loss": 8.6256, "step": 140920 }, { "epoch": 0.7037878598716573, "grad_norm": 0.09115996211767197, "learning_rate": 8.908608475806653e-06, "loss": 8.6132, "step": 140930 }, { "epoch": 0.7038377986965967, "grad_norm": 0.09394663572311401, "learning_rate": 8.907106560865103e-06, "loss": 8.6283, "step": 140940 }, { "epoch": 0.7038877375215361, "grad_norm": 0.08945653587579727, "learning_rate": 8.905604645923554e-06, "loss": 8.6324, "step": 140950 }, { "epoch": 0.7039376763464755, "grad_norm": 0.09907026588916779, "learning_rate": 8.904102730982002e-06, "loss": 8.6333, "step": 140960 }, { "epoch": 0.7039876151714151, "grad_norm": 0.09264247864484787, "learning_rate": 8.902600816040452e-06, "loss": 8.6323, "step": 140970 }, { "epoch": 0.7040375539963545, "grad_norm": 0.08847159892320633, "learning_rate": 8.9010989010989e-06, "loss": 8.6337, "step": 140980 }, { "epoch": 0.7040874928212939, "grad_norm": 0.08942971378564835, "learning_rate": 8.899596986157351e-06, "loss": 8.6253, "step": 140990 }, { "epoch": 0.7041374316462333, "grad_norm": 0.0965491458773613, "learning_rate": 8.898095071215801e-06, "loss": 8.6361, "step": 141000 }, { "epoch": 0.7041873704711729, "grad_norm": 0.09533529728651047, "learning_rate": 8.89659315627425e-06, "loss": 8.6196, "step": 141010 }, { "epoch": 0.7042373092961123, "grad_norm": 0.09187507629394531, "learning_rate": 8.8950912413327e-06, "loss": 8.6384, "step": 141020 }, { "epoch": 0.7042872481210517, "grad_norm": 0.09570582956075668, "learning_rate": 8.893589326391148e-06, "loss": 8.6171, "step": 141030 }, { "epoch": 0.7043371869459911, "grad_norm": 0.09152864664793015, "learning_rate": 8.892087411449598e-06, "loss": 8.6202, "step": 141040 }, { "epoch": 0.7043871257709307, "grad_norm": 0.08897831290960312, "learning_rate": 8.890585496508049e-06, "loss": 8.6357, "step": 141050 }, { "epoch": 0.7044370645958701, "grad_norm": 0.09181348979473114, "learning_rate": 8.889083581566497e-06, "loss": 8.6106, "step": 141060 }, { "epoch": 0.7044870034208095, "grad_norm": 0.09355124831199646, "learning_rate": 8.887581666624947e-06, "loss": 8.631, "step": 141070 }, { "epoch": 0.7045369422457489, "grad_norm": 0.08978011459112167, "learning_rate": 8.886079751683396e-06, "loss": 8.6163, "step": 141080 }, { "epoch": 0.7045868810706885, "grad_norm": 0.09040986001491547, "learning_rate": 8.884577836741846e-06, "loss": 8.6161, "step": 141090 }, { "epoch": 0.7046368198956279, "grad_norm": 0.09216012805700302, "learning_rate": 8.883075921800296e-06, "loss": 8.6069, "step": 141100 }, { "epoch": 0.7046867587205673, "grad_norm": 0.09498348832130432, "learning_rate": 8.881574006858746e-06, "loss": 8.6323, "step": 141110 }, { "epoch": 0.7047366975455067, "grad_norm": 0.09142297506332397, "learning_rate": 8.880072091917195e-06, "loss": 8.6146, "step": 141120 }, { "epoch": 0.7047866363704463, "grad_norm": 0.08392765372991562, "learning_rate": 8.878570176975643e-06, "loss": 8.6236, "step": 141130 }, { "epoch": 0.7048365751953857, "grad_norm": 0.08954204618930817, "learning_rate": 8.877068262034093e-06, "loss": 8.6459, "step": 141140 }, { "epoch": 0.7048865140203251, "grad_norm": 0.09618078172206879, "learning_rate": 8.875566347092544e-06, "loss": 8.6317, "step": 141150 }, { "epoch": 0.7049364528452645, "grad_norm": 0.09143991023302078, "learning_rate": 8.874064432150994e-06, "loss": 8.6466, "step": 141160 }, { "epoch": 0.704986391670204, "grad_norm": 0.09367083013057709, "learning_rate": 8.872562517209442e-06, "loss": 8.6332, "step": 141170 }, { "epoch": 0.7050363304951435, "grad_norm": 0.09456432610750198, "learning_rate": 8.87106060226789e-06, "loss": 8.6154, "step": 141180 }, { "epoch": 0.7050862693200829, "grad_norm": 0.09094260632991791, "learning_rate": 8.869558687326341e-06, "loss": 8.6337, "step": 141190 }, { "epoch": 0.7051362081450223, "grad_norm": 0.09079168736934662, "learning_rate": 8.868056772384791e-06, "loss": 8.6399, "step": 141200 }, { "epoch": 0.7051861469699618, "grad_norm": 0.08768593519926071, "learning_rate": 8.866554857443241e-06, "loss": 8.6241, "step": 141210 }, { "epoch": 0.7052360857949013, "grad_norm": 0.09279026836156845, "learning_rate": 8.86505294250169e-06, "loss": 8.6303, "step": 141220 }, { "epoch": 0.7052860246198407, "grad_norm": 0.09045767784118652, "learning_rate": 8.863551027560138e-06, "loss": 8.6399, "step": 141230 }, { "epoch": 0.7053359634447801, "grad_norm": 0.09411778301000595, "learning_rate": 8.862049112618588e-06, "loss": 8.6315, "step": 141240 }, { "epoch": 0.7053859022697196, "grad_norm": 0.08939050883054733, "learning_rate": 8.860547197677039e-06, "loss": 8.6213, "step": 141250 }, { "epoch": 0.7054358410946591, "grad_norm": 0.08885614573955536, "learning_rate": 8.859045282735489e-06, "loss": 8.6282, "step": 141260 }, { "epoch": 0.7054857799195985, "grad_norm": 0.09590033441781998, "learning_rate": 8.857543367793939e-06, "loss": 8.6296, "step": 141270 }, { "epoch": 0.7055357187445379, "grad_norm": 0.09497443586587906, "learning_rate": 8.856041452852386e-06, "loss": 8.6256, "step": 141280 }, { "epoch": 0.7055856575694774, "grad_norm": 0.09108743816614151, "learning_rate": 8.854539537910836e-06, "loss": 8.6314, "step": 141290 }, { "epoch": 0.7056355963944169, "grad_norm": 0.08908271044492722, "learning_rate": 8.853037622969286e-06, "loss": 8.6233, "step": 141300 }, { "epoch": 0.7056855352193563, "grad_norm": 0.10099402815103531, "learning_rate": 8.851535708027736e-06, "loss": 8.6468, "step": 141310 }, { "epoch": 0.7057354740442957, "grad_norm": 0.09605363756418228, "learning_rate": 8.850033793086187e-06, "loss": 8.6166, "step": 141320 }, { "epoch": 0.7057854128692352, "grad_norm": 0.09077009558677673, "learning_rate": 8.848531878144633e-06, "loss": 8.6215, "step": 141330 }, { "epoch": 0.7058353516941747, "grad_norm": 0.0913512110710144, "learning_rate": 8.847029963203083e-06, "loss": 8.6587, "step": 141340 }, { "epoch": 0.7058852905191141, "grad_norm": 0.08867861330509186, "learning_rate": 8.845528048261534e-06, "loss": 8.6569, "step": 141350 }, { "epoch": 0.7059352293440535, "grad_norm": 0.09407979995012283, "learning_rate": 8.844026133319984e-06, "loss": 8.606, "step": 141360 }, { "epoch": 0.705985168168993, "grad_norm": 0.0922011062502861, "learning_rate": 8.842524218378434e-06, "loss": 8.6163, "step": 141370 }, { "epoch": 0.7060351069939325, "grad_norm": 0.09292416274547577, "learning_rate": 8.84102230343688e-06, "loss": 8.6203, "step": 141380 }, { "epoch": 0.7060850458188719, "grad_norm": 0.09504501521587372, "learning_rate": 8.839520388495331e-06, "loss": 8.6259, "step": 141390 }, { "epoch": 0.7061349846438113, "grad_norm": 0.09113211929798126, "learning_rate": 8.838018473553781e-06, "loss": 8.6264, "step": 141400 }, { "epoch": 0.7061849234687507, "grad_norm": 0.09200391918420792, "learning_rate": 8.836516558612231e-06, "loss": 8.6251, "step": 141410 }, { "epoch": 0.7062348622936903, "grad_norm": 0.09039604663848877, "learning_rate": 8.835014643670682e-06, "loss": 8.6115, "step": 141420 }, { "epoch": 0.7062848011186297, "grad_norm": 0.0915713682770729, "learning_rate": 8.83351272872913e-06, "loss": 8.6198, "step": 141430 }, { "epoch": 0.7063347399435691, "grad_norm": 0.09520621597766876, "learning_rate": 8.832010813787578e-06, "loss": 8.6043, "step": 141440 }, { "epoch": 0.7063846787685085, "grad_norm": 0.0928700864315033, "learning_rate": 8.830508898846029e-06, "loss": 8.61, "step": 141450 }, { "epoch": 0.706434617593448, "grad_norm": 0.09517066925764084, "learning_rate": 8.829006983904479e-06, "loss": 8.6289, "step": 141460 }, { "epoch": 0.7064845564183875, "grad_norm": 0.0947510227560997, "learning_rate": 8.827505068962929e-06, "loss": 8.6077, "step": 141470 }, { "epoch": 0.7065344952433269, "grad_norm": 0.09221941232681274, "learning_rate": 8.826003154021377e-06, "loss": 8.6342, "step": 141480 }, { "epoch": 0.7065844340682663, "grad_norm": 0.08994131535291672, "learning_rate": 8.824501239079826e-06, "loss": 8.6346, "step": 141490 }, { "epoch": 0.7066343728932059, "grad_norm": 0.09264985471963882, "learning_rate": 8.822999324138276e-06, "loss": 8.6291, "step": 141500 }, { "epoch": 0.7066843117181453, "grad_norm": 0.09057199954986572, "learning_rate": 8.821497409196726e-06, "loss": 8.6444, "step": 141510 }, { "epoch": 0.7067342505430847, "grad_norm": 0.08880162984132767, "learning_rate": 8.819995494255177e-06, "loss": 8.6288, "step": 141520 }, { "epoch": 0.7067841893680241, "grad_norm": 0.08788347989320755, "learning_rate": 8.818493579313625e-06, "loss": 8.6211, "step": 141530 }, { "epoch": 0.7068341281929637, "grad_norm": 0.08884553611278534, "learning_rate": 8.816991664372073e-06, "loss": 8.6423, "step": 141540 }, { "epoch": 0.7068840670179031, "grad_norm": 0.090285524725914, "learning_rate": 8.815489749430524e-06, "loss": 8.6358, "step": 141550 }, { "epoch": 0.7069340058428425, "grad_norm": 0.09516801685094833, "learning_rate": 8.813987834488974e-06, "loss": 8.618, "step": 141560 }, { "epoch": 0.7069839446677819, "grad_norm": 0.09445168077945709, "learning_rate": 8.812485919547424e-06, "loss": 8.6284, "step": 141570 }, { "epoch": 0.7070338834927214, "grad_norm": 0.08974064886569977, "learning_rate": 8.810984004605873e-06, "loss": 8.612, "step": 141580 }, { "epoch": 0.7070838223176609, "grad_norm": 0.09538102895021439, "learning_rate": 8.809482089664323e-06, "loss": 8.6337, "step": 141590 }, { "epoch": 0.7071337611426003, "grad_norm": 0.08882886171340942, "learning_rate": 8.807980174722771e-06, "loss": 8.6367, "step": 141600 }, { "epoch": 0.7071836999675397, "grad_norm": 0.09137466549873352, "learning_rate": 8.806478259781221e-06, "loss": 8.6261, "step": 141610 }, { "epoch": 0.7072336387924792, "grad_norm": 0.08818534016609192, "learning_rate": 8.804976344839672e-06, "loss": 8.6378, "step": 141620 }, { "epoch": 0.7072835776174187, "grad_norm": 0.09256480634212494, "learning_rate": 8.80347442989812e-06, "loss": 8.6342, "step": 141630 }, { "epoch": 0.7073335164423581, "grad_norm": 0.08914809674024582, "learning_rate": 8.80197251495657e-06, "loss": 8.6384, "step": 141640 }, { "epoch": 0.7073834552672975, "grad_norm": 0.09401541948318481, "learning_rate": 8.800470600015019e-06, "loss": 8.6253, "step": 141650 }, { "epoch": 0.707433394092237, "grad_norm": 0.09287466108798981, "learning_rate": 8.798968685073469e-06, "loss": 8.6103, "step": 141660 }, { "epoch": 0.7074833329171765, "grad_norm": 0.09313994646072388, "learning_rate": 8.797466770131919e-06, "loss": 8.6352, "step": 141670 }, { "epoch": 0.7075332717421159, "grad_norm": 0.09282007813453674, "learning_rate": 8.795964855190368e-06, "loss": 8.6173, "step": 141680 }, { "epoch": 0.7075832105670553, "grad_norm": 0.08810416609048843, "learning_rate": 8.794462940248818e-06, "loss": 8.6341, "step": 141690 }, { "epoch": 0.7076331493919948, "grad_norm": 0.09486988186836243, "learning_rate": 8.792961025307266e-06, "loss": 8.6187, "step": 141700 }, { "epoch": 0.7076830882169343, "grad_norm": 0.08923081308603287, "learning_rate": 8.791459110365716e-06, "loss": 8.6303, "step": 141710 }, { "epoch": 0.7077330270418737, "grad_norm": 0.09007243067026138, "learning_rate": 8.789957195424167e-06, "loss": 8.6309, "step": 141720 }, { "epoch": 0.7077829658668131, "grad_norm": 0.09199108928442001, "learning_rate": 8.788455280482615e-06, "loss": 8.6047, "step": 141730 }, { "epoch": 0.7078329046917526, "grad_norm": 0.09315073490142822, "learning_rate": 8.786953365541065e-06, "loss": 8.639, "step": 141740 }, { "epoch": 0.7078828435166921, "grad_norm": 0.09393049031496048, "learning_rate": 8.785451450599515e-06, "loss": 8.637, "step": 141750 }, { "epoch": 0.7079327823416315, "grad_norm": 0.08970038592815399, "learning_rate": 8.783949535657964e-06, "loss": 8.6206, "step": 141760 }, { "epoch": 0.7079827211665709, "grad_norm": 0.09079226106405258, "learning_rate": 8.782447620716414e-06, "loss": 8.6221, "step": 141770 }, { "epoch": 0.7080326599915104, "grad_norm": 0.09092725813388824, "learning_rate": 8.780945705774863e-06, "loss": 8.6283, "step": 141780 }, { "epoch": 0.7080825988164499, "grad_norm": 0.08902301639318466, "learning_rate": 8.779443790833313e-06, "loss": 8.6266, "step": 141790 }, { "epoch": 0.7081325376413893, "grad_norm": 0.09129643440246582, "learning_rate": 8.777941875891763e-06, "loss": 8.6308, "step": 141800 }, { "epoch": 0.7081824764663287, "grad_norm": 0.09005031734704971, "learning_rate": 8.776439960950211e-06, "loss": 8.6405, "step": 141810 }, { "epoch": 0.7082324152912682, "grad_norm": 0.0907624140381813, "learning_rate": 8.774938046008662e-06, "loss": 8.6345, "step": 141820 }, { "epoch": 0.7082823541162077, "grad_norm": 0.09212564677000046, "learning_rate": 8.77343613106711e-06, "loss": 8.6532, "step": 141830 }, { "epoch": 0.7083322929411471, "grad_norm": 0.09632585197687149, "learning_rate": 8.77193421612556e-06, "loss": 8.611, "step": 141840 }, { "epoch": 0.7083822317660865, "grad_norm": 0.09049876779317856, "learning_rate": 8.77043230118401e-06, "loss": 8.6288, "step": 141850 }, { "epoch": 0.708432170591026, "grad_norm": 0.08971288055181503, "learning_rate": 8.768930386242459e-06, "loss": 8.6298, "step": 141860 }, { "epoch": 0.7084821094159655, "grad_norm": 0.09906616806983948, "learning_rate": 8.767428471300909e-06, "loss": 8.6211, "step": 141870 }, { "epoch": 0.7085320482409049, "grad_norm": 0.09421475976705551, "learning_rate": 8.765926556359358e-06, "loss": 8.6384, "step": 141880 }, { "epoch": 0.7085819870658443, "grad_norm": 0.08919620513916016, "learning_rate": 8.764424641417808e-06, "loss": 8.6428, "step": 141890 }, { "epoch": 0.7086319258907838, "grad_norm": 0.0901108905673027, "learning_rate": 8.762922726476258e-06, "loss": 8.6232, "step": 141900 }, { "epoch": 0.7086818647157233, "grad_norm": 0.0895710289478302, "learning_rate": 8.761420811534708e-06, "loss": 8.6461, "step": 141910 }, { "epoch": 0.7087318035406627, "grad_norm": 0.09124090522527695, "learning_rate": 8.759918896593157e-06, "loss": 8.6357, "step": 141920 }, { "epoch": 0.7087817423656021, "grad_norm": 0.09022539854049683, "learning_rate": 8.758416981651605e-06, "loss": 8.622, "step": 141930 }, { "epoch": 0.7088316811905416, "grad_norm": 0.08793357014656067, "learning_rate": 8.756915066710055e-06, "loss": 8.6265, "step": 141940 }, { "epoch": 0.708881620015481, "grad_norm": 0.09152141213417053, "learning_rate": 8.755413151768505e-06, "loss": 8.6229, "step": 141950 }, { "epoch": 0.7089315588404205, "grad_norm": 0.09446197748184204, "learning_rate": 8.753911236826956e-06, "loss": 8.6395, "step": 141960 }, { "epoch": 0.7089814976653599, "grad_norm": 0.08696843683719635, "learning_rate": 8.752409321885404e-06, "loss": 8.6207, "step": 141970 }, { "epoch": 0.7090314364902994, "grad_norm": 0.09359335899353027, "learning_rate": 8.750907406943853e-06, "loss": 8.6385, "step": 141980 }, { "epoch": 0.7090813753152388, "grad_norm": 0.09001364558935165, "learning_rate": 8.749405492002303e-06, "loss": 8.6352, "step": 141990 }, { "epoch": 0.7091313141401783, "grad_norm": 0.09298966825008392, "learning_rate": 8.747903577060753e-06, "loss": 8.6246, "step": 142000 }, { "epoch": 0.7091812529651177, "grad_norm": 0.09238903969526291, "learning_rate": 8.746401662119203e-06, "loss": 8.6325, "step": 142010 }, { "epoch": 0.7092311917900572, "grad_norm": 0.09266587346792221, "learning_rate": 8.744899747177652e-06, "loss": 8.6339, "step": 142020 }, { "epoch": 0.7092811306149966, "grad_norm": 0.090972401201725, "learning_rate": 8.7433978322361e-06, "loss": 8.634, "step": 142030 }, { "epoch": 0.7093310694399361, "grad_norm": 0.09120898693799973, "learning_rate": 8.74189591729455e-06, "loss": 8.6247, "step": 142040 }, { "epoch": 0.7093810082648755, "grad_norm": 0.09463303536176682, "learning_rate": 8.740394002353e-06, "loss": 8.6374, "step": 142050 }, { "epoch": 0.709430947089815, "grad_norm": 0.09146030992269516, "learning_rate": 8.73889208741145e-06, "loss": 8.6386, "step": 142060 }, { "epoch": 0.7094808859147544, "grad_norm": 0.09224395453929901, "learning_rate": 8.7373901724699e-06, "loss": 8.6257, "step": 142070 }, { "epoch": 0.7095308247396939, "grad_norm": 0.08982054889202118, "learning_rate": 8.735888257528348e-06, "loss": 8.6414, "step": 142080 }, { "epoch": 0.7095807635646333, "grad_norm": 0.09617902338504791, "learning_rate": 8.734386342586798e-06, "loss": 8.6255, "step": 142090 }, { "epoch": 0.7096307023895728, "grad_norm": 0.09528889507055283, "learning_rate": 8.732884427645248e-06, "loss": 8.622, "step": 142100 }, { "epoch": 0.7096806412145122, "grad_norm": 0.0894269272685051, "learning_rate": 8.731382512703698e-06, "loss": 8.6288, "step": 142110 }, { "epoch": 0.7097305800394517, "grad_norm": 0.09145845472812653, "learning_rate": 8.729880597762148e-06, "loss": 8.6278, "step": 142120 }, { "epoch": 0.7097805188643911, "grad_norm": 0.09675304591655731, "learning_rate": 8.728378682820595e-06, "loss": 8.6259, "step": 142130 }, { "epoch": 0.7098304576893306, "grad_norm": 0.09058880060911179, "learning_rate": 8.726876767879045e-06, "loss": 8.6513, "step": 142140 }, { "epoch": 0.70988039651427, "grad_norm": 0.0966552197933197, "learning_rate": 8.725374852937495e-06, "loss": 8.61, "step": 142150 }, { "epoch": 0.7099303353392095, "grad_norm": 0.08744119107723236, "learning_rate": 8.723872937995946e-06, "loss": 8.6147, "step": 142160 }, { "epoch": 0.7099802741641489, "grad_norm": 0.09429807960987091, "learning_rate": 8.722371023054396e-06, "loss": 8.6291, "step": 142170 }, { "epoch": 0.7100302129890884, "grad_norm": 0.08756644278764725, "learning_rate": 8.720869108112843e-06, "loss": 8.6386, "step": 142180 }, { "epoch": 0.7100801518140278, "grad_norm": 0.08758307993412018, "learning_rate": 8.719367193171293e-06, "loss": 8.6153, "step": 142190 }, { "epoch": 0.7101300906389673, "grad_norm": 0.09212186932563782, "learning_rate": 8.717865278229743e-06, "loss": 8.6224, "step": 142200 }, { "epoch": 0.7101800294639067, "grad_norm": 0.09597406536340714, "learning_rate": 8.716363363288193e-06, "loss": 8.6243, "step": 142210 }, { "epoch": 0.7102299682888462, "grad_norm": 0.09284794330596924, "learning_rate": 8.714861448346643e-06, "loss": 8.6202, "step": 142220 }, { "epoch": 0.7102799071137856, "grad_norm": 0.09078753739595413, "learning_rate": 8.713359533405092e-06, "loss": 8.625, "step": 142230 }, { "epoch": 0.7103298459387251, "grad_norm": 0.09475717693567276, "learning_rate": 8.71185761846354e-06, "loss": 8.6252, "step": 142240 }, { "epoch": 0.7103797847636645, "grad_norm": 0.09465180337429047, "learning_rate": 8.71035570352199e-06, "loss": 8.6361, "step": 142250 }, { "epoch": 0.710429723588604, "grad_norm": 0.09588777273893356, "learning_rate": 8.70885378858044e-06, "loss": 8.6176, "step": 142260 }, { "epoch": 0.7104796624135434, "grad_norm": 0.09004874527454376, "learning_rate": 8.70735187363889e-06, "loss": 8.6325, "step": 142270 }, { "epoch": 0.7105296012384829, "grad_norm": 0.09361999481916428, "learning_rate": 8.70584995869734e-06, "loss": 8.6366, "step": 142280 }, { "epoch": 0.7105795400634223, "grad_norm": 0.09634486585855484, "learning_rate": 8.704348043755788e-06, "loss": 8.6259, "step": 142290 }, { "epoch": 0.7106294788883618, "grad_norm": 0.0938863530755043, "learning_rate": 8.702846128814238e-06, "loss": 8.6259, "step": 142300 }, { "epoch": 0.7106794177133012, "grad_norm": 0.09446729719638824, "learning_rate": 8.701344213872688e-06, "loss": 8.6237, "step": 142310 }, { "epoch": 0.7107293565382407, "grad_norm": 0.09354673326015472, "learning_rate": 8.699842298931138e-06, "loss": 8.6212, "step": 142320 }, { "epoch": 0.7107792953631801, "grad_norm": 0.08772012591362, "learning_rate": 8.698340383989587e-06, "loss": 8.6214, "step": 142330 }, { "epoch": 0.7108292341881196, "grad_norm": 0.0878673866391182, "learning_rate": 8.696838469048035e-06, "loss": 8.6261, "step": 142340 }, { "epoch": 0.710879173013059, "grad_norm": 0.08873125165700912, "learning_rate": 8.695336554106485e-06, "loss": 8.6183, "step": 142350 }, { "epoch": 0.7109291118379985, "grad_norm": 0.0938340574502945, "learning_rate": 8.693834639164936e-06, "loss": 8.6124, "step": 142360 }, { "epoch": 0.7109790506629379, "grad_norm": 0.09345322847366333, "learning_rate": 8.692332724223386e-06, "loss": 8.6146, "step": 142370 }, { "epoch": 0.7110289894878774, "grad_norm": 0.09127715975046158, "learning_rate": 8.690830809281834e-06, "loss": 8.6477, "step": 142380 }, { "epoch": 0.7110789283128168, "grad_norm": 0.09575607627630234, "learning_rate": 8.689328894340284e-06, "loss": 8.6153, "step": 142390 }, { "epoch": 0.7111288671377562, "grad_norm": 0.08808735758066177, "learning_rate": 8.687826979398733e-06, "loss": 8.6387, "step": 142400 }, { "epoch": 0.7111788059626957, "grad_norm": 0.09160853177309036, "learning_rate": 8.686325064457183e-06, "loss": 8.6374, "step": 142410 }, { "epoch": 0.7112287447876351, "grad_norm": 0.09475651383399963, "learning_rate": 8.684823149515633e-06, "loss": 8.6342, "step": 142420 }, { "epoch": 0.7112786836125746, "grad_norm": 0.08886786550283432, "learning_rate": 8.683321234574082e-06, "loss": 8.6129, "step": 142430 }, { "epoch": 0.711328622437514, "grad_norm": 0.0918954536318779, "learning_rate": 8.681819319632532e-06, "loss": 8.6083, "step": 142440 }, { "epoch": 0.7113785612624535, "grad_norm": 0.09870171546936035, "learning_rate": 8.68031740469098e-06, "loss": 8.6117, "step": 142450 }, { "epoch": 0.7114285000873929, "grad_norm": 0.09634058177471161, "learning_rate": 8.67881548974943e-06, "loss": 8.6249, "step": 142460 }, { "epoch": 0.7114784389123324, "grad_norm": 0.09128417074680328, "learning_rate": 8.67731357480788e-06, "loss": 8.6259, "step": 142470 }, { "epoch": 0.7115283777372718, "grad_norm": 0.08941973000764847, "learning_rate": 8.67581165986633e-06, "loss": 8.6221, "step": 142480 }, { "epoch": 0.7115783165622113, "grad_norm": 0.10016234964132309, "learning_rate": 8.67430974492478e-06, "loss": 8.633, "step": 142490 }, { "epoch": 0.7116282553871507, "grad_norm": 0.08851133286952972, "learning_rate": 8.672807829983228e-06, "loss": 8.6333, "step": 142500 }, { "epoch": 0.7116781942120902, "grad_norm": 0.08761074393987656, "learning_rate": 8.671305915041678e-06, "loss": 8.6358, "step": 142510 }, { "epoch": 0.7117281330370296, "grad_norm": 0.0912003293633461, "learning_rate": 8.669804000100128e-06, "loss": 8.6103, "step": 142520 }, { "epoch": 0.7117780718619691, "grad_norm": 0.09523065388202667, "learning_rate": 8.668302085158577e-06, "loss": 8.6148, "step": 142530 }, { "epoch": 0.7118280106869085, "grad_norm": 0.09399678558111191, "learning_rate": 8.666800170217027e-06, "loss": 8.622, "step": 142540 }, { "epoch": 0.711877949511848, "grad_norm": 0.09112656861543655, "learning_rate": 8.665298255275477e-06, "loss": 8.6207, "step": 142550 }, { "epoch": 0.7119278883367874, "grad_norm": 0.08925676345825195, "learning_rate": 8.663796340333926e-06, "loss": 8.6353, "step": 142560 }, { "epoch": 0.7119778271617269, "grad_norm": 0.09203901141881943, "learning_rate": 8.662294425392376e-06, "loss": 8.6239, "step": 142570 }, { "epoch": 0.7120277659866663, "grad_norm": 0.08438628166913986, "learning_rate": 8.660792510450824e-06, "loss": 8.6296, "step": 142580 }, { "epoch": 0.7120777048116058, "grad_norm": 0.08993806689977646, "learning_rate": 8.659290595509274e-06, "loss": 8.6178, "step": 142590 }, { "epoch": 0.7121276436365452, "grad_norm": 0.08885114639997482, "learning_rate": 8.657788680567725e-06, "loss": 8.6262, "step": 142600 }, { "epoch": 0.7121775824614847, "grad_norm": 0.09332863986492157, "learning_rate": 8.656286765626173e-06, "loss": 8.6335, "step": 142610 }, { "epoch": 0.7122275212864241, "grad_norm": 0.09431906044483185, "learning_rate": 8.654784850684623e-06, "loss": 8.6263, "step": 142620 }, { "epoch": 0.7122774601113636, "grad_norm": 0.09060939401388168, "learning_rate": 8.653282935743072e-06, "loss": 8.621, "step": 142630 }, { "epoch": 0.712327398936303, "grad_norm": 0.09892533719539642, "learning_rate": 8.651781020801522e-06, "loss": 8.6007, "step": 142640 }, { "epoch": 0.7123773377612425, "grad_norm": 0.09287749230861664, "learning_rate": 8.650279105859972e-06, "loss": 8.6396, "step": 142650 }, { "epoch": 0.7124272765861819, "grad_norm": 0.09754089266061783, "learning_rate": 8.64877719091842e-06, "loss": 8.6199, "step": 142660 }, { "epoch": 0.7124772154111214, "grad_norm": 0.09147115051746368, "learning_rate": 8.64727527597687e-06, "loss": 8.6157, "step": 142670 }, { "epoch": 0.7125271542360608, "grad_norm": 0.09473909437656403, "learning_rate": 8.64577336103532e-06, "loss": 8.6091, "step": 142680 }, { "epoch": 0.7125770930610003, "grad_norm": 0.08881004899740219, "learning_rate": 8.64427144609377e-06, "loss": 8.6372, "step": 142690 }, { "epoch": 0.7126270318859397, "grad_norm": 0.08714466542005539, "learning_rate": 8.64276953115222e-06, "loss": 8.6302, "step": 142700 }, { "epoch": 0.7126769707108792, "grad_norm": 0.08891968429088593, "learning_rate": 8.64126761621067e-06, "loss": 8.6191, "step": 142710 }, { "epoch": 0.7127269095358186, "grad_norm": 0.09196534752845764, "learning_rate": 8.639765701269118e-06, "loss": 8.62, "step": 142720 }, { "epoch": 0.712776848360758, "grad_norm": 0.08720948547124863, "learning_rate": 8.638263786327568e-06, "loss": 8.6318, "step": 142730 }, { "epoch": 0.7128267871856975, "grad_norm": 0.0971934124827385, "learning_rate": 8.636761871386017e-06, "loss": 8.6121, "step": 142740 }, { "epoch": 0.712876726010637, "grad_norm": 0.09166713804006577, "learning_rate": 8.635259956444467e-06, "loss": 8.6297, "step": 142750 }, { "epoch": 0.7129266648355764, "grad_norm": 0.09517423063516617, "learning_rate": 8.633758041502917e-06, "loss": 8.6293, "step": 142760 }, { "epoch": 0.7129766036605159, "grad_norm": 0.09297715872526169, "learning_rate": 8.632256126561366e-06, "loss": 8.6296, "step": 142770 }, { "epoch": 0.7130265424854553, "grad_norm": 0.09390052407979965, "learning_rate": 8.630754211619816e-06, "loss": 8.6186, "step": 142780 }, { "epoch": 0.7130764813103948, "grad_norm": 0.09712127596139908, "learning_rate": 8.629252296678264e-06, "loss": 8.6155, "step": 142790 }, { "epoch": 0.7131264201353342, "grad_norm": 0.0925319716334343, "learning_rate": 8.627750381736715e-06, "loss": 8.6215, "step": 142800 }, { "epoch": 0.7131763589602736, "grad_norm": 0.09186070412397385, "learning_rate": 8.626248466795165e-06, "loss": 8.6158, "step": 142810 }, { "epoch": 0.7132262977852131, "grad_norm": 0.09534446150064468, "learning_rate": 8.624746551853613e-06, "loss": 8.624, "step": 142820 }, { "epoch": 0.7132762366101526, "grad_norm": 0.09688442945480347, "learning_rate": 8.623244636912063e-06, "loss": 8.5915, "step": 142830 }, { "epoch": 0.713326175435092, "grad_norm": 0.0908428207039833, "learning_rate": 8.621742721970512e-06, "loss": 8.6246, "step": 142840 }, { "epoch": 0.7133761142600314, "grad_norm": 0.08914895355701447, "learning_rate": 8.620240807028962e-06, "loss": 8.6274, "step": 142850 }, { "epoch": 0.7134260530849709, "grad_norm": 0.0967710018157959, "learning_rate": 8.618738892087412e-06, "loss": 8.6185, "step": 142860 }, { "epoch": 0.7134759919099104, "grad_norm": 0.09269560873508453, "learning_rate": 8.617236977145863e-06, "loss": 8.6148, "step": 142870 }, { "epoch": 0.7135259307348498, "grad_norm": 0.09392331540584564, "learning_rate": 8.615735062204311e-06, "loss": 8.6251, "step": 142880 }, { "epoch": 0.7135758695597892, "grad_norm": 0.08788944780826569, "learning_rate": 8.61423314726276e-06, "loss": 8.6238, "step": 142890 }, { "epoch": 0.7136258083847287, "grad_norm": 0.09034030884504318, "learning_rate": 8.61273123232121e-06, "loss": 8.622, "step": 142900 }, { "epoch": 0.7136757472096682, "grad_norm": 0.08973328024148941, "learning_rate": 8.61122931737966e-06, "loss": 8.6273, "step": 142910 }, { "epoch": 0.7137256860346076, "grad_norm": 0.08710794895887375, "learning_rate": 8.60972740243811e-06, "loss": 8.6138, "step": 142920 }, { "epoch": 0.713775624859547, "grad_norm": 0.09544447809457779, "learning_rate": 8.608225487496558e-06, "loss": 8.6187, "step": 142930 }, { "epoch": 0.7138255636844865, "grad_norm": 0.09114502370357513, "learning_rate": 8.606723572555007e-06, "loss": 8.6234, "step": 142940 }, { "epoch": 0.713875502509426, "grad_norm": 0.09414034336805344, "learning_rate": 8.605221657613457e-06, "loss": 8.6191, "step": 142950 }, { "epoch": 0.7139254413343654, "grad_norm": 0.08966190367937088, "learning_rate": 8.603719742671907e-06, "loss": 8.6134, "step": 142960 }, { "epoch": 0.7139753801593048, "grad_norm": 0.08923894166946411, "learning_rate": 8.602217827730358e-06, "loss": 8.6135, "step": 142970 }, { "epoch": 0.7140253189842443, "grad_norm": 0.09734483063220978, "learning_rate": 8.600715912788806e-06, "loss": 8.6281, "step": 142980 }, { "epoch": 0.7140752578091838, "grad_norm": 0.09202791005373001, "learning_rate": 8.599213997847254e-06, "loss": 8.6208, "step": 142990 }, { "epoch": 0.7141251966341232, "grad_norm": 0.09096809476613998, "learning_rate": 8.597712082905705e-06, "loss": 8.6286, "step": 143000 }, { "epoch": 0.7141751354590626, "grad_norm": 0.09096328914165497, "learning_rate": 8.596210167964155e-06, "loss": 8.6242, "step": 143010 }, { "epoch": 0.7142250742840021, "grad_norm": 0.09118296205997467, "learning_rate": 8.594708253022605e-06, "loss": 8.6227, "step": 143020 }, { "epoch": 0.7142750131089416, "grad_norm": 0.09323076903820038, "learning_rate": 8.593206338081055e-06, "loss": 8.6223, "step": 143030 }, { "epoch": 0.714324951933881, "grad_norm": 0.08754134923219681, "learning_rate": 8.591704423139502e-06, "loss": 8.6249, "step": 143040 }, { "epoch": 0.7143748907588204, "grad_norm": 0.09009493887424469, "learning_rate": 8.590202508197952e-06, "loss": 8.6254, "step": 143050 }, { "epoch": 0.7144248295837599, "grad_norm": 0.09118343889713287, "learning_rate": 8.588700593256402e-06, "loss": 8.6078, "step": 143060 }, { "epoch": 0.7144747684086994, "grad_norm": 0.0941152274608612, "learning_rate": 8.587198678314853e-06, "loss": 8.6298, "step": 143070 }, { "epoch": 0.7145247072336388, "grad_norm": 0.08993294835090637, "learning_rate": 8.585696763373303e-06, "loss": 8.6334, "step": 143080 }, { "epoch": 0.7145746460585782, "grad_norm": 0.0892404243350029, "learning_rate": 8.58419484843175e-06, "loss": 8.5978, "step": 143090 }, { "epoch": 0.7146245848835177, "grad_norm": 0.08918585628271103, "learning_rate": 8.5826929334902e-06, "loss": 8.6161, "step": 143100 }, { "epoch": 0.7146745237084572, "grad_norm": 0.08921711891889572, "learning_rate": 8.58119101854865e-06, "loss": 8.6231, "step": 143110 }, { "epoch": 0.7147244625333966, "grad_norm": 0.10099107027053833, "learning_rate": 8.5796891036071e-06, "loss": 8.6362, "step": 143120 }, { "epoch": 0.714774401358336, "grad_norm": 0.09065527468919754, "learning_rate": 8.57818718866555e-06, "loss": 8.6122, "step": 143130 }, { "epoch": 0.7148243401832755, "grad_norm": 0.09168516844511032, "learning_rate": 8.576685273723997e-06, "loss": 8.6107, "step": 143140 }, { "epoch": 0.714874279008215, "grad_norm": 0.0854279100894928, "learning_rate": 8.575183358782447e-06, "loss": 8.6273, "step": 143150 }, { "epoch": 0.7149242178331544, "grad_norm": 0.08854588121175766, "learning_rate": 8.573681443840897e-06, "loss": 8.6061, "step": 143160 }, { "epoch": 0.7149741566580938, "grad_norm": 0.0906798392534256, "learning_rate": 8.572179528899348e-06, "loss": 8.6264, "step": 143170 }, { "epoch": 0.7150240954830333, "grad_norm": 0.09377674758434296, "learning_rate": 8.570677613957798e-06, "loss": 8.6236, "step": 143180 }, { "epoch": 0.7150740343079728, "grad_norm": 0.09252772480249405, "learning_rate": 8.569175699016246e-06, "loss": 8.6202, "step": 143190 }, { "epoch": 0.7151239731329122, "grad_norm": 0.09276933968067169, "learning_rate": 8.567673784074695e-06, "loss": 8.6067, "step": 143200 }, { "epoch": 0.7151739119578516, "grad_norm": 0.09133142232894897, "learning_rate": 8.566171869133145e-06, "loss": 8.6154, "step": 143210 }, { "epoch": 0.715223850782791, "grad_norm": 0.09052088111639023, "learning_rate": 8.564669954191595e-06, "loss": 8.6426, "step": 143220 }, { "epoch": 0.7152737896077306, "grad_norm": 0.0909975916147232, "learning_rate": 8.563168039250045e-06, "loss": 8.6208, "step": 143230 }, { "epoch": 0.71532372843267, "grad_norm": 0.08892220258712769, "learning_rate": 8.561666124308494e-06, "loss": 8.61, "step": 143240 }, { "epoch": 0.7153736672576094, "grad_norm": 0.09050557017326355, "learning_rate": 8.560164209366942e-06, "loss": 8.6121, "step": 143250 }, { "epoch": 0.7154236060825488, "grad_norm": 0.09054877609014511, "learning_rate": 8.558662294425392e-06, "loss": 8.6173, "step": 143260 }, { "epoch": 0.7154735449074884, "grad_norm": 0.09878473728895187, "learning_rate": 8.557160379483843e-06, "loss": 8.611, "step": 143270 }, { "epoch": 0.7155234837324278, "grad_norm": 0.08595815300941467, "learning_rate": 8.555658464542293e-06, "loss": 8.6321, "step": 143280 }, { "epoch": 0.7155734225573672, "grad_norm": 0.0915842056274414, "learning_rate": 8.554156549600741e-06, "loss": 8.6161, "step": 143290 }, { "epoch": 0.7156233613823066, "grad_norm": 0.08792208880186081, "learning_rate": 8.55265463465919e-06, "loss": 8.6099, "step": 143300 }, { "epoch": 0.7156733002072462, "grad_norm": 0.0902579203248024, "learning_rate": 8.55115271971764e-06, "loss": 8.6081, "step": 143310 }, { "epoch": 0.7157232390321856, "grad_norm": 0.09018343687057495, "learning_rate": 8.54965080477609e-06, "loss": 8.6206, "step": 143320 }, { "epoch": 0.715773177857125, "grad_norm": 0.09098510444164276, "learning_rate": 8.54814888983454e-06, "loss": 8.6374, "step": 143330 }, { "epoch": 0.7158231166820644, "grad_norm": 0.09270698577165604, "learning_rate": 8.546646974892989e-06, "loss": 8.6342, "step": 143340 }, { "epoch": 0.715873055507004, "grad_norm": 0.09691673517227173, "learning_rate": 8.545145059951439e-06, "loss": 8.6208, "step": 143350 }, { "epoch": 0.7159229943319434, "grad_norm": 0.09676571935415268, "learning_rate": 8.543643145009887e-06, "loss": 8.6159, "step": 143360 }, { "epoch": 0.7159729331568828, "grad_norm": 0.08486426621675491, "learning_rate": 8.542141230068338e-06, "loss": 8.6335, "step": 143370 }, { "epoch": 0.7160228719818222, "grad_norm": 0.09273499995470047, "learning_rate": 8.540639315126788e-06, "loss": 8.62, "step": 143380 }, { "epoch": 0.7160728108067617, "grad_norm": 0.09392505139112473, "learning_rate": 8.539137400185236e-06, "loss": 8.6318, "step": 143390 }, { "epoch": 0.7161227496317012, "grad_norm": 0.08955458551645279, "learning_rate": 8.537635485243686e-06, "loss": 8.6154, "step": 143400 }, { "epoch": 0.7161726884566406, "grad_norm": 0.08472466468811035, "learning_rate": 8.536133570302135e-06, "loss": 8.6181, "step": 143410 }, { "epoch": 0.71622262728158, "grad_norm": 0.08813430368900299, "learning_rate": 8.534631655360585e-06, "loss": 8.6169, "step": 143420 }, { "epoch": 0.7162725661065195, "grad_norm": 0.09203272312879562, "learning_rate": 8.533129740419035e-06, "loss": 8.6113, "step": 143430 }, { "epoch": 0.716322504931459, "grad_norm": 0.08561133593320847, "learning_rate": 8.531627825477484e-06, "loss": 8.6069, "step": 143440 }, { "epoch": 0.7163724437563984, "grad_norm": 0.08707552403211594, "learning_rate": 8.530125910535934e-06, "loss": 8.6089, "step": 143450 }, { "epoch": 0.7164223825813378, "grad_norm": 0.09338618814945221, "learning_rate": 8.528623995594382e-06, "loss": 8.6026, "step": 143460 }, { "epoch": 0.7164723214062773, "grad_norm": 0.08720285445451736, "learning_rate": 8.527122080652833e-06, "loss": 8.6415, "step": 143470 }, { "epoch": 0.7165222602312168, "grad_norm": 0.09213987737894058, "learning_rate": 8.525620165711283e-06, "loss": 8.6261, "step": 143480 }, { "epoch": 0.7165721990561562, "grad_norm": 0.09045135974884033, "learning_rate": 8.524118250769731e-06, "loss": 8.6133, "step": 143490 }, { "epoch": 0.7166221378810956, "grad_norm": 0.08416689932346344, "learning_rate": 8.522616335828181e-06, "loss": 8.6213, "step": 143500 }, { "epoch": 0.716672076706035, "grad_norm": 0.09071792662143707, "learning_rate": 8.52111442088663e-06, "loss": 8.606, "step": 143510 }, { "epoch": 0.7167220155309746, "grad_norm": 0.08766292035579681, "learning_rate": 8.51961250594508e-06, "loss": 8.6364, "step": 143520 }, { "epoch": 0.716771954355914, "grad_norm": 0.09889094531536102, "learning_rate": 8.51811059100353e-06, "loss": 8.6084, "step": 143530 }, { "epoch": 0.7168218931808534, "grad_norm": 0.09228768199682236, "learning_rate": 8.516608676061979e-06, "loss": 8.617, "step": 143540 }, { "epoch": 0.7168718320057929, "grad_norm": 0.08915939927101135, "learning_rate": 8.515106761120429e-06, "loss": 8.6044, "step": 143550 }, { "epoch": 0.7169217708307324, "grad_norm": 0.08671806752681732, "learning_rate": 8.513604846178879e-06, "loss": 8.6338, "step": 143560 }, { "epoch": 0.7169717096556718, "grad_norm": 0.090422123670578, "learning_rate": 8.512102931237328e-06, "loss": 8.611, "step": 143570 }, { "epoch": 0.7170216484806112, "grad_norm": 0.08737920969724655, "learning_rate": 8.510601016295778e-06, "loss": 8.6161, "step": 143580 }, { "epoch": 0.7170715873055507, "grad_norm": 0.08856778591871262, "learning_rate": 8.509099101354226e-06, "loss": 8.63, "step": 143590 }, { "epoch": 0.7171215261304902, "grad_norm": 0.0872621163725853, "learning_rate": 8.507597186412676e-06, "loss": 8.6203, "step": 143600 }, { "epoch": 0.7171714649554296, "grad_norm": 0.08896245807409286, "learning_rate": 8.506095271471127e-06, "loss": 8.6228, "step": 143610 }, { "epoch": 0.717221403780369, "grad_norm": 0.09879324585199356, "learning_rate": 8.504593356529575e-06, "loss": 8.6155, "step": 143620 }, { "epoch": 0.7172713426053084, "grad_norm": 0.08767727017402649, "learning_rate": 8.503091441588025e-06, "loss": 8.6018, "step": 143630 }, { "epoch": 0.717321281430248, "grad_norm": 0.0934598445892334, "learning_rate": 8.501589526646474e-06, "loss": 8.6157, "step": 143640 }, { "epoch": 0.7173712202551874, "grad_norm": 0.09154853224754333, "learning_rate": 8.500087611704924e-06, "loss": 8.618, "step": 143650 }, { "epoch": 0.7174211590801268, "grad_norm": 0.09346166253089905, "learning_rate": 8.498585696763374e-06, "loss": 8.6144, "step": 143660 }, { "epoch": 0.7174710979050662, "grad_norm": 0.09311819821596146, "learning_rate": 8.497083781821823e-06, "loss": 8.6105, "step": 143670 }, { "epoch": 0.7175210367300058, "grad_norm": 0.08571168035268784, "learning_rate": 8.495581866880273e-06, "loss": 8.6284, "step": 143680 }, { "epoch": 0.7175709755549452, "grad_norm": 0.09284307062625885, "learning_rate": 8.494079951938721e-06, "loss": 8.6317, "step": 143690 }, { "epoch": 0.7176209143798846, "grad_norm": 0.08840735256671906, "learning_rate": 8.492578036997171e-06, "loss": 8.6275, "step": 143700 }, { "epoch": 0.717670853204824, "grad_norm": 0.09564638137817383, "learning_rate": 8.491076122055622e-06, "loss": 8.5992, "step": 143710 }, { "epoch": 0.7177207920297636, "grad_norm": 0.09461285173892975, "learning_rate": 8.489574207114072e-06, "loss": 8.6209, "step": 143720 }, { "epoch": 0.717770730854703, "grad_norm": 0.0862046629190445, "learning_rate": 8.48807229217252e-06, "loss": 8.6381, "step": 143730 }, { "epoch": 0.7178206696796424, "grad_norm": 0.0956760123372078, "learning_rate": 8.486570377230969e-06, "loss": 8.6162, "step": 143740 }, { "epoch": 0.7178706085045818, "grad_norm": 0.09319616854190826, "learning_rate": 8.485068462289419e-06, "loss": 8.617, "step": 143750 }, { "epoch": 0.7179205473295214, "grad_norm": 0.08868087083101273, "learning_rate": 8.483566547347869e-06, "loss": 8.6159, "step": 143760 }, { "epoch": 0.7179704861544608, "grad_norm": 0.0888964906334877, "learning_rate": 8.48206463240632e-06, "loss": 8.6165, "step": 143770 }, { "epoch": 0.7180204249794002, "grad_norm": 0.0897190198302269, "learning_rate": 8.480562717464768e-06, "loss": 8.6291, "step": 143780 }, { "epoch": 0.7180703638043396, "grad_norm": 0.0912693589925766, "learning_rate": 8.479060802523216e-06, "loss": 8.6018, "step": 143790 }, { "epoch": 0.7181203026292792, "grad_norm": 0.09534019976854324, "learning_rate": 8.477558887581666e-06, "loss": 8.6268, "step": 143800 }, { "epoch": 0.7181702414542186, "grad_norm": 0.0912948027253151, "learning_rate": 8.476056972640117e-06, "loss": 8.6165, "step": 143810 }, { "epoch": 0.718220180279158, "grad_norm": 0.09484043717384338, "learning_rate": 8.474555057698567e-06, "loss": 8.6171, "step": 143820 }, { "epoch": 0.7182701191040974, "grad_norm": 0.09343184530735016, "learning_rate": 8.473053142757015e-06, "loss": 8.6158, "step": 143830 }, { "epoch": 0.718320057929037, "grad_norm": 0.09978407621383667, "learning_rate": 8.471551227815464e-06, "loss": 8.6112, "step": 143840 }, { "epoch": 0.7183699967539764, "grad_norm": 0.09003512561321259, "learning_rate": 8.470049312873914e-06, "loss": 8.6293, "step": 143850 }, { "epoch": 0.7184199355789158, "grad_norm": 0.0940660908818245, "learning_rate": 8.468547397932364e-06, "loss": 8.6131, "step": 143860 }, { "epoch": 0.7184698744038552, "grad_norm": 0.09280209988355637, "learning_rate": 8.467045482990814e-06, "loss": 8.6309, "step": 143870 }, { "epoch": 0.7185198132287948, "grad_norm": 0.0923769474029541, "learning_rate": 8.465543568049264e-06, "loss": 8.6371, "step": 143880 }, { "epoch": 0.7185697520537342, "grad_norm": 0.09040191769599915, "learning_rate": 8.464041653107711e-06, "loss": 8.6111, "step": 143890 }, { "epoch": 0.7186196908786736, "grad_norm": 0.09328994154930115, "learning_rate": 8.462539738166161e-06, "loss": 8.6138, "step": 143900 }, { "epoch": 0.718669629703613, "grad_norm": 0.0912991613149643, "learning_rate": 8.461037823224612e-06, "loss": 8.601, "step": 143910 }, { "epoch": 0.7187195685285526, "grad_norm": 0.09061001241207123, "learning_rate": 8.459535908283062e-06, "loss": 8.6132, "step": 143920 }, { "epoch": 0.718769507353492, "grad_norm": 0.09003961086273193, "learning_rate": 8.458033993341512e-06, "loss": 8.6153, "step": 143930 }, { "epoch": 0.7188194461784314, "grad_norm": 0.09332337230443954, "learning_rate": 8.456532078399959e-06, "loss": 8.6154, "step": 143940 }, { "epoch": 0.7188693850033708, "grad_norm": 0.09105270355939865, "learning_rate": 8.455030163458409e-06, "loss": 8.6062, "step": 143950 }, { "epoch": 0.7189193238283104, "grad_norm": 0.0938153937458992, "learning_rate": 8.453528248516859e-06, "loss": 8.6181, "step": 143960 }, { "epoch": 0.7189692626532498, "grad_norm": 0.08828999102115631, "learning_rate": 8.45202633357531e-06, "loss": 8.6192, "step": 143970 }, { "epoch": 0.7190192014781892, "grad_norm": 0.09276334941387177, "learning_rate": 8.45052441863376e-06, "loss": 8.609, "step": 143980 }, { "epoch": 0.7190691403031286, "grad_norm": 0.09385494142770767, "learning_rate": 8.449022503692206e-06, "loss": 8.6149, "step": 143990 }, { "epoch": 0.7191190791280682, "grad_norm": 0.09120655804872513, "learning_rate": 8.447520588750656e-06, "loss": 8.6296, "step": 144000 }, { "epoch": 0.7191690179530076, "grad_norm": 0.08751309663057327, "learning_rate": 8.446018673809107e-06, "loss": 8.6187, "step": 144010 }, { "epoch": 0.719218956777947, "grad_norm": 0.08984875679016113, "learning_rate": 8.444516758867557e-06, "loss": 8.627, "step": 144020 }, { "epoch": 0.7192688956028864, "grad_norm": 0.08760925382375717, "learning_rate": 8.443014843926007e-06, "loss": 8.6132, "step": 144030 }, { "epoch": 0.719318834427826, "grad_norm": 0.09052575379610062, "learning_rate": 8.441512928984455e-06, "loss": 8.6184, "step": 144040 }, { "epoch": 0.7193687732527654, "grad_norm": 0.08988479524850845, "learning_rate": 8.440011014042904e-06, "loss": 8.6226, "step": 144050 }, { "epoch": 0.7194187120777048, "grad_norm": 0.0874130055308342, "learning_rate": 8.438509099101354e-06, "loss": 8.6209, "step": 144060 }, { "epoch": 0.7194686509026442, "grad_norm": 0.09357550740242004, "learning_rate": 8.437007184159804e-06, "loss": 8.5982, "step": 144070 }, { "epoch": 0.7195185897275838, "grad_norm": 0.0861838087439537, "learning_rate": 8.435505269218254e-06, "loss": 8.6101, "step": 144080 }, { "epoch": 0.7195685285525232, "grad_norm": 0.0887862890958786, "learning_rate": 8.434003354276703e-06, "loss": 8.6, "step": 144090 }, { "epoch": 0.7196184673774626, "grad_norm": 0.09258834272623062, "learning_rate": 8.432501439335151e-06, "loss": 8.6318, "step": 144100 }, { "epoch": 0.719668406202402, "grad_norm": 0.0883084088563919, "learning_rate": 8.430999524393602e-06, "loss": 8.6222, "step": 144110 }, { "epoch": 0.7197183450273416, "grad_norm": 0.09339895844459534, "learning_rate": 8.429497609452052e-06, "loss": 8.6054, "step": 144120 }, { "epoch": 0.719768283852281, "grad_norm": 0.0934942364692688, "learning_rate": 8.427995694510502e-06, "loss": 8.6172, "step": 144130 }, { "epoch": 0.7198182226772204, "grad_norm": 0.09863001853227615, "learning_rate": 8.42649377956895e-06, "loss": 8.6149, "step": 144140 }, { "epoch": 0.7198681615021598, "grad_norm": 0.08822454512119293, "learning_rate": 8.424991864627399e-06, "loss": 8.614, "step": 144150 }, { "epoch": 0.7199181003270994, "grad_norm": 0.08899369835853577, "learning_rate": 8.423489949685849e-06, "loss": 8.6169, "step": 144160 }, { "epoch": 0.7199680391520388, "grad_norm": 0.08983699232339859, "learning_rate": 8.4219880347443e-06, "loss": 8.606, "step": 144170 }, { "epoch": 0.7200179779769782, "grad_norm": 0.092051200568676, "learning_rate": 8.42048611980275e-06, "loss": 8.6196, "step": 144180 }, { "epoch": 0.7200679168019176, "grad_norm": 0.09895046800374985, "learning_rate": 8.418984204861198e-06, "loss": 8.6161, "step": 144190 }, { "epoch": 0.7201178556268572, "grad_norm": 0.09208567440509796, "learning_rate": 8.417482289919648e-06, "loss": 8.6099, "step": 144200 }, { "epoch": 0.7201677944517966, "grad_norm": 0.08995640277862549, "learning_rate": 8.415980374978097e-06, "loss": 8.626, "step": 144210 }, { "epoch": 0.720217733276736, "grad_norm": 0.08942251652479172, "learning_rate": 8.414478460036547e-06, "loss": 8.614, "step": 144220 }, { "epoch": 0.7202676721016754, "grad_norm": 0.08955737203359604, "learning_rate": 8.412976545094997e-06, "loss": 8.6169, "step": 144230 }, { "epoch": 0.720317610926615, "grad_norm": 0.08591482788324356, "learning_rate": 8.411474630153445e-06, "loss": 8.5999, "step": 144240 }, { "epoch": 0.7203675497515544, "grad_norm": 0.09663660079240799, "learning_rate": 8.409972715211896e-06, "loss": 8.6279, "step": 144250 }, { "epoch": 0.7204174885764938, "grad_norm": 0.10267946124076843, "learning_rate": 8.408470800270344e-06, "loss": 8.5984, "step": 144260 }, { "epoch": 0.7204674274014332, "grad_norm": 0.08785830438137054, "learning_rate": 8.406968885328794e-06, "loss": 8.6214, "step": 144270 }, { "epoch": 0.7205173662263727, "grad_norm": 0.0935339480638504, "learning_rate": 8.405466970387244e-06, "loss": 8.6095, "step": 144280 }, { "epoch": 0.7205673050513122, "grad_norm": 0.08838778734207153, "learning_rate": 8.403965055445693e-06, "loss": 8.6216, "step": 144290 }, { "epoch": 0.7206172438762516, "grad_norm": 0.09756764769554138, "learning_rate": 8.402463140504143e-06, "loss": 8.61, "step": 144300 }, { "epoch": 0.720667182701191, "grad_norm": 0.0923360213637352, "learning_rate": 8.400961225562592e-06, "loss": 8.5908, "step": 144310 }, { "epoch": 0.7207171215261305, "grad_norm": 0.08914266526699066, "learning_rate": 8.399459310621042e-06, "loss": 8.5996, "step": 144320 }, { "epoch": 0.72076706035107, "grad_norm": 0.09269039332866669, "learning_rate": 8.397957395679492e-06, "loss": 8.63, "step": 144330 }, { "epoch": 0.7208169991760094, "grad_norm": 0.09222870320081711, "learning_rate": 8.39645548073794e-06, "loss": 8.6083, "step": 144340 }, { "epoch": 0.7208669380009488, "grad_norm": 0.09075245261192322, "learning_rate": 8.39495356579639e-06, "loss": 8.6032, "step": 144350 }, { "epoch": 0.7209168768258883, "grad_norm": 0.0898914784193039, "learning_rate": 8.39345165085484e-06, "loss": 8.5895, "step": 144360 }, { "epoch": 0.7209668156508278, "grad_norm": 0.08764638006687164, "learning_rate": 8.39194973591329e-06, "loss": 8.5997, "step": 144370 }, { "epoch": 0.7210167544757672, "grad_norm": 0.08820385485887527, "learning_rate": 8.39044782097174e-06, "loss": 8.6189, "step": 144380 }, { "epoch": 0.7210666933007066, "grad_norm": 0.08908724784851074, "learning_rate": 8.388945906030188e-06, "loss": 8.6078, "step": 144390 }, { "epoch": 0.721116632125646, "grad_norm": 0.0920150950551033, "learning_rate": 8.387443991088638e-06, "loss": 8.6197, "step": 144400 }, { "epoch": 0.7211665709505856, "grad_norm": 0.09331239759922028, "learning_rate": 8.385942076147088e-06, "loss": 8.6267, "step": 144410 }, { "epoch": 0.721216509775525, "grad_norm": 0.08835045248270035, "learning_rate": 8.384440161205537e-06, "loss": 8.599, "step": 144420 }, { "epoch": 0.7212664486004644, "grad_norm": 0.0986829549074173, "learning_rate": 8.382938246263987e-06, "loss": 8.6299, "step": 144430 }, { "epoch": 0.7213163874254038, "grad_norm": 0.09488464146852493, "learning_rate": 8.381436331322435e-06, "loss": 8.6146, "step": 144440 }, { "epoch": 0.7213663262503434, "grad_norm": 0.09445064514875412, "learning_rate": 8.379934416380886e-06, "loss": 8.6191, "step": 144450 }, { "epoch": 0.7214162650752828, "grad_norm": 0.10629602521657944, "learning_rate": 8.378432501439336e-06, "loss": 8.6149, "step": 144460 }, { "epoch": 0.7214662039002222, "grad_norm": 0.09508983045816422, "learning_rate": 8.376930586497784e-06, "loss": 8.6109, "step": 144470 }, { "epoch": 0.7215161427251616, "grad_norm": 0.0893370732665062, "learning_rate": 8.375428671556234e-06, "loss": 8.6295, "step": 144480 }, { "epoch": 0.7215660815501012, "grad_norm": 0.09526433795690536, "learning_rate": 8.373926756614683e-06, "loss": 8.6181, "step": 144490 }, { "epoch": 0.7216160203750406, "grad_norm": 0.08652014285326004, "learning_rate": 8.372424841673133e-06, "loss": 8.6274, "step": 144500 }, { "epoch": 0.72166595919998, "grad_norm": 0.09043004363775253, "learning_rate": 8.370922926731583e-06, "loss": 8.6233, "step": 144510 }, { "epoch": 0.7217158980249194, "grad_norm": 0.09349585324525833, "learning_rate": 8.369421011790034e-06, "loss": 8.6119, "step": 144520 }, { "epoch": 0.721765836849859, "grad_norm": 0.09839218109846115, "learning_rate": 8.367919096848482e-06, "loss": 8.5964, "step": 144530 }, { "epoch": 0.7218157756747984, "grad_norm": 0.09267604351043701, "learning_rate": 8.36641718190693e-06, "loss": 8.6024, "step": 144540 }, { "epoch": 0.7218657144997378, "grad_norm": 0.08900393545627594, "learning_rate": 8.36491526696538e-06, "loss": 8.6205, "step": 144550 }, { "epoch": 0.7219156533246772, "grad_norm": 0.08693215250968933, "learning_rate": 8.36341335202383e-06, "loss": 8.6103, "step": 144560 }, { "epoch": 0.7219655921496168, "grad_norm": 0.09515117853879929, "learning_rate": 8.361911437082281e-06, "loss": 8.6159, "step": 144570 }, { "epoch": 0.7220155309745562, "grad_norm": 0.08883559703826904, "learning_rate": 8.36040952214073e-06, "loss": 8.6283, "step": 144580 }, { "epoch": 0.7220654697994956, "grad_norm": 0.0927378311753273, "learning_rate": 8.358907607199178e-06, "loss": 8.6171, "step": 144590 }, { "epoch": 0.722115408624435, "grad_norm": 0.09397240728139877, "learning_rate": 8.357405692257628e-06, "loss": 8.6131, "step": 144600 }, { "epoch": 0.7221653474493746, "grad_norm": 0.0927707701921463, "learning_rate": 8.355903777316078e-06, "loss": 8.6159, "step": 144610 }, { "epoch": 0.722215286274314, "grad_norm": 0.09005044400691986, "learning_rate": 8.354401862374529e-06, "loss": 8.6134, "step": 144620 }, { "epoch": 0.7222652250992534, "grad_norm": 0.08939428627490997, "learning_rate": 8.352899947432977e-06, "loss": 8.6081, "step": 144630 }, { "epoch": 0.7223151639241928, "grad_norm": 0.09121254086494446, "learning_rate": 8.351398032491425e-06, "loss": 8.6085, "step": 144640 }, { "epoch": 0.7223651027491323, "grad_norm": 0.08655776083469391, "learning_rate": 8.349896117549876e-06, "loss": 8.6199, "step": 144650 }, { "epoch": 0.7224150415740718, "grad_norm": 0.09415041655302048, "learning_rate": 8.348394202608326e-06, "loss": 8.605, "step": 144660 }, { "epoch": 0.7224649803990112, "grad_norm": 0.09268126636743546, "learning_rate": 8.346892287666776e-06, "loss": 8.6162, "step": 144670 }, { "epoch": 0.7225149192239506, "grad_norm": 0.1028081476688385, "learning_rate": 8.345390372725226e-06, "loss": 8.6293, "step": 144680 }, { "epoch": 0.7225648580488901, "grad_norm": 0.09145328402519226, "learning_rate": 8.343888457783673e-06, "loss": 8.6075, "step": 144690 }, { "epoch": 0.7226147968738296, "grad_norm": 0.08995439857244492, "learning_rate": 8.342386542842123e-06, "loss": 8.5961, "step": 144700 }, { "epoch": 0.722664735698769, "grad_norm": 0.08796056360006332, "learning_rate": 8.340884627900573e-06, "loss": 8.6194, "step": 144710 }, { "epoch": 0.7227146745237084, "grad_norm": 0.0950382873415947, "learning_rate": 8.339382712959024e-06, "loss": 8.6196, "step": 144720 }, { "epoch": 0.7227646133486479, "grad_norm": 0.09233977645635605, "learning_rate": 8.337880798017474e-06, "loss": 8.6228, "step": 144730 }, { "epoch": 0.7228145521735874, "grad_norm": 0.09013422578573227, "learning_rate": 8.33637888307592e-06, "loss": 8.6222, "step": 144740 }, { "epoch": 0.7228644909985268, "grad_norm": 0.0920189619064331, "learning_rate": 8.33487696813437e-06, "loss": 8.6278, "step": 144750 }, { "epoch": 0.7229144298234662, "grad_norm": 0.09212841838598251, "learning_rate": 8.333375053192821e-06, "loss": 8.6109, "step": 144760 }, { "epoch": 0.7229643686484057, "grad_norm": 0.09851231426000595, "learning_rate": 8.331873138251271e-06, "loss": 8.6134, "step": 144770 }, { "epoch": 0.7230143074733452, "grad_norm": 0.09373413026332855, "learning_rate": 8.330371223309721e-06, "loss": 8.5994, "step": 144780 }, { "epoch": 0.7230642462982846, "grad_norm": 0.09392852336168289, "learning_rate": 8.328869308368168e-06, "loss": 8.628, "step": 144790 }, { "epoch": 0.723114185123224, "grad_norm": 0.09037362039089203, "learning_rate": 8.327367393426618e-06, "loss": 8.6152, "step": 144800 }, { "epoch": 0.7231641239481635, "grad_norm": 0.08910314738750458, "learning_rate": 8.325865478485068e-06, "loss": 8.5945, "step": 144810 }, { "epoch": 0.723214062773103, "grad_norm": 0.08930026739835739, "learning_rate": 8.324363563543519e-06, "loss": 8.61, "step": 144820 }, { "epoch": 0.7232640015980424, "grad_norm": 0.09371279925107956, "learning_rate": 8.322861648601969e-06, "loss": 8.6156, "step": 144830 }, { "epoch": 0.7233139404229818, "grad_norm": 0.0969967246055603, "learning_rate": 8.321359733660417e-06, "loss": 8.6191, "step": 144840 }, { "epoch": 0.7233638792479213, "grad_norm": 0.0957605391740799, "learning_rate": 8.319857818718866e-06, "loss": 8.6104, "step": 144850 }, { "epoch": 0.7234138180728608, "grad_norm": 0.09280975908041, "learning_rate": 8.318355903777316e-06, "loss": 8.6152, "step": 144860 }, { "epoch": 0.7234637568978002, "grad_norm": 0.09555871039628983, "learning_rate": 8.316853988835766e-06, "loss": 8.6135, "step": 144870 }, { "epoch": 0.7235136957227396, "grad_norm": 0.08848311007022858, "learning_rate": 8.315352073894216e-06, "loss": 8.6073, "step": 144880 }, { "epoch": 0.7235636345476791, "grad_norm": 0.09260237962007523, "learning_rate": 8.313850158952665e-06, "loss": 8.613, "step": 144890 }, { "epoch": 0.7236135733726186, "grad_norm": 0.08797964453697205, "learning_rate": 8.312348244011113e-06, "loss": 8.6129, "step": 144900 }, { "epoch": 0.723663512197558, "grad_norm": 0.09759566187858582, "learning_rate": 8.310846329069563e-06, "loss": 8.6059, "step": 144910 }, { "epoch": 0.7237134510224974, "grad_norm": 0.09639012068510056, "learning_rate": 8.309344414128014e-06, "loss": 8.607, "step": 144920 }, { "epoch": 0.7237633898474369, "grad_norm": 0.09098049253225327, "learning_rate": 8.307842499186464e-06, "loss": 8.6158, "step": 144930 }, { "epoch": 0.7238133286723764, "grad_norm": 0.08897264301776886, "learning_rate": 8.306340584244912e-06, "loss": 8.6348, "step": 144940 }, { "epoch": 0.7238632674973158, "grad_norm": 0.09100917726755142, "learning_rate": 8.30483866930336e-06, "loss": 8.6146, "step": 144950 }, { "epoch": 0.7239132063222552, "grad_norm": 0.0964989885687828, "learning_rate": 8.303336754361811e-06, "loss": 8.6144, "step": 144960 }, { "epoch": 0.7239631451471947, "grad_norm": 0.10053501278162003, "learning_rate": 8.301834839420261e-06, "loss": 8.6179, "step": 144970 }, { "epoch": 0.7240130839721342, "grad_norm": 0.09664203226566315, "learning_rate": 8.300332924478711e-06, "loss": 8.6196, "step": 144980 }, { "epoch": 0.7240630227970736, "grad_norm": 0.08678041398525238, "learning_rate": 8.298831009537161e-06, "loss": 8.6115, "step": 144990 }, { "epoch": 0.724112961622013, "grad_norm": 0.09090323746204376, "learning_rate": 8.29732909459561e-06, "loss": 8.6013, "step": 145000 }, { "epoch": 0.7241629004469525, "grad_norm": 0.09203115105628967, "learning_rate": 8.295827179654058e-06, "loss": 8.6139, "step": 145010 }, { "epoch": 0.724212839271892, "grad_norm": 0.0934581458568573, "learning_rate": 8.294325264712509e-06, "loss": 8.6223, "step": 145020 }, { "epoch": 0.7242627780968314, "grad_norm": 0.09179078042507172, "learning_rate": 8.292823349770959e-06, "loss": 8.6099, "step": 145030 }, { "epoch": 0.7243127169217708, "grad_norm": 0.09519582986831665, "learning_rate": 8.291321434829409e-06, "loss": 8.6186, "step": 145040 }, { "epoch": 0.7243626557467103, "grad_norm": 0.08919553458690643, "learning_rate": 8.289819519887857e-06, "loss": 8.607, "step": 145050 }, { "epoch": 0.7244125945716497, "grad_norm": 0.09014479070901871, "learning_rate": 8.288317604946306e-06, "loss": 8.5982, "step": 145060 }, { "epoch": 0.7244625333965892, "grad_norm": 0.0924357920885086, "learning_rate": 8.286815690004756e-06, "loss": 8.6198, "step": 145070 }, { "epoch": 0.7245124722215286, "grad_norm": 0.09133771061897278, "learning_rate": 8.285313775063206e-06, "loss": 8.6248, "step": 145080 }, { "epoch": 0.7245624110464681, "grad_norm": 0.08713619410991669, "learning_rate": 8.283811860121656e-06, "loss": 8.6429, "step": 145090 }, { "epoch": 0.7246123498714075, "grad_norm": 0.09331973642110825, "learning_rate": 8.282309945180105e-06, "loss": 8.6161, "step": 145100 }, { "epoch": 0.724662288696347, "grad_norm": 0.08959534019231796, "learning_rate": 8.280808030238553e-06, "loss": 8.6193, "step": 145110 }, { "epoch": 0.7247122275212864, "grad_norm": 0.09544025361537933, "learning_rate": 8.279306115297004e-06, "loss": 8.6113, "step": 145120 }, { "epoch": 0.7247621663462259, "grad_norm": 0.08845238387584686, "learning_rate": 8.277804200355454e-06, "loss": 8.6096, "step": 145130 }, { "epoch": 0.7248121051711653, "grad_norm": 0.09223693609237671, "learning_rate": 8.276302285413904e-06, "loss": 8.606, "step": 145140 }, { "epoch": 0.7248620439961048, "grad_norm": 0.09197333455085754, "learning_rate": 8.274800370472352e-06, "loss": 8.6065, "step": 145150 }, { "epoch": 0.7249119828210442, "grad_norm": 0.0903843566775322, "learning_rate": 8.273298455530803e-06, "loss": 8.6257, "step": 145160 }, { "epoch": 0.7249619216459837, "grad_norm": 0.08799325674772263, "learning_rate": 8.271796540589251e-06, "loss": 8.6078, "step": 145170 }, { "epoch": 0.7250118604709231, "grad_norm": 0.09171675890684128, "learning_rate": 8.270294625647701e-06, "loss": 8.6055, "step": 145180 }, { "epoch": 0.7250617992958626, "grad_norm": 0.09235136955976486, "learning_rate": 8.268792710706151e-06, "loss": 8.5921, "step": 145190 }, { "epoch": 0.725111738120802, "grad_norm": 0.09448470175266266, "learning_rate": 8.2672907957646e-06, "loss": 8.6096, "step": 145200 }, { "epoch": 0.7251616769457415, "grad_norm": 0.09224043041467667, "learning_rate": 8.26578888082305e-06, "loss": 8.6311, "step": 145210 }, { "epoch": 0.7252116157706809, "grad_norm": 0.09602584689855576, "learning_rate": 8.264286965881499e-06, "loss": 8.6032, "step": 145220 }, { "epoch": 0.7252615545956204, "grad_norm": 0.09572752565145493, "learning_rate": 8.262785050939949e-06, "loss": 8.6264, "step": 145230 }, { "epoch": 0.7253114934205598, "grad_norm": 0.08928846567869186, "learning_rate": 8.261283135998399e-06, "loss": 8.6099, "step": 145240 }, { "epoch": 0.7253614322454993, "grad_norm": 0.09179164469242096, "learning_rate": 8.259781221056847e-06, "loss": 8.6001, "step": 145250 }, { "epoch": 0.7254113710704387, "grad_norm": 0.09306824952363968, "learning_rate": 8.258279306115298e-06, "loss": 8.6105, "step": 145260 }, { "epoch": 0.7254613098953782, "grad_norm": 0.08858099579811096, "learning_rate": 8.256777391173746e-06, "loss": 8.6093, "step": 145270 }, { "epoch": 0.7255112487203176, "grad_norm": 0.09503430128097534, "learning_rate": 8.255275476232196e-06, "loss": 8.5933, "step": 145280 }, { "epoch": 0.7255611875452571, "grad_norm": 0.0941505953669548, "learning_rate": 8.253773561290646e-06, "loss": 8.6164, "step": 145290 }, { "epoch": 0.7256111263701965, "grad_norm": 0.0895562395453453, "learning_rate": 8.252271646349095e-06, "loss": 8.6136, "step": 145300 }, { "epoch": 0.725661065195136, "grad_norm": 0.0945998951792717, "learning_rate": 8.250769731407545e-06, "loss": 8.6132, "step": 145310 }, { "epoch": 0.7257110040200754, "grad_norm": 0.09146866202354431, "learning_rate": 8.249267816465995e-06, "loss": 8.6185, "step": 145320 }, { "epoch": 0.7257609428450149, "grad_norm": 0.09208477288484573, "learning_rate": 8.247765901524444e-06, "loss": 8.6098, "step": 145330 }, { "epoch": 0.7258108816699543, "grad_norm": 0.10175745189189911, "learning_rate": 8.246263986582894e-06, "loss": 8.607, "step": 145340 }, { "epoch": 0.7258608204948938, "grad_norm": 0.08928140252828598, "learning_rate": 8.244762071641342e-06, "loss": 8.611, "step": 145350 }, { "epoch": 0.7259107593198332, "grad_norm": 0.08949202299118042, "learning_rate": 8.243260156699793e-06, "loss": 8.6125, "step": 145360 }, { "epoch": 0.7259606981447726, "grad_norm": 0.10165835916996002, "learning_rate": 8.241758241758243e-06, "loss": 8.6136, "step": 145370 }, { "epoch": 0.7260106369697121, "grad_norm": 0.09630440920591354, "learning_rate": 8.240256326816691e-06, "loss": 8.6119, "step": 145380 }, { "epoch": 0.7260605757946516, "grad_norm": 0.09105534851551056, "learning_rate": 8.238754411875141e-06, "loss": 8.6037, "step": 145390 }, { "epoch": 0.726110514619591, "grad_norm": 0.09389536082744598, "learning_rate": 8.23725249693359e-06, "loss": 8.5986, "step": 145400 }, { "epoch": 0.7261604534445304, "grad_norm": 0.08979053050279617, "learning_rate": 8.23575058199204e-06, "loss": 8.6063, "step": 145410 }, { "epoch": 0.7262103922694699, "grad_norm": 0.09311927855014801, "learning_rate": 8.23424866705049e-06, "loss": 8.6059, "step": 145420 }, { "epoch": 0.7262603310944094, "grad_norm": 0.08796468377113342, "learning_rate": 8.232746752108939e-06, "loss": 8.6218, "step": 145430 }, { "epoch": 0.7263102699193488, "grad_norm": 0.0910877138376236, "learning_rate": 8.231244837167389e-06, "loss": 8.611, "step": 145440 }, { "epoch": 0.7263602087442882, "grad_norm": 0.09477691352367401, "learning_rate": 8.229742922225837e-06, "loss": 8.6215, "step": 145450 }, { "epoch": 0.7264101475692277, "grad_norm": 0.0948781967163086, "learning_rate": 8.228241007284288e-06, "loss": 8.5978, "step": 145460 }, { "epoch": 0.7264600863941671, "grad_norm": 0.08527876436710358, "learning_rate": 8.226739092342738e-06, "loss": 8.6191, "step": 145470 }, { "epoch": 0.7265100252191066, "grad_norm": 0.09014835208654404, "learning_rate": 8.225237177401188e-06, "loss": 8.6033, "step": 145480 }, { "epoch": 0.726559964044046, "grad_norm": 0.09688574820756912, "learning_rate": 8.223735262459636e-06, "loss": 8.6061, "step": 145490 }, { "epoch": 0.7266099028689855, "grad_norm": 0.08917857706546783, "learning_rate": 8.222233347518085e-06, "loss": 8.6284, "step": 145500 }, { "epoch": 0.726659841693925, "grad_norm": 0.09284509718418121, "learning_rate": 8.220731432576535e-06, "loss": 8.6115, "step": 145510 }, { "epoch": 0.7267097805188644, "grad_norm": 0.08809034526348114, "learning_rate": 8.219229517634985e-06, "loss": 8.6158, "step": 145520 }, { "epoch": 0.7267597193438038, "grad_norm": 0.09083345532417297, "learning_rate": 8.217727602693435e-06, "loss": 8.6272, "step": 145530 }, { "epoch": 0.7268096581687433, "grad_norm": 0.08907760679721832, "learning_rate": 8.216225687751884e-06, "loss": 8.6008, "step": 145540 }, { "epoch": 0.7268595969936827, "grad_norm": 0.08739221096038818, "learning_rate": 8.214723772810332e-06, "loss": 8.6271, "step": 145550 }, { "epoch": 0.7269095358186222, "grad_norm": 0.0876481756567955, "learning_rate": 8.213221857868783e-06, "loss": 8.6061, "step": 145560 }, { "epoch": 0.7269594746435616, "grad_norm": 0.08937806636095047, "learning_rate": 8.211719942927233e-06, "loss": 8.629, "step": 145570 }, { "epoch": 0.7270094134685011, "grad_norm": 0.08746001869440079, "learning_rate": 8.210218027985683e-06, "loss": 8.6011, "step": 145580 }, { "epoch": 0.7270593522934405, "grad_norm": 0.09080873429775238, "learning_rate": 8.208716113044131e-06, "loss": 8.6195, "step": 145590 }, { "epoch": 0.72710929111838, "grad_norm": 0.09297414124011993, "learning_rate": 8.20721419810258e-06, "loss": 8.6009, "step": 145600 }, { "epoch": 0.7271592299433194, "grad_norm": 0.09286045283079147, "learning_rate": 8.20571228316103e-06, "loss": 8.6177, "step": 145610 }, { "epoch": 0.7272091687682589, "grad_norm": 0.08479689061641693, "learning_rate": 8.20421036821948e-06, "loss": 8.6263, "step": 145620 }, { "epoch": 0.7272591075931983, "grad_norm": 0.09006743133068085, "learning_rate": 8.20270845327793e-06, "loss": 8.598, "step": 145630 }, { "epoch": 0.7273090464181378, "grad_norm": 0.09547232836484909, "learning_rate": 8.20120653833638e-06, "loss": 8.5996, "step": 145640 }, { "epoch": 0.7273589852430772, "grad_norm": 0.0883583128452301, "learning_rate": 8.199704623394827e-06, "loss": 8.6021, "step": 145650 }, { "epoch": 0.7274089240680167, "grad_norm": 0.09698734432458878, "learning_rate": 8.198202708453278e-06, "loss": 8.6079, "step": 145660 }, { "epoch": 0.7274588628929561, "grad_norm": 0.09151437133550644, "learning_rate": 8.196700793511728e-06, "loss": 8.6057, "step": 145670 }, { "epoch": 0.7275088017178956, "grad_norm": 0.09400162845849991, "learning_rate": 8.195198878570178e-06, "loss": 8.6039, "step": 145680 }, { "epoch": 0.727558740542835, "grad_norm": 0.08918856829404831, "learning_rate": 8.193696963628628e-06, "loss": 8.6045, "step": 145690 }, { "epoch": 0.7276086793677745, "grad_norm": 0.09551417827606201, "learning_rate": 8.192195048687075e-06, "loss": 8.6184, "step": 145700 }, { "epoch": 0.7276586181927139, "grad_norm": 0.09140796959400177, "learning_rate": 8.190693133745525e-06, "loss": 8.5989, "step": 145710 }, { "epoch": 0.7277085570176534, "grad_norm": 0.09093940258026123, "learning_rate": 8.189191218803975e-06, "loss": 8.6054, "step": 145720 }, { "epoch": 0.7277584958425928, "grad_norm": 0.08896015584468842, "learning_rate": 8.187689303862425e-06, "loss": 8.6201, "step": 145730 }, { "epoch": 0.7278084346675323, "grad_norm": 0.08995377272367477, "learning_rate": 8.186187388920876e-06, "loss": 8.6244, "step": 145740 }, { "epoch": 0.7278583734924717, "grad_norm": 0.08884330838918686, "learning_rate": 8.184685473979322e-06, "loss": 8.6063, "step": 145750 }, { "epoch": 0.7279083123174112, "grad_norm": 0.09230002015829086, "learning_rate": 8.183183559037773e-06, "loss": 8.6138, "step": 145760 }, { "epoch": 0.7279582511423506, "grad_norm": 0.0911339670419693, "learning_rate": 8.181681644096223e-06, "loss": 8.5995, "step": 145770 }, { "epoch": 0.7280081899672901, "grad_norm": 0.08715797960758209, "learning_rate": 8.180179729154673e-06, "loss": 8.613, "step": 145780 }, { "epoch": 0.7280581287922295, "grad_norm": 0.09273428469896317, "learning_rate": 8.178677814213123e-06, "loss": 8.6247, "step": 145790 }, { "epoch": 0.728108067617169, "grad_norm": 0.09207670390605927, "learning_rate": 8.177175899271572e-06, "loss": 8.6245, "step": 145800 }, { "epoch": 0.7281580064421084, "grad_norm": 0.09679616987705231, "learning_rate": 8.17567398433002e-06, "loss": 8.6092, "step": 145810 }, { "epoch": 0.7282079452670479, "grad_norm": 0.09426999092102051, "learning_rate": 8.17417206938847e-06, "loss": 8.6225, "step": 145820 }, { "epoch": 0.7282578840919873, "grad_norm": 0.09463541954755783, "learning_rate": 8.17267015444692e-06, "loss": 8.6008, "step": 145830 }, { "epoch": 0.7283078229169268, "grad_norm": 0.09443298727273941, "learning_rate": 8.17116823950537e-06, "loss": 8.5991, "step": 145840 }, { "epoch": 0.7283577617418662, "grad_norm": 0.09195612370967865, "learning_rate": 8.169666324563819e-06, "loss": 8.5961, "step": 145850 }, { "epoch": 0.7284077005668057, "grad_norm": 0.08928360790014267, "learning_rate": 8.168164409622268e-06, "loss": 8.634, "step": 145860 }, { "epoch": 0.7284576393917451, "grad_norm": 0.09235423803329468, "learning_rate": 8.166662494680718e-06, "loss": 8.6134, "step": 145870 }, { "epoch": 0.7285075782166845, "grad_norm": 0.09218775480985641, "learning_rate": 8.165160579739168e-06, "loss": 8.6084, "step": 145880 }, { "epoch": 0.728557517041624, "grad_norm": 0.09094615280628204, "learning_rate": 8.163658664797618e-06, "loss": 8.6155, "step": 145890 }, { "epoch": 0.7286074558665635, "grad_norm": 0.08593304455280304, "learning_rate": 8.162156749856067e-06, "loss": 8.6124, "step": 145900 }, { "epoch": 0.7286573946915029, "grad_norm": 0.09099908173084259, "learning_rate": 8.160654834914515e-06, "loss": 8.6163, "step": 145910 }, { "epoch": 0.7287073335164423, "grad_norm": 0.0902167484164238, "learning_rate": 8.159152919972965e-06, "loss": 8.6227, "step": 145920 }, { "epoch": 0.7287572723413818, "grad_norm": 0.09443819522857666, "learning_rate": 8.157651005031415e-06, "loss": 8.6165, "step": 145930 }, { "epoch": 0.7288072111663213, "grad_norm": 0.09116199612617493, "learning_rate": 8.156149090089866e-06, "loss": 8.6201, "step": 145940 }, { "epoch": 0.7288571499912607, "grad_norm": 0.09759961068630219, "learning_rate": 8.154647175148314e-06, "loss": 8.6189, "step": 145950 }, { "epoch": 0.7289070888162001, "grad_norm": 0.09084867686033249, "learning_rate": 8.153145260206764e-06, "loss": 8.5922, "step": 145960 }, { "epoch": 0.7289570276411396, "grad_norm": 0.09356144815683365, "learning_rate": 8.151643345265213e-06, "loss": 8.6069, "step": 145970 }, { "epoch": 0.7290069664660791, "grad_norm": 0.08888962864875793, "learning_rate": 8.150141430323663e-06, "loss": 8.6001, "step": 145980 }, { "epoch": 0.7290569052910185, "grad_norm": 0.08829820901155472, "learning_rate": 8.148639515382113e-06, "loss": 8.6117, "step": 145990 }, { "epoch": 0.7291068441159579, "grad_norm": 0.092521533370018, "learning_rate": 8.147137600440562e-06, "loss": 8.6116, "step": 146000 }, { "epoch": 0.7291567829408974, "grad_norm": 0.09116620570421219, "learning_rate": 8.145635685499012e-06, "loss": 8.6103, "step": 146010 }, { "epoch": 0.7292067217658369, "grad_norm": 0.09370893239974976, "learning_rate": 8.14413377055746e-06, "loss": 8.5975, "step": 146020 }, { "epoch": 0.7292566605907763, "grad_norm": 0.09606023132801056, "learning_rate": 8.14263185561591e-06, "loss": 8.6029, "step": 146030 }, { "epoch": 0.7293065994157157, "grad_norm": 0.08850355446338654, "learning_rate": 8.14112994067436e-06, "loss": 8.6077, "step": 146040 }, { "epoch": 0.7293565382406552, "grad_norm": 0.09388591349124908, "learning_rate": 8.13962802573281e-06, "loss": 8.6063, "step": 146050 }, { "epoch": 0.7294064770655947, "grad_norm": 0.09386676549911499, "learning_rate": 8.13812611079126e-06, "loss": 8.5971, "step": 146060 }, { "epoch": 0.7294564158905341, "grad_norm": 0.09026592969894409, "learning_rate": 8.136624195849708e-06, "loss": 8.5921, "step": 146070 }, { "epoch": 0.7295063547154735, "grad_norm": 0.09007930010557175, "learning_rate": 8.135122280908158e-06, "loss": 8.6017, "step": 146080 }, { "epoch": 0.729556293540413, "grad_norm": 0.0955241322517395, "learning_rate": 8.133620365966608e-06, "loss": 8.6081, "step": 146090 }, { "epoch": 0.7296062323653525, "grad_norm": 0.09348488599061966, "learning_rate": 8.132118451025057e-06, "loss": 8.5981, "step": 146100 }, { "epoch": 0.7296561711902919, "grad_norm": 0.08962753415107727, "learning_rate": 8.130616536083507e-06, "loss": 8.6063, "step": 146110 }, { "epoch": 0.7297061100152313, "grad_norm": 0.09236380457878113, "learning_rate": 8.129114621141957e-06, "loss": 8.5859, "step": 146120 }, { "epoch": 0.7297560488401708, "grad_norm": 0.08918324112892151, "learning_rate": 8.127612706200406e-06, "loss": 8.6115, "step": 146130 }, { "epoch": 0.7298059876651103, "grad_norm": 0.090632364153862, "learning_rate": 8.126110791258856e-06, "loss": 8.6016, "step": 146140 }, { "epoch": 0.7298559264900497, "grad_norm": 0.09161046892404556, "learning_rate": 8.124608876317304e-06, "loss": 8.5882, "step": 146150 }, { "epoch": 0.7299058653149891, "grad_norm": 0.09371636062860489, "learning_rate": 8.123106961375754e-06, "loss": 8.5995, "step": 146160 }, { "epoch": 0.7299558041399286, "grad_norm": 0.08753936737775803, "learning_rate": 8.121605046434205e-06, "loss": 8.5968, "step": 146170 }, { "epoch": 0.7300057429648681, "grad_norm": 0.0922481119632721, "learning_rate": 8.120103131492653e-06, "loss": 8.5951, "step": 146180 }, { "epoch": 0.7300556817898075, "grad_norm": 0.0926506519317627, "learning_rate": 8.118601216551103e-06, "loss": 8.603, "step": 146190 }, { "epoch": 0.7301056206147469, "grad_norm": 0.08759573101997375, "learning_rate": 8.117099301609552e-06, "loss": 8.6076, "step": 146200 }, { "epoch": 0.7301555594396864, "grad_norm": 0.09604066610336304, "learning_rate": 8.115597386668002e-06, "loss": 8.6029, "step": 146210 }, { "epoch": 0.7302054982646259, "grad_norm": 0.08609198778867722, "learning_rate": 8.114095471726452e-06, "loss": 8.6056, "step": 146220 }, { "epoch": 0.7302554370895653, "grad_norm": 0.08941318094730377, "learning_rate": 8.1125935567849e-06, "loss": 8.6103, "step": 146230 }, { "epoch": 0.7303053759145047, "grad_norm": 0.09113612025976181, "learning_rate": 8.11109164184335e-06, "loss": 8.6069, "step": 146240 }, { "epoch": 0.7303553147394442, "grad_norm": 0.09444758296012878, "learning_rate": 8.1095897269018e-06, "loss": 8.6201, "step": 146250 }, { "epoch": 0.7304052535643837, "grad_norm": 0.0867748036980629, "learning_rate": 8.10808781196025e-06, "loss": 8.6056, "step": 146260 }, { "epoch": 0.7304551923893231, "grad_norm": 0.09332028776407242, "learning_rate": 8.1065858970187e-06, "loss": 8.6122, "step": 146270 }, { "epoch": 0.7305051312142625, "grad_norm": 0.09203875064849854, "learning_rate": 8.10508398207715e-06, "loss": 8.6157, "step": 146280 }, { "epoch": 0.730555070039202, "grad_norm": 0.09554773569107056, "learning_rate": 8.103582067135598e-06, "loss": 8.5996, "step": 146290 }, { "epoch": 0.7306050088641415, "grad_norm": 0.09731141477823257, "learning_rate": 8.102080152194047e-06, "loss": 8.5881, "step": 146300 }, { "epoch": 0.7306549476890809, "grad_norm": 0.0938003659248352, "learning_rate": 8.100578237252497e-06, "loss": 8.6107, "step": 146310 }, { "epoch": 0.7307048865140203, "grad_norm": 0.09249870479106903, "learning_rate": 8.099076322310947e-06, "loss": 8.6074, "step": 146320 }, { "epoch": 0.7307548253389597, "grad_norm": 0.08894717693328857, "learning_rate": 8.097574407369397e-06, "loss": 8.6157, "step": 146330 }, { "epoch": 0.7308047641638993, "grad_norm": 0.08928055316209793, "learning_rate": 8.096072492427846e-06, "loss": 8.611, "step": 146340 }, { "epoch": 0.7308547029888387, "grad_norm": 0.08695624768733978, "learning_rate": 8.094570577486294e-06, "loss": 8.5939, "step": 146350 }, { "epoch": 0.7309046418137781, "grad_norm": 0.09484840929508209, "learning_rate": 8.093068662544744e-06, "loss": 8.6148, "step": 146360 }, { "epoch": 0.7309545806387175, "grad_norm": 0.09645149856805801, "learning_rate": 8.091566747603195e-06, "loss": 8.6244, "step": 146370 }, { "epoch": 0.731004519463657, "grad_norm": 0.09232309460639954, "learning_rate": 8.090064832661645e-06, "loss": 8.6222, "step": 146380 }, { "epoch": 0.7310544582885965, "grad_norm": 0.0919228121638298, "learning_rate": 8.088562917720093e-06, "loss": 8.5962, "step": 146390 }, { "epoch": 0.7311043971135359, "grad_norm": 0.09435618668794632, "learning_rate": 8.087061002778542e-06, "loss": 8.62, "step": 146400 }, { "epoch": 0.7311543359384753, "grad_norm": 0.09660220891237259, "learning_rate": 8.085559087836992e-06, "loss": 8.5962, "step": 146410 }, { "epoch": 0.7312042747634148, "grad_norm": 0.0912780910730362, "learning_rate": 8.084057172895442e-06, "loss": 8.6183, "step": 146420 }, { "epoch": 0.7312542135883543, "grad_norm": 0.09284259378910065, "learning_rate": 8.082555257953892e-06, "loss": 8.61, "step": 146430 }, { "epoch": 0.7313041524132937, "grad_norm": 0.09274186193943024, "learning_rate": 8.08105334301234e-06, "loss": 8.6186, "step": 146440 }, { "epoch": 0.7313540912382331, "grad_norm": 0.09855927526950836, "learning_rate": 8.07955142807079e-06, "loss": 8.5975, "step": 146450 }, { "epoch": 0.7314040300631726, "grad_norm": 0.0931464433670044, "learning_rate": 8.07804951312924e-06, "loss": 8.6029, "step": 146460 }, { "epoch": 0.7314539688881121, "grad_norm": 0.09151794761419296, "learning_rate": 8.07654759818769e-06, "loss": 8.596, "step": 146470 }, { "epoch": 0.7315039077130515, "grad_norm": 0.08983919769525528, "learning_rate": 8.07504568324614e-06, "loss": 8.6227, "step": 146480 }, { "epoch": 0.7315538465379909, "grad_norm": 0.09787040948867798, "learning_rate": 8.07354376830459e-06, "loss": 8.6004, "step": 146490 }, { "epoch": 0.7316037853629304, "grad_norm": 0.09325719624757767, "learning_rate": 8.072041853363037e-06, "loss": 8.612, "step": 146500 }, { "epoch": 0.7316537241878699, "grad_norm": 0.09518974274396896, "learning_rate": 8.070539938421487e-06, "loss": 8.6086, "step": 146510 }, { "epoch": 0.7317036630128093, "grad_norm": 0.09281483292579651, "learning_rate": 8.069038023479937e-06, "loss": 8.6059, "step": 146520 }, { "epoch": 0.7317536018377487, "grad_norm": 0.09161432087421417, "learning_rate": 8.067536108538387e-06, "loss": 8.6162, "step": 146530 }, { "epoch": 0.7318035406626882, "grad_norm": 0.08932630717754364, "learning_rate": 8.066034193596837e-06, "loss": 8.6122, "step": 146540 }, { "epoch": 0.7318534794876277, "grad_norm": 0.10480964183807373, "learning_rate": 8.064532278655284e-06, "loss": 8.5951, "step": 146550 }, { "epoch": 0.7319034183125671, "grad_norm": 0.09288931638002396, "learning_rate": 8.063030363713734e-06, "loss": 8.6034, "step": 146560 }, { "epoch": 0.7319533571375065, "grad_norm": 0.09532026201486588, "learning_rate": 8.061528448772185e-06, "loss": 8.6096, "step": 146570 }, { "epoch": 0.732003295962446, "grad_norm": 0.09342137724161148, "learning_rate": 8.060026533830635e-06, "loss": 8.5875, "step": 146580 }, { "epoch": 0.7320532347873855, "grad_norm": 0.091585174202919, "learning_rate": 8.058524618889085e-06, "loss": 8.61, "step": 146590 }, { "epoch": 0.7321031736123249, "grad_norm": 0.08510378003120422, "learning_rate": 8.057022703947532e-06, "loss": 8.6082, "step": 146600 }, { "epoch": 0.7321531124372643, "grad_norm": 0.0953449234366417, "learning_rate": 8.055520789005982e-06, "loss": 8.6204, "step": 146610 }, { "epoch": 0.7322030512622038, "grad_norm": 0.0904490277171135, "learning_rate": 8.054018874064432e-06, "loss": 8.6037, "step": 146620 }, { "epoch": 0.7322529900871433, "grad_norm": 0.09040288627147675, "learning_rate": 8.052516959122882e-06, "loss": 8.6055, "step": 146630 }, { "epoch": 0.7323029289120827, "grad_norm": 0.09579188376665115, "learning_rate": 8.051015044181332e-06, "loss": 8.6217, "step": 146640 }, { "epoch": 0.7323528677370221, "grad_norm": 0.09186054021120071, "learning_rate": 8.049513129239781e-06, "loss": 8.5966, "step": 146650 }, { "epoch": 0.7324028065619616, "grad_norm": 0.08724722266197205, "learning_rate": 8.04801121429823e-06, "loss": 8.6201, "step": 146660 }, { "epoch": 0.7324527453869011, "grad_norm": 0.09225723147392273, "learning_rate": 8.04650929935668e-06, "loss": 8.604, "step": 146670 }, { "epoch": 0.7325026842118405, "grad_norm": 0.0851278230547905, "learning_rate": 8.04500738441513e-06, "loss": 8.6148, "step": 146680 }, { "epoch": 0.7325526230367799, "grad_norm": 0.09377813339233398, "learning_rate": 8.04350546947358e-06, "loss": 8.6004, "step": 146690 }, { "epoch": 0.7326025618617193, "grad_norm": 0.0915536880493164, "learning_rate": 8.042003554532028e-06, "loss": 8.6146, "step": 146700 }, { "epoch": 0.7326525006866589, "grad_norm": 0.09648875147104263, "learning_rate": 8.040501639590477e-06, "loss": 8.6067, "step": 146710 }, { "epoch": 0.7327024395115983, "grad_norm": 0.09112456440925598, "learning_rate": 8.038999724648927e-06, "loss": 8.6042, "step": 146720 }, { "epoch": 0.7327523783365377, "grad_norm": 0.08740793168544769, "learning_rate": 8.037497809707377e-06, "loss": 8.6129, "step": 146730 }, { "epoch": 0.7328023171614771, "grad_norm": 0.09064393490552902, "learning_rate": 8.035995894765827e-06, "loss": 8.6168, "step": 146740 }, { "epoch": 0.7328522559864167, "grad_norm": 0.09343009442090988, "learning_rate": 8.034493979824276e-06, "loss": 8.5964, "step": 146750 }, { "epoch": 0.7329021948113561, "grad_norm": 0.10053983330726624, "learning_rate": 8.032992064882724e-06, "loss": 8.5884, "step": 146760 }, { "epoch": 0.7329521336362955, "grad_norm": 0.09632902592420578, "learning_rate": 8.031490149941175e-06, "loss": 8.6121, "step": 146770 }, { "epoch": 0.7330020724612349, "grad_norm": 0.09040207415819168, "learning_rate": 8.029988234999625e-06, "loss": 8.6016, "step": 146780 }, { "epoch": 0.7330520112861745, "grad_norm": 0.09205528348684311, "learning_rate": 8.028486320058075e-06, "loss": 8.5859, "step": 146790 }, { "epoch": 0.7331019501111139, "grad_norm": 0.09228353202342987, "learning_rate": 8.026984405116523e-06, "loss": 8.6206, "step": 146800 }, { "epoch": 0.7331518889360533, "grad_norm": 0.08614012598991394, "learning_rate": 8.025482490174974e-06, "loss": 8.6226, "step": 146810 }, { "epoch": 0.7332018277609927, "grad_norm": 0.0924997478723526, "learning_rate": 8.023980575233422e-06, "loss": 8.5959, "step": 146820 }, { "epoch": 0.7332517665859323, "grad_norm": 0.09448036551475525, "learning_rate": 8.022478660291872e-06, "loss": 8.6089, "step": 146830 }, { "epoch": 0.7333017054108717, "grad_norm": 0.09214811772108078, "learning_rate": 8.020976745350322e-06, "loss": 8.5967, "step": 146840 }, { "epoch": 0.7333516442358111, "grad_norm": 0.10002180933952332, "learning_rate": 8.019474830408771e-06, "loss": 8.5945, "step": 146850 }, { "epoch": 0.7334015830607505, "grad_norm": 0.08861683309078217, "learning_rate": 8.017972915467221e-06, "loss": 8.5851, "step": 146860 }, { "epoch": 0.7334515218856901, "grad_norm": 0.09217624366283417, "learning_rate": 8.01647100052567e-06, "loss": 8.6059, "step": 146870 }, { "epoch": 0.7335014607106295, "grad_norm": 0.0931958556175232, "learning_rate": 8.01496908558412e-06, "loss": 8.6036, "step": 146880 }, { "epoch": 0.7335513995355689, "grad_norm": 0.09389421343803406, "learning_rate": 8.01346717064257e-06, "loss": 8.6066, "step": 146890 }, { "epoch": 0.7336013383605083, "grad_norm": 0.08797122538089752, "learning_rate": 8.011965255701018e-06, "loss": 8.6272, "step": 146900 }, { "epoch": 0.7336512771854479, "grad_norm": 0.09919506311416626, "learning_rate": 8.010463340759469e-06, "loss": 8.5877, "step": 146910 }, { "epoch": 0.7337012160103873, "grad_norm": 0.09523117542266846, "learning_rate": 8.008961425817917e-06, "loss": 8.5809, "step": 146920 }, { "epoch": 0.7337511548353267, "grad_norm": 0.0903659462928772, "learning_rate": 8.007459510876367e-06, "loss": 8.6084, "step": 146930 }, { "epoch": 0.7338010936602661, "grad_norm": 0.09395240247249603, "learning_rate": 8.005957595934817e-06, "loss": 8.59, "step": 146940 }, { "epoch": 0.7338510324852057, "grad_norm": 0.0915970653295517, "learning_rate": 8.004455680993266e-06, "loss": 8.6173, "step": 146950 }, { "epoch": 0.7339009713101451, "grad_norm": 0.10002275556325912, "learning_rate": 8.002953766051716e-06, "loss": 8.5978, "step": 146960 }, { "epoch": 0.7339509101350845, "grad_norm": 0.10154590010643005, "learning_rate": 8.001451851110166e-06, "loss": 8.5999, "step": 146970 }, { "epoch": 0.7340008489600239, "grad_norm": 0.09146125614643097, "learning_rate": 7.999949936168615e-06, "loss": 8.6126, "step": 146980 }, { "epoch": 0.7340507877849635, "grad_norm": 0.09198717772960663, "learning_rate": 7.998448021227065e-06, "loss": 8.6202, "step": 146990 }, { "epoch": 0.7341007266099029, "grad_norm": 0.09776240587234497, "learning_rate": 7.996946106285513e-06, "loss": 8.6133, "step": 147000 }, { "epoch": 0.7341506654348423, "grad_norm": 0.09213253110647202, "learning_rate": 7.995444191343964e-06, "loss": 8.6106, "step": 147010 }, { "epoch": 0.7342006042597817, "grad_norm": 0.09108315408229828, "learning_rate": 7.993942276402414e-06, "loss": 8.6227, "step": 147020 }, { "epoch": 0.7342505430847213, "grad_norm": 0.08943723142147064, "learning_rate": 7.992440361460862e-06, "loss": 8.6152, "step": 147030 }, { "epoch": 0.7343004819096607, "grad_norm": 0.08865174651145935, "learning_rate": 7.990938446519312e-06, "loss": 8.591, "step": 147040 }, { "epoch": 0.7343504207346001, "grad_norm": 0.0994340255856514, "learning_rate": 7.989436531577763e-06, "loss": 8.6018, "step": 147050 }, { "epoch": 0.7344003595595395, "grad_norm": 0.09007614850997925, "learning_rate": 7.987934616636211e-06, "loss": 8.5896, "step": 147060 }, { "epoch": 0.7344502983844791, "grad_norm": 0.08295390009880066, "learning_rate": 7.986432701694661e-06, "loss": 8.6055, "step": 147070 }, { "epoch": 0.7345002372094185, "grad_norm": 0.09356316924095154, "learning_rate": 7.98493078675311e-06, "loss": 8.6058, "step": 147080 }, { "epoch": 0.7345501760343579, "grad_norm": 0.09198088943958282, "learning_rate": 7.98342887181156e-06, "loss": 8.6089, "step": 147090 }, { "epoch": 0.7346001148592973, "grad_norm": 0.09037932008504868, "learning_rate": 7.98192695687001e-06, "loss": 8.6066, "step": 147100 }, { "epoch": 0.7346500536842369, "grad_norm": 0.0916791781783104, "learning_rate": 7.980425041928459e-06, "loss": 8.6022, "step": 147110 }, { "epoch": 0.7346999925091763, "grad_norm": 0.09494098275899887, "learning_rate": 7.978923126986909e-06, "loss": 8.5989, "step": 147120 }, { "epoch": 0.7347499313341157, "grad_norm": 0.0917518213391304, "learning_rate": 7.977421212045359e-06, "loss": 8.5957, "step": 147130 }, { "epoch": 0.7347998701590551, "grad_norm": 0.08974310755729675, "learning_rate": 7.975919297103807e-06, "loss": 8.6081, "step": 147140 }, { "epoch": 0.7348498089839947, "grad_norm": 0.09109298884868622, "learning_rate": 7.974417382162258e-06, "loss": 8.5945, "step": 147150 }, { "epoch": 0.7348997478089341, "grad_norm": 0.08790141344070435, "learning_rate": 7.972915467220706e-06, "loss": 8.6105, "step": 147160 }, { "epoch": 0.7349496866338735, "grad_norm": 0.09072345495223999, "learning_rate": 7.971413552279156e-06, "loss": 8.6066, "step": 147170 }, { "epoch": 0.7349996254588129, "grad_norm": 0.08956841379404068, "learning_rate": 7.969911637337606e-06, "loss": 8.6191, "step": 147180 }, { "epoch": 0.7350495642837525, "grad_norm": 0.09307093918323517, "learning_rate": 7.968409722396055e-06, "loss": 8.6011, "step": 147190 }, { "epoch": 0.7350995031086919, "grad_norm": 0.09060484170913696, "learning_rate": 7.966907807454505e-06, "loss": 8.6013, "step": 147200 }, { "epoch": 0.7351494419336313, "grad_norm": 0.08914856612682343, "learning_rate": 7.965405892512954e-06, "loss": 8.6046, "step": 147210 }, { "epoch": 0.7351993807585707, "grad_norm": 0.0949515700340271, "learning_rate": 7.963903977571404e-06, "loss": 8.6229, "step": 147220 }, { "epoch": 0.7352493195835103, "grad_norm": 0.09112024307250977, "learning_rate": 7.962402062629854e-06, "loss": 8.6261, "step": 147230 }, { "epoch": 0.7352992584084497, "grad_norm": 0.0883699506521225, "learning_rate": 7.960900147688302e-06, "loss": 8.612, "step": 147240 }, { "epoch": 0.7353491972333891, "grad_norm": 0.094344362616539, "learning_rate": 7.959398232746753e-06, "loss": 8.61, "step": 147250 }, { "epoch": 0.7353991360583285, "grad_norm": 0.08884866535663605, "learning_rate": 7.957896317805201e-06, "loss": 8.6119, "step": 147260 }, { "epoch": 0.735449074883268, "grad_norm": 0.09071295708417892, "learning_rate": 7.956394402863651e-06, "loss": 8.6011, "step": 147270 }, { "epoch": 0.7354990137082075, "grad_norm": 0.09404217451810837, "learning_rate": 7.954892487922101e-06, "loss": 8.5964, "step": 147280 }, { "epoch": 0.7355489525331469, "grad_norm": 0.0958835780620575, "learning_rate": 7.953390572980552e-06, "loss": 8.5959, "step": 147290 }, { "epoch": 0.7355988913580863, "grad_norm": 0.09104613214731216, "learning_rate": 7.951888658039e-06, "loss": 8.5849, "step": 147300 }, { "epoch": 0.7356488301830258, "grad_norm": 0.0909975990653038, "learning_rate": 7.950386743097449e-06, "loss": 8.6005, "step": 147310 }, { "epoch": 0.7356987690079653, "grad_norm": 0.09111376851797104, "learning_rate": 7.948884828155899e-06, "loss": 8.6123, "step": 147320 }, { "epoch": 0.7357487078329047, "grad_norm": 0.08861295878887177, "learning_rate": 7.947382913214349e-06, "loss": 8.5943, "step": 147330 }, { "epoch": 0.7357986466578441, "grad_norm": 0.09001336991786957, "learning_rate": 7.9458809982728e-06, "loss": 8.5862, "step": 147340 }, { "epoch": 0.7358485854827835, "grad_norm": 0.0889565646648407, "learning_rate": 7.944379083331248e-06, "loss": 8.5973, "step": 147350 }, { "epoch": 0.7358985243077231, "grad_norm": 0.09270651638507843, "learning_rate": 7.942877168389696e-06, "loss": 8.5932, "step": 147360 }, { "epoch": 0.7359484631326625, "grad_norm": 0.09283141791820526, "learning_rate": 7.941375253448146e-06, "loss": 8.595, "step": 147370 }, { "epoch": 0.7359984019576019, "grad_norm": 0.09074781090021133, "learning_rate": 7.939873338506596e-06, "loss": 8.6006, "step": 147380 }, { "epoch": 0.7360483407825413, "grad_norm": 0.09841238707304001, "learning_rate": 7.938371423565047e-06, "loss": 8.6104, "step": 147390 }, { "epoch": 0.7360982796074809, "grad_norm": 0.09481830149888992, "learning_rate": 7.936869508623495e-06, "loss": 8.6014, "step": 147400 }, { "epoch": 0.7361482184324203, "grad_norm": 0.08906048536300659, "learning_rate": 7.935367593681944e-06, "loss": 8.6178, "step": 147410 }, { "epoch": 0.7361981572573597, "grad_norm": 0.08914127200841904, "learning_rate": 7.933865678740394e-06, "loss": 8.6059, "step": 147420 }, { "epoch": 0.7362480960822991, "grad_norm": 0.09480444341897964, "learning_rate": 7.932363763798844e-06, "loss": 8.5944, "step": 147430 }, { "epoch": 0.7362980349072387, "grad_norm": 0.09161514043807983, "learning_rate": 7.930861848857294e-06, "loss": 8.6164, "step": 147440 }, { "epoch": 0.7363479737321781, "grad_norm": 0.09558946639299393, "learning_rate": 7.929359933915744e-06, "loss": 8.5894, "step": 147450 }, { "epoch": 0.7363979125571175, "grad_norm": 0.09459544718265533, "learning_rate": 7.927858018974191e-06, "loss": 8.598, "step": 147460 }, { "epoch": 0.7364478513820569, "grad_norm": 0.09031836688518524, "learning_rate": 7.926356104032641e-06, "loss": 8.6046, "step": 147470 }, { "epoch": 0.7364977902069965, "grad_norm": 0.0889408141374588, "learning_rate": 7.924854189091091e-06, "loss": 8.6006, "step": 147480 }, { "epoch": 0.7365477290319359, "grad_norm": 0.09583480656147003, "learning_rate": 7.923352274149542e-06, "loss": 8.6201, "step": 147490 }, { "epoch": 0.7365976678568753, "grad_norm": 0.09124153107404709, "learning_rate": 7.921850359207992e-06, "loss": 8.606, "step": 147500 }, { "epoch": 0.7366476066818147, "grad_norm": 0.09104292094707489, "learning_rate": 7.920348444266439e-06, "loss": 8.5772, "step": 147510 }, { "epoch": 0.7366975455067543, "grad_norm": 0.09210095554590225, "learning_rate": 7.918846529324889e-06, "loss": 8.6044, "step": 147520 }, { "epoch": 0.7367474843316937, "grad_norm": 0.08999733626842499, "learning_rate": 7.917344614383339e-06, "loss": 8.6077, "step": 147530 }, { "epoch": 0.7367974231566331, "grad_norm": 0.09261172264814377, "learning_rate": 7.91584269944179e-06, "loss": 8.5944, "step": 147540 }, { "epoch": 0.7368473619815725, "grad_norm": 0.0916711613535881, "learning_rate": 7.91434078450024e-06, "loss": 8.5811, "step": 147550 }, { "epoch": 0.7368973008065121, "grad_norm": 0.09489434212446213, "learning_rate": 7.912838869558686e-06, "loss": 8.6007, "step": 147560 }, { "epoch": 0.7369472396314515, "grad_norm": 0.09215337038040161, "learning_rate": 7.911336954617136e-06, "loss": 8.6051, "step": 147570 }, { "epoch": 0.7369971784563909, "grad_norm": 0.09621989727020264, "learning_rate": 7.909835039675586e-06, "loss": 8.6042, "step": 147580 }, { "epoch": 0.7370471172813303, "grad_norm": 0.09417767822742462, "learning_rate": 7.908333124734037e-06, "loss": 8.5984, "step": 147590 }, { "epoch": 0.7370970561062699, "grad_norm": 0.09243947267532349, "learning_rate": 7.906831209792487e-06, "loss": 8.6154, "step": 147600 }, { "epoch": 0.7371469949312093, "grad_norm": 0.09667365252971649, "learning_rate": 7.905329294850935e-06, "loss": 8.6127, "step": 147610 }, { "epoch": 0.7371969337561487, "grad_norm": 0.09225791692733765, "learning_rate": 7.903827379909384e-06, "loss": 8.5939, "step": 147620 }, { "epoch": 0.7372468725810881, "grad_norm": 0.09003566205501556, "learning_rate": 7.902325464967834e-06, "loss": 8.5911, "step": 147630 }, { "epoch": 0.7372968114060277, "grad_norm": 0.09092370420694351, "learning_rate": 7.900823550026284e-06, "loss": 8.5904, "step": 147640 }, { "epoch": 0.7373467502309671, "grad_norm": 0.09510780870914459, "learning_rate": 7.899321635084734e-06, "loss": 8.6026, "step": 147650 }, { "epoch": 0.7373966890559065, "grad_norm": 0.09396243095397949, "learning_rate": 7.897819720143183e-06, "loss": 8.6125, "step": 147660 }, { "epoch": 0.7374466278808459, "grad_norm": 0.09083011746406555, "learning_rate": 7.896317805201631e-06, "loss": 8.5926, "step": 147670 }, { "epoch": 0.7374965667057855, "grad_norm": 0.10038017481565475, "learning_rate": 7.894815890260082e-06, "loss": 8.6012, "step": 147680 }, { "epoch": 0.7375465055307249, "grad_norm": 0.0896848812699318, "learning_rate": 7.893313975318532e-06, "loss": 8.5961, "step": 147690 }, { "epoch": 0.7375964443556643, "grad_norm": 0.09542018920183182, "learning_rate": 7.891812060376982e-06, "loss": 8.6294, "step": 147700 }, { "epoch": 0.7376463831806037, "grad_norm": 0.0885300263762474, "learning_rate": 7.89031014543543e-06, "loss": 8.607, "step": 147710 }, { "epoch": 0.7376963220055432, "grad_norm": 0.0911983922123909, "learning_rate": 7.888808230493879e-06, "loss": 8.5836, "step": 147720 }, { "epoch": 0.7377462608304827, "grad_norm": 0.08745992183685303, "learning_rate": 7.887306315552329e-06, "loss": 8.5998, "step": 147730 }, { "epoch": 0.7377961996554221, "grad_norm": 0.09371349215507507, "learning_rate": 7.88580440061078e-06, "loss": 8.5988, "step": 147740 }, { "epoch": 0.7378461384803615, "grad_norm": 0.09324972331523895, "learning_rate": 7.88430248566923e-06, "loss": 8.5987, "step": 147750 }, { "epoch": 0.737896077305301, "grad_norm": 0.09328649193048477, "learning_rate": 7.882800570727678e-06, "loss": 8.6026, "step": 147760 }, { "epoch": 0.7379460161302405, "grad_norm": 0.0888761654496193, "learning_rate": 7.881298655786128e-06, "loss": 8.6012, "step": 147770 }, { "epoch": 0.7379959549551799, "grad_norm": 0.09246739745140076, "learning_rate": 7.879796740844577e-06, "loss": 8.6072, "step": 147780 }, { "epoch": 0.7380458937801193, "grad_norm": 0.09637174010276794, "learning_rate": 7.878294825903027e-06, "loss": 8.6062, "step": 147790 }, { "epoch": 0.7380958326050588, "grad_norm": 0.08997974544763565, "learning_rate": 7.876792910961477e-06, "loss": 8.5919, "step": 147800 }, { "epoch": 0.7381457714299983, "grad_norm": 0.08694270998239517, "learning_rate": 7.875290996019925e-06, "loss": 8.5971, "step": 147810 }, { "epoch": 0.7381957102549377, "grad_norm": 0.09957410395145416, "learning_rate": 7.873789081078376e-06, "loss": 8.5931, "step": 147820 }, { "epoch": 0.7382456490798771, "grad_norm": 0.09661848098039627, "learning_rate": 7.872287166136824e-06, "loss": 8.6017, "step": 147830 }, { "epoch": 0.7382955879048166, "grad_norm": 0.09177085012197495, "learning_rate": 7.870785251195274e-06, "loss": 8.6021, "step": 147840 }, { "epoch": 0.7383455267297561, "grad_norm": 0.0871456116437912, "learning_rate": 7.869283336253724e-06, "loss": 8.5894, "step": 147850 }, { "epoch": 0.7383954655546955, "grad_norm": 0.09130498766899109, "learning_rate": 7.867781421312173e-06, "loss": 8.5875, "step": 147860 }, { "epoch": 0.7384454043796349, "grad_norm": 0.09617603570222855, "learning_rate": 7.866279506370623e-06, "loss": 8.6145, "step": 147870 }, { "epoch": 0.7384953432045744, "grad_norm": 0.08777206391096115, "learning_rate": 7.864777591429072e-06, "loss": 8.6098, "step": 147880 }, { "epoch": 0.7385452820295139, "grad_norm": 0.08969053626060486, "learning_rate": 7.863275676487522e-06, "loss": 8.5935, "step": 147890 }, { "epoch": 0.7385952208544533, "grad_norm": 0.091539166867733, "learning_rate": 7.861773761545972e-06, "loss": 8.5958, "step": 147900 }, { "epoch": 0.7386451596793927, "grad_norm": 0.0880018100142479, "learning_rate": 7.86027184660442e-06, "loss": 8.6105, "step": 147910 }, { "epoch": 0.7386950985043322, "grad_norm": 0.09044984728097916, "learning_rate": 7.85876993166287e-06, "loss": 8.6065, "step": 147920 }, { "epoch": 0.7387450373292717, "grad_norm": 0.0911531150341034, "learning_rate": 7.85726801672132e-06, "loss": 8.6034, "step": 147930 }, { "epoch": 0.7387949761542111, "grad_norm": 0.09581080824136734, "learning_rate": 7.85576610177977e-06, "loss": 8.5987, "step": 147940 }, { "epoch": 0.7388449149791505, "grad_norm": 0.08814167231321335, "learning_rate": 7.85426418683822e-06, "loss": 8.604, "step": 147950 }, { "epoch": 0.73889485380409, "grad_norm": 0.09530513733625412, "learning_rate": 7.852762271896668e-06, "loss": 8.5862, "step": 147960 }, { "epoch": 0.7389447926290295, "grad_norm": 0.08761637657880783, "learning_rate": 7.851260356955118e-06, "loss": 8.6251, "step": 147970 }, { "epoch": 0.7389947314539689, "grad_norm": 0.09182920306921005, "learning_rate": 7.849758442013568e-06, "loss": 8.6002, "step": 147980 }, { "epoch": 0.7390446702789083, "grad_norm": 0.0961126983165741, "learning_rate": 7.848256527072017e-06, "loss": 8.6096, "step": 147990 }, { "epoch": 0.7390946091038478, "grad_norm": 0.09371432662010193, "learning_rate": 7.846754612130467e-06, "loss": 8.616, "step": 148000 }, { "epoch": 0.7391445479287873, "grad_norm": 0.08760569244623184, "learning_rate": 7.845252697188915e-06, "loss": 8.6174, "step": 148010 }, { "epoch": 0.7391944867537267, "grad_norm": 0.09141170978546143, "learning_rate": 7.843750782247366e-06, "loss": 8.6018, "step": 148020 }, { "epoch": 0.7392444255786661, "grad_norm": 0.09060084819793701, "learning_rate": 7.842248867305816e-06, "loss": 8.5826, "step": 148030 }, { "epoch": 0.7392943644036056, "grad_norm": 0.09274831414222717, "learning_rate": 7.840746952364264e-06, "loss": 8.6069, "step": 148040 }, { "epoch": 0.739344303228545, "grad_norm": 0.09324593096971512, "learning_rate": 7.839245037422714e-06, "loss": 8.6036, "step": 148050 }, { "epoch": 0.7393942420534845, "grad_norm": 0.09713485836982727, "learning_rate": 7.837743122481163e-06, "loss": 8.6138, "step": 148060 }, { "epoch": 0.7394441808784239, "grad_norm": 0.08883433789014816, "learning_rate": 7.836241207539613e-06, "loss": 8.6116, "step": 148070 }, { "epoch": 0.7394941197033634, "grad_norm": 0.09417309612035751, "learning_rate": 7.834739292598063e-06, "loss": 8.5914, "step": 148080 }, { "epoch": 0.7395440585283029, "grad_norm": 0.09347934275865555, "learning_rate": 7.833237377656513e-06, "loss": 8.6136, "step": 148090 }, { "epoch": 0.7395939973532423, "grad_norm": 0.09639857709407806, "learning_rate": 7.831735462714962e-06, "loss": 8.5874, "step": 148100 }, { "epoch": 0.7396439361781817, "grad_norm": 0.09195045381784439, "learning_rate": 7.83023354777341e-06, "loss": 8.5943, "step": 148110 }, { "epoch": 0.7396938750031212, "grad_norm": 0.09634042531251907, "learning_rate": 7.82873163283186e-06, "loss": 8.6032, "step": 148120 }, { "epoch": 0.7397438138280606, "grad_norm": 0.08561528474092484, "learning_rate": 7.82722971789031e-06, "loss": 8.5971, "step": 148130 }, { "epoch": 0.7397937526530001, "grad_norm": 0.08745627850294113, "learning_rate": 7.825727802948761e-06, "loss": 8.5908, "step": 148140 }, { "epoch": 0.7398436914779395, "grad_norm": 0.09139811247587204, "learning_rate": 7.82422588800721e-06, "loss": 8.5864, "step": 148150 }, { "epoch": 0.739893630302879, "grad_norm": 0.09640243649482727, "learning_rate": 7.822723973065658e-06, "loss": 8.599, "step": 148160 }, { "epoch": 0.7399435691278184, "grad_norm": 0.09190690517425537, "learning_rate": 7.821222058124108e-06, "loss": 8.6074, "step": 148170 }, { "epoch": 0.7399935079527579, "grad_norm": 0.08793210238218307, "learning_rate": 7.819720143182558e-06, "loss": 8.602, "step": 148180 }, { "epoch": 0.7400434467776973, "grad_norm": 0.09305689483880997, "learning_rate": 7.818218228241008e-06, "loss": 8.5939, "step": 148190 }, { "epoch": 0.7400933856026368, "grad_norm": 0.09034860879182816, "learning_rate": 7.816716313299457e-06, "loss": 8.6036, "step": 148200 }, { "epoch": 0.7401433244275762, "grad_norm": 0.09198985993862152, "learning_rate": 7.815214398357905e-06, "loss": 8.5912, "step": 148210 }, { "epoch": 0.7401932632525157, "grad_norm": 0.0869131088256836, "learning_rate": 7.813712483416356e-06, "loss": 8.6085, "step": 148220 }, { "epoch": 0.7402432020774551, "grad_norm": 0.09751661121845245, "learning_rate": 7.812210568474806e-06, "loss": 8.6094, "step": 148230 }, { "epoch": 0.7402931409023946, "grad_norm": 0.09016689658164978, "learning_rate": 7.810708653533256e-06, "loss": 8.6164, "step": 148240 }, { "epoch": 0.740343079727334, "grad_norm": 0.09467813372612, "learning_rate": 7.809206738591706e-06, "loss": 8.616, "step": 148250 }, { "epoch": 0.7403930185522735, "grad_norm": 0.09038371592760086, "learning_rate": 7.807704823650153e-06, "loss": 8.6001, "step": 148260 }, { "epoch": 0.7404429573772129, "grad_norm": 0.09057788550853729, "learning_rate": 7.806202908708603e-06, "loss": 8.6125, "step": 148270 }, { "epoch": 0.7404928962021524, "grad_norm": 0.09068594127893448, "learning_rate": 7.804700993767053e-06, "loss": 8.5882, "step": 148280 }, { "epoch": 0.7405428350270918, "grad_norm": 0.09720471501350403, "learning_rate": 7.803199078825503e-06, "loss": 8.6165, "step": 148290 }, { "epoch": 0.7405927738520313, "grad_norm": 0.09376747906208038, "learning_rate": 7.801697163883954e-06, "loss": 8.5862, "step": 148300 }, { "epoch": 0.7406427126769707, "grad_norm": 0.09301353991031647, "learning_rate": 7.8001952489424e-06, "loss": 8.5996, "step": 148310 }, { "epoch": 0.7406926515019102, "grad_norm": 0.09005246311426163, "learning_rate": 7.79869333400085e-06, "loss": 8.6072, "step": 148320 }, { "epoch": 0.7407425903268496, "grad_norm": 0.09260305762290955, "learning_rate": 7.7971914190593e-06, "loss": 8.59, "step": 148330 }, { "epoch": 0.7407925291517891, "grad_norm": 0.09277404844760895, "learning_rate": 7.795689504117751e-06, "loss": 8.618, "step": 148340 }, { "epoch": 0.7408424679767285, "grad_norm": 0.09078875184059143, "learning_rate": 7.794187589176201e-06, "loss": 8.612, "step": 148350 }, { "epoch": 0.7408924068016679, "grad_norm": 0.0957048162817955, "learning_rate": 7.792685674234648e-06, "loss": 8.5944, "step": 148360 }, { "epoch": 0.7409423456266074, "grad_norm": 0.08615356683731079, "learning_rate": 7.791183759293098e-06, "loss": 8.6149, "step": 148370 }, { "epoch": 0.7409922844515469, "grad_norm": 0.0927518680691719, "learning_rate": 7.789681844351548e-06, "loss": 8.5918, "step": 148380 }, { "epoch": 0.7410422232764863, "grad_norm": 0.09268731623888016, "learning_rate": 7.788179929409998e-06, "loss": 8.5982, "step": 148390 }, { "epoch": 0.7410921621014257, "grad_norm": 0.09085573256015778, "learning_rate": 7.786678014468449e-06, "loss": 8.6049, "step": 148400 }, { "epoch": 0.7411421009263652, "grad_norm": 0.09364666044712067, "learning_rate": 7.785176099526897e-06, "loss": 8.5953, "step": 148410 }, { "epoch": 0.7411920397513047, "grad_norm": 0.09044606983661652, "learning_rate": 7.783674184585346e-06, "loss": 8.5928, "step": 148420 }, { "epoch": 0.7412419785762441, "grad_norm": 0.08722547441720963, "learning_rate": 7.782172269643796e-06, "loss": 8.5983, "step": 148430 }, { "epoch": 0.7412919174011835, "grad_norm": 0.0928499698638916, "learning_rate": 7.780670354702246e-06, "loss": 8.615, "step": 148440 }, { "epoch": 0.741341856226123, "grad_norm": 0.09413442015647888, "learning_rate": 7.779168439760696e-06, "loss": 8.6119, "step": 148450 }, { "epoch": 0.7413917950510625, "grad_norm": 0.09118451923131943, "learning_rate": 7.777666524819145e-06, "loss": 8.6034, "step": 148460 }, { "epoch": 0.7414417338760019, "grad_norm": 0.08917475491762161, "learning_rate": 7.776164609877593e-06, "loss": 8.5968, "step": 148470 }, { "epoch": 0.7414916727009413, "grad_norm": 0.09661485254764557, "learning_rate": 7.774662694936043e-06, "loss": 8.589, "step": 148480 }, { "epoch": 0.7415416115258808, "grad_norm": 0.0894039124250412, "learning_rate": 7.773160779994493e-06, "loss": 8.5998, "step": 148490 }, { "epoch": 0.7415915503508202, "grad_norm": 0.09486133605241776, "learning_rate": 7.771658865052944e-06, "loss": 8.6045, "step": 148500 }, { "epoch": 0.7416414891757597, "grad_norm": 0.09382303059101105, "learning_rate": 7.770156950111392e-06, "loss": 8.5874, "step": 148510 }, { "epoch": 0.7416914280006991, "grad_norm": 0.0903899222612381, "learning_rate": 7.76865503516984e-06, "loss": 8.5879, "step": 148520 }, { "epoch": 0.7417413668256386, "grad_norm": 0.08889366686344147, "learning_rate": 7.76715312022829e-06, "loss": 8.5956, "step": 148530 }, { "epoch": 0.741791305650578, "grad_norm": 0.09097573906183243, "learning_rate": 7.765651205286741e-06, "loss": 8.5917, "step": 148540 }, { "epoch": 0.7418412444755175, "grad_norm": 0.09453503787517548, "learning_rate": 7.764149290345191e-06, "loss": 8.5922, "step": 148550 }, { "epoch": 0.7418911833004569, "grad_norm": 0.09525911509990692, "learning_rate": 7.76264737540364e-06, "loss": 8.6012, "step": 148560 }, { "epoch": 0.7419411221253964, "grad_norm": 0.08901818841695786, "learning_rate": 7.76114546046209e-06, "loss": 8.6012, "step": 148570 }, { "epoch": 0.7419910609503358, "grad_norm": 0.09080463647842407, "learning_rate": 7.759643545520538e-06, "loss": 8.6011, "step": 148580 }, { "epoch": 0.7420409997752753, "grad_norm": 0.08285506069660187, "learning_rate": 7.758141630578988e-06, "loss": 8.5975, "step": 148590 }, { "epoch": 0.7420909386002147, "grad_norm": 0.09270590543746948, "learning_rate": 7.756639715637439e-06, "loss": 8.6014, "step": 148600 }, { "epoch": 0.7421408774251542, "grad_norm": 0.09853670746088028, "learning_rate": 7.755137800695887e-06, "loss": 8.5918, "step": 148610 }, { "epoch": 0.7421908162500936, "grad_norm": 0.09975389391183853, "learning_rate": 7.753635885754337e-06, "loss": 8.601, "step": 148620 }, { "epoch": 0.7422407550750331, "grad_norm": 0.09311430156230927, "learning_rate": 7.752133970812786e-06, "loss": 8.5938, "step": 148630 }, { "epoch": 0.7422906938999725, "grad_norm": 0.09312795102596283, "learning_rate": 7.750632055871236e-06, "loss": 8.5892, "step": 148640 }, { "epoch": 0.742340632724912, "grad_norm": 0.09401096403598785, "learning_rate": 7.749130140929686e-06, "loss": 8.6029, "step": 148650 }, { "epoch": 0.7423905715498514, "grad_norm": 0.09024547040462494, "learning_rate": 7.747628225988135e-06, "loss": 8.5963, "step": 148660 }, { "epoch": 0.7424405103747909, "grad_norm": 0.09483632445335388, "learning_rate": 7.746126311046585e-06, "loss": 8.611, "step": 148670 }, { "epoch": 0.7424904491997303, "grad_norm": 0.08980057388544083, "learning_rate": 7.744624396105033e-06, "loss": 8.5968, "step": 148680 }, { "epoch": 0.7425403880246698, "grad_norm": 0.09561526775360107, "learning_rate": 7.743122481163483e-06, "loss": 8.6023, "step": 148690 }, { "epoch": 0.7425903268496092, "grad_norm": 0.09579294919967651, "learning_rate": 7.741620566221934e-06, "loss": 8.5962, "step": 148700 }, { "epoch": 0.7426402656745487, "grad_norm": 0.09053392708301544, "learning_rate": 7.740118651280382e-06, "loss": 8.5915, "step": 148710 }, { "epoch": 0.7426902044994881, "grad_norm": 0.08874721825122833, "learning_rate": 7.738616736338832e-06, "loss": 8.6017, "step": 148720 }, { "epoch": 0.7427401433244276, "grad_norm": 0.09630593657493591, "learning_rate": 7.737114821397282e-06, "loss": 8.6011, "step": 148730 }, { "epoch": 0.742790082149367, "grad_norm": 0.0932571217417717, "learning_rate": 7.735612906455731e-06, "loss": 8.6088, "step": 148740 }, { "epoch": 0.7428400209743065, "grad_norm": 0.09587745368480682, "learning_rate": 7.734110991514181e-06, "loss": 8.6054, "step": 148750 }, { "epoch": 0.7428899597992459, "grad_norm": 0.08768852055072784, "learning_rate": 7.73260907657263e-06, "loss": 8.5966, "step": 148760 }, { "epoch": 0.7429398986241854, "grad_norm": 0.08625376224517822, "learning_rate": 7.73110716163108e-06, "loss": 8.5867, "step": 148770 }, { "epoch": 0.7429898374491248, "grad_norm": 0.09649079293012619, "learning_rate": 7.72960524668953e-06, "loss": 8.6001, "step": 148780 }, { "epoch": 0.7430397762740643, "grad_norm": 0.0902811661362648, "learning_rate": 7.728103331747978e-06, "loss": 8.5862, "step": 148790 }, { "epoch": 0.7430897150990037, "grad_norm": 0.08683760464191437, "learning_rate": 7.726601416806429e-06, "loss": 8.5762, "step": 148800 }, { "epoch": 0.7431396539239432, "grad_norm": 0.09725631028413773, "learning_rate": 7.725099501864877e-06, "loss": 8.6059, "step": 148810 }, { "epoch": 0.7431895927488826, "grad_norm": 0.09072460979223251, "learning_rate": 7.723597586923327e-06, "loss": 8.5995, "step": 148820 }, { "epoch": 0.743239531573822, "grad_norm": 0.0900563895702362, "learning_rate": 7.722095671981777e-06, "loss": 8.6039, "step": 148830 }, { "epoch": 0.7432894703987615, "grad_norm": 0.08766742050647736, "learning_rate": 7.720593757040226e-06, "loss": 8.5947, "step": 148840 }, { "epoch": 0.743339409223701, "grad_norm": 0.09723313897848129, "learning_rate": 7.719091842098676e-06, "loss": 8.6027, "step": 148850 }, { "epoch": 0.7433893480486404, "grad_norm": 0.09286182373762131, "learning_rate": 7.717589927157125e-06, "loss": 8.5978, "step": 148860 }, { "epoch": 0.7434392868735799, "grad_norm": 0.09512756764888763, "learning_rate": 7.716088012215575e-06, "loss": 8.5881, "step": 148870 }, { "epoch": 0.7434892256985193, "grad_norm": 0.09215110540390015, "learning_rate": 7.714586097274025e-06, "loss": 8.5987, "step": 148880 }, { "epoch": 0.7435391645234588, "grad_norm": 0.09654014557600021, "learning_rate": 7.713084182332475e-06, "loss": 8.5833, "step": 148890 }, { "epoch": 0.7435891033483982, "grad_norm": 0.09393315017223358, "learning_rate": 7.711582267390924e-06, "loss": 8.6084, "step": 148900 }, { "epoch": 0.7436390421733376, "grad_norm": 0.09271156042814255, "learning_rate": 7.710080352449372e-06, "loss": 8.5837, "step": 148910 }, { "epoch": 0.7436889809982771, "grad_norm": 0.08688397705554962, "learning_rate": 7.708578437507822e-06, "loss": 8.5953, "step": 148920 }, { "epoch": 0.7437389198232166, "grad_norm": 0.08975444734096527, "learning_rate": 7.707076522566272e-06, "loss": 8.585, "step": 148930 }, { "epoch": 0.743788858648156, "grad_norm": 0.09515532851219177, "learning_rate": 7.705574607624723e-06, "loss": 8.5907, "step": 148940 }, { "epoch": 0.7438387974730954, "grad_norm": 0.08886223286390305, "learning_rate": 7.704072692683171e-06, "loss": 8.5786, "step": 148950 }, { "epoch": 0.7438887362980349, "grad_norm": 0.09450791031122208, "learning_rate": 7.70257077774162e-06, "loss": 8.5943, "step": 148960 }, { "epoch": 0.7439386751229744, "grad_norm": 0.08783065527677536, "learning_rate": 7.70106886280007e-06, "loss": 8.6139, "step": 148970 }, { "epoch": 0.7439886139479138, "grad_norm": 0.0896720141172409, "learning_rate": 7.69956694785852e-06, "loss": 8.6009, "step": 148980 }, { "epoch": 0.7440385527728532, "grad_norm": 0.08917935937643051, "learning_rate": 7.69806503291697e-06, "loss": 8.5988, "step": 148990 }, { "epoch": 0.7440884915977927, "grad_norm": 0.09412242472171783, "learning_rate": 7.696563117975419e-06, "loss": 8.5932, "step": 149000 }, { "epoch": 0.7441384304227322, "grad_norm": 0.09121372550725937, "learning_rate": 7.695061203033867e-06, "loss": 8.5708, "step": 149010 }, { "epoch": 0.7441883692476716, "grad_norm": 0.09384799003601074, "learning_rate": 7.693559288092317e-06, "loss": 8.6003, "step": 149020 }, { "epoch": 0.744238308072611, "grad_norm": 0.088929682970047, "learning_rate": 7.692057373150767e-06, "loss": 8.5807, "step": 149030 }, { "epoch": 0.7442882468975505, "grad_norm": 0.09719398617744446, "learning_rate": 7.690555458209218e-06, "loss": 8.5885, "step": 149040 }, { "epoch": 0.74433818572249, "grad_norm": 0.09361552447080612, "learning_rate": 7.689053543267668e-06, "loss": 8.5899, "step": 149050 }, { "epoch": 0.7443881245474294, "grad_norm": 0.08958575874567032, "learning_rate": 7.687551628326115e-06, "loss": 8.6113, "step": 149060 }, { "epoch": 0.7444380633723688, "grad_norm": 0.09068027883768082, "learning_rate": 7.686049713384565e-06, "loss": 8.6056, "step": 149070 }, { "epoch": 0.7444880021973083, "grad_norm": 0.09241528064012527, "learning_rate": 7.684547798443015e-06, "loss": 8.5827, "step": 149080 }, { "epoch": 0.7445379410222478, "grad_norm": 0.09474475681781769, "learning_rate": 7.683045883501465e-06, "loss": 8.6088, "step": 149090 }, { "epoch": 0.7445878798471872, "grad_norm": 0.09101393073797226, "learning_rate": 7.681543968559915e-06, "loss": 8.5973, "step": 149100 }, { "epoch": 0.7446378186721266, "grad_norm": 0.09199006855487823, "learning_rate": 7.680042053618362e-06, "loss": 8.6017, "step": 149110 }, { "epoch": 0.7446877574970661, "grad_norm": 0.09310968220233917, "learning_rate": 7.678540138676812e-06, "loss": 8.5962, "step": 149120 }, { "epoch": 0.7447376963220056, "grad_norm": 0.08850269764661789, "learning_rate": 7.677038223735262e-06, "loss": 8.5832, "step": 149130 }, { "epoch": 0.744787635146945, "grad_norm": 0.09304803609848022, "learning_rate": 7.675536308793713e-06, "loss": 8.5942, "step": 149140 }, { "epoch": 0.7448375739718844, "grad_norm": 0.0940072312951088, "learning_rate": 7.674034393852163e-06, "loss": 8.6149, "step": 149150 }, { "epoch": 0.7448875127968239, "grad_norm": 0.09144038707017899, "learning_rate": 7.672532478910611e-06, "loss": 8.5879, "step": 149160 }, { "epoch": 0.7449374516217634, "grad_norm": 0.09090874344110489, "learning_rate": 7.67103056396906e-06, "loss": 8.595, "step": 149170 }, { "epoch": 0.7449873904467028, "grad_norm": 0.10227791219949722, "learning_rate": 7.66952864902751e-06, "loss": 8.611, "step": 149180 }, { "epoch": 0.7450373292716422, "grad_norm": 0.09328952431678772, "learning_rate": 7.66802673408596e-06, "loss": 8.574, "step": 149190 }, { "epoch": 0.7450872680965817, "grad_norm": 0.09365438669919968, "learning_rate": 7.66652481914441e-06, "loss": 8.6266, "step": 149200 }, { "epoch": 0.7451372069215212, "grad_norm": 0.0949469655752182, "learning_rate": 7.665022904202859e-06, "loss": 8.6018, "step": 149210 }, { "epoch": 0.7451871457464606, "grad_norm": 0.0964326560497284, "learning_rate": 7.663520989261307e-06, "loss": 8.5941, "step": 149220 }, { "epoch": 0.7452370845714, "grad_norm": 0.091887928545475, "learning_rate": 7.662019074319758e-06, "loss": 8.5932, "step": 149230 }, { "epoch": 0.7452870233963395, "grad_norm": 0.09236712753772736, "learning_rate": 7.660517159378208e-06, "loss": 8.5758, "step": 149240 }, { "epoch": 0.745336962221279, "grad_norm": 0.08846684545278549, "learning_rate": 7.659015244436658e-06, "loss": 8.5856, "step": 149250 }, { "epoch": 0.7453869010462184, "grad_norm": 0.10501750558614731, "learning_rate": 7.657513329495106e-06, "loss": 8.5935, "step": 149260 }, { "epoch": 0.7454368398711578, "grad_norm": 0.09326668828725815, "learning_rate": 7.656011414553555e-06, "loss": 8.598, "step": 149270 }, { "epoch": 0.7454867786960973, "grad_norm": 0.09294133633375168, "learning_rate": 7.654509499612005e-06, "loss": 8.5952, "step": 149280 }, { "epoch": 0.7455367175210368, "grad_norm": 0.09185370057821274, "learning_rate": 7.653007584670455e-06, "loss": 8.6152, "step": 149290 }, { "epoch": 0.7455866563459762, "grad_norm": 0.09958349168300629, "learning_rate": 7.651505669728905e-06, "loss": 8.5855, "step": 149300 }, { "epoch": 0.7456365951709156, "grad_norm": 0.1002863273024559, "learning_rate": 7.650003754787356e-06, "loss": 8.5972, "step": 149310 }, { "epoch": 0.745686533995855, "grad_norm": 0.09235794842243195, "learning_rate": 7.648501839845802e-06, "loss": 8.5912, "step": 149320 }, { "epoch": 0.7457364728207945, "grad_norm": 0.09120310097932816, "learning_rate": 7.646999924904253e-06, "loss": 8.5867, "step": 149330 }, { "epoch": 0.745786411645734, "grad_norm": 0.09124782681465149, "learning_rate": 7.645498009962703e-06, "loss": 8.5841, "step": 149340 }, { "epoch": 0.7458363504706734, "grad_norm": 0.09484837204217911, "learning_rate": 7.643996095021153e-06, "loss": 8.5788, "step": 149350 }, { "epoch": 0.7458862892956128, "grad_norm": 0.08904207497835159, "learning_rate": 7.642494180079603e-06, "loss": 8.5973, "step": 149360 }, { "epoch": 0.7459362281205523, "grad_norm": 0.08774899691343307, "learning_rate": 7.64099226513805e-06, "loss": 8.5849, "step": 149370 }, { "epoch": 0.7459861669454918, "grad_norm": 0.09321718662977219, "learning_rate": 7.6394903501965e-06, "loss": 8.598, "step": 149380 }, { "epoch": 0.7460361057704312, "grad_norm": 0.09559307992458344, "learning_rate": 7.63798843525495e-06, "loss": 8.6053, "step": 149390 }, { "epoch": 0.7460860445953706, "grad_norm": 0.09065421670675278, "learning_rate": 7.6364865203134e-06, "loss": 8.5822, "step": 149400 }, { "epoch": 0.7461359834203101, "grad_norm": 0.08813871443271637, "learning_rate": 7.63498460537185e-06, "loss": 8.6158, "step": 149410 }, { "epoch": 0.7461859222452496, "grad_norm": 0.09453373402357101, "learning_rate": 7.633482690430299e-06, "loss": 8.595, "step": 149420 }, { "epoch": 0.746235861070189, "grad_norm": 0.09520356357097626, "learning_rate": 7.631980775488748e-06, "loss": 8.6, "step": 149430 }, { "epoch": 0.7462857998951284, "grad_norm": 0.09131111204624176, "learning_rate": 7.630478860547198e-06, "loss": 8.6118, "step": 149440 }, { "epoch": 0.7463357387200679, "grad_norm": 0.09047097712755203, "learning_rate": 7.628976945605647e-06, "loss": 8.598, "step": 149450 }, { "epoch": 0.7463856775450074, "grad_norm": 0.09119818359613419, "learning_rate": 7.627475030664097e-06, "loss": 8.6094, "step": 149460 }, { "epoch": 0.7464356163699468, "grad_norm": 0.0884728953242302, "learning_rate": 7.625973115722547e-06, "loss": 8.6118, "step": 149470 }, { "epoch": 0.7464855551948862, "grad_norm": 0.0959334447979927, "learning_rate": 7.624471200780995e-06, "loss": 8.5847, "step": 149480 }, { "epoch": 0.7465354940198257, "grad_norm": 0.08748113363981247, "learning_rate": 7.622969285839445e-06, "loss": 8.5978, "step": 149490 }, { "epoch": 0.7465854328447652, "grad_norm": 0.09139006584882736, "learning_rate": 7.6214673708978945e-06, "loss": 8.5974, "step": 149500 }, { "epoch": 0.7466353716697046, "grad_norm": 0.089291051030159, "learning_rate": 7.619965455956345e-06, "loss": 8.5852, "step": 149510 }, { "epoch": 0.746685310494644, "grad_norm": 0.0897984728217125, "learning_rate": 7.618463541014795e-06, "loss": 8.5904, "step": 149520 }, { "epoch": 0.7467352493195835, "grad_norm": 0.09387893229722977, "learning_rate": 7.6169616260732425e-06, "loss": 8.5993, "step": 149530 }, { "epoch": 0.746785188144523, "grad_norm": 0.09599009901285172, "learning_rate": 7.615459711131693e-06, "loss": 8.5884, "step": 149540 }, { "epoch": 0.7468351269694624, "grad_norm": 0.10485220700502396, "learning_rate": 7.613957796190142e-06, "loss": 8.5845, "step": 149550 }, { "epoch": 0.7468850657944018, "grad_norm": 0.09003337472677231, "learning_rate": 7.612455881248592e-06, "loss": 8.5792, "step": 149560 }, { "epoch": 0.7469350046193413, "grad_norm": 0.08689581602811813, "learning_rate": 7.610953966307042e-06, "loss": 8.5919, "step": 149570 }, { "epoch": 0.7469849434442808, "grad_norm": 0.09046222269535065, "learning_rate": 7.609452051365492e-06, "loss": 8.6032, "step": 149580 }, { "epoch": 0.7470348822692202, "grad_norm": 0.0931020975112915, "learning_rate": 7.60795013642394e-06, "loss": 8.5898, "step": 149590 }, { "epoch": 0.7470848210941596, "grad_norm": 0.09115353971719742, "learning_rate": 7.6064482214823895e-06, "loss": 8.5903, "step": 149600 }, { "epoch": 0.7471347599190991, "grad_norm": 0.095846027135849, "learning_rate": 7.60494630654084e-06, "loss": 8.5989, "step": 149610 }, { "epoch": 0.7471846987440386, "grad_norm": 0.0902191624045372, "learning_rate": 7.60344439159929e-06, "loss": 8.5844, "step": 149620 }, { "epoch": 0.747234637568978, "grad_norm": 0.08753103017807007, "learning_rate": 7.601942476657739e-06, "loss": 8.5876, "step": 149630 }, { "epoch": 0.7472845763939174, "grad_norm": 0.09205235540866852, "learning_rate": 7.600440561716188e-06, "loss": 8.6017, "step": 149640 }, { "epoch": 0.7473345152188569, "grad_norm": 0.08869083225727081, "learning_rate": 7.598938646774637e-06, "loss": 8.5879, "step": 149650 }, { "epoch": 0.7473844540437964, "grad_norm": 0.09355755895376205, "learning_rate": 7.597436731833087e-06, "loss": 8.6161, "step": 149660 }, { "epoch": 0.7474343928687358, "grad_norm": 0.09638141095638275, "learning_rate": 7.595934816891537e-06, "loss": 8.5874, "step": 149670 }, { "epoch": 0.7474843316936752, "grad_norm": 0.09386901557445526, "learning_rate": 7.594432901949987e-06, "loss": 8.596, "step": 149680 }, { "epoch": 0.7475342705186147, "grad_norm": 0.08905579149723053, "learning_rate": 7.592930987008435e-06, "loss": 8.5965, "step": 149690 }, { "epoch": 0.7475842093435542, "grad_norm": 0.09240230172872543, "learning_rate": 7.5914290720668845e-06, "loss": 8.5916, "step": 149700 }, { "epoch": 0.7476341481684936, "grad_norm": 0.08725178986787796, "learning_rate": 7.589927157125335e-06, "loss": 8.61, "step": 149710 }, { "epoch": 0.747684086993433, "grad_norm": 0.09127280861139297, "learning_rate": 7.588425242183785e-06, "loss": 8.5987, "step": 149720 }, { "epoch": 0.7477340258183724, "grad_norm": 0.09402144700288773, "learning_rate": 7.586923327242234e-06, "loss": 8.6062, "step": 149730 }, { "epoch": 0.747783964643312, "grad_norm": 0.08989568054676056, "learning_rate": 7.585421412300684e-06, "loss": 8.5849, "step": 149740 }, { "epoch": 0.7478339034682514, "grad_norm": 0.09065549820661545, "learning_rate": 7.583919497359133e-06, "loss": 8.5967, "step": 149750 }, { "epoch": 0.7478838422931908, "grad_norm": 0.09307155758142471, "learning_rate": 7.582417582417582e-06, "loss": 8.6004, "step": 149760 }, { "epoch": 0.7479337811181302, "grad_norm": 0.09423384815454483, "learning_rate": 7.580915667476032e-06, "loss": 8.595, "step": 149770 }, { "epoch": 0.7479837199430698, "grad_norm": 0.09108050912618637, "learning_rate": 7.579413752534482e-06, "loss": 8.5889, "step": 149780 }, { "epoch": 0.7480336587680092, "grad_norm": 0.0906979888677597, "learning_rate": 7.577911837592932e-06, "loss": 8.5869, "step": 149790 }, { "epoch": 0.7480835975929486, "grad_norm": 0.09189769625663757, "learning_rate": 7.57640992265138e-06, "loss": 8.5869, "step": 149800 }, { "epoch": 0.748133536417888, "grad_norm": 0.09509114921092987, "learning_rate": 7.57490800770983e-06, "loss": 8.5979, "step": 149810 }, { "epoch": 0.7481834752428276, "grad_norm": 0.09381816536188126, "learning_rate": 7.57340609276828e-06, "loss": 8.5848, "step": 149820 }, { "epoch": 0.748233414067767, "grad_norm": 0.10044172406196594, "learning_rate": 7.571904177826729e-06, "loss": 8.568, "step": 149830 }, { "epoch": 0.7482833528927064, "grad_norm": 0.09521984308958054, "learning_rate": 7.570402262885179e-06, "loss": 8.5959, "step": 149840 }, { "epoch": 0.7483332917176458, "grad_norm": 0.09529656916856766, "learning_rate": 7.568900347943628e-06, "loss": 8.5873, "step": 149850 }, { "epoch": 0.7483832305425854, "grad_norm": 0.09173472225666046, "learning_rate": 7.567398433002077e-06, "loss": 8.6029, "step": 149860 }, { "epoch": 0.7484331693675248, "grad_norm": 0.0962473452091217, "learning_rate": 7.565896518060527e-06, "loss": 8.6064, "step": 149870 }, { "epoch": 0.7484831081924642, "grad_norm": 0.0965263694524765, "learning_rate": 7.564394603118977e-06, "loss": 8.5685, "step": 149880 }, { "epoch": 0.7485330470174036, "grad_norm": 0.09147603064775467, "learning_rate": 7.562892688177427e-06, "loss": 8.5998, "step": 149890 }, { "epoch": 0.7485829858423432, "grad_norm": 0.09058742225170135, "learning_rate": 7.561390773235877e-06, "loss": 8.5803, "step": 149900 }, { "epoch": 0.7486329246672826, "grad_norm": 0.09538568556308746, "learning_rate": 7.559888858294325e-06, "loss": 8.5839, "step": 149910 }, { "epoch": 0.748682863492222, "grad_norm": 0.08751174807548523, "learning_rate": 7.558386943352775e-06, "loss": 8.5857, "step": 149920 }, { "epoch": 0.7487328023171614, "grad_norm": 0.09170546382665634, "learning_rate": 7.556885028411224e-06, "loss": 8.5967, "step": 149930 }, { "epoch": 0.748782741142101, "grad_norm": 0.09635576605796814, "learning_rate": 7.5553831134696744e-06, "loss": 8.59, "step": 149940 }, { "epoch": 0.7488326799670404, "grad_norm": 0.09509419649839401, "learning_rate": 7.553881198528125e-06, "loss": 8.5917, "step": 149950 }, { "epoch": 0.7488826187919798, "grad_norm": 0.08711056411266327, "learning_rate": 7.552379283586572e-06, "loss": 8.5994, "step": 149960 }, { "epoch": 0.7489325576169192, "grad_norm": 0.0929313451051712, "learning_rate": 7.550877368645022e-06, "loss": 8.5855, "step": 149970 }, { "epoch": 0.7489824964418588, "grad_norm": 0.09222406148910522, "learning_rate": 7.549375453703472e-06, "loss": 8.6066, "step": 149980 }, { "epoch": 0.7490324352667982, "grad_norm": 0.0890507623553276, "learning_rate": 7.547873538761922e-06, "loss": 8.6042, "step": 149990 }, { "epoch": 0.7490823740917376, "grad_norm": 0.08804387599229813, "learning_rate": 7.546371623820372e-06, "loss": 8.6063, "step": 150000 }, { "epoch": 0.749132312916677, "grad_norm": 0.0897492840886116, "learning_rate": 7.54486970887882e-06, "loss": 8.5969, "step": 150010 }, { "epoch": 0.7491822517416166, "grad_norm": 0.08836290240287781, "learning_rate": 7.54336779393727e-06, "loss": 8.5929, "step": 150020 }, { "epoch": 0.749232190566556, "grad_norm": 0.09971511363983154, "learning_rate": 7.541865878995719e-06, "loss": 8.5952, "step": 150030 }, { "epoch": 0.7492821293914954, "grad_norm": 0.09459482133388519, "learning_rate": 7.5403639640541694e-06, "loss": 8.5777, "step": 150040 }, { "epoch": 0.7493320682164348, "grad_norm": 0.09455052018165588, "learning_rate": 7.53886204911262e-06, "loss": 8.5771, "step": 150050 }, { "epoch": 0.7493820070413744, "grad_norm": 0.09091456979513168, "learning_rate": 7.537360134171069e-06, "loss": 8.6112, "step": 150060 }, { "epoch": 0.7494319458663138, "grad_norm": 0.08837003260850906, "learning_rate": 7.5358582192295174e-06, "loss": 8.5919, "step": 150070 }, { "epoch": 0.7494818846912532, "grad_norm": 0.09216312319040298, "learning_rate": 7.534356304287967e-06, "loss": 8.5865, "step": 150080 }, { "epoch": 0.7495318235161926, "grad_norm": 0.09196959435939789, "learning_rate": 7.532854389346417e-06, "loss": 8.5715, "step": 150090 }, { "epoch": 0.7495817623411322, "grad_norm": 0.0905485451221466, "learning_rate": 7.531352474404867e-06, "loss": 8.5851, "step": 150100 }, { "epoch": 0.7496317011660716, "grad_norm": 0.0880269706249237, "learning_rate": 7.5298505594633164e-06, "loss": 8.5813, "step": 150110 }, { "epoch": 0.749681639991011, "grad_norm": 0.08941976726055145, "learning_rate": 7.528348644521765e-06, "loss": 8.5816, "step": 150120 }, { "epoch": 0.7497315788159504, "grad_norm": 0.08973130583763123, "learning_rate": 7.526846729580214e-06, "loss": 8.6029, "step": 150130 }, { "epoch": 0.74978151764089, "grad_norm": 0.0887095034122467, "learning_rate": 7.5253448146386644e-06, "loss": 8.6152, "step": 150140 }, { "epoch": 0.7498314564658294, "grad_norm": 0.09397480636835098, "learning_rate": 7.523842899697115e-06, "loss": 8.5888, "step": 150150 }, { "epoch": 0.7498813952907688, "grad_norm": 0.09192110598087311, "learning_rate": 7.522340984755564e-06, "loss": 8.5769, "step": 150160 }, { "epoch": 0.7499313341157082, "grad_norm": 0.09322792291641235, "learning_rate": 7.5208390698140124e-06, "loss": 8.5972, "step": 150170 }, { "epoch": 0.7499812729406478, "grad_norm": 0.08950521796941757, "learning_rate": 7.519337154872462e-06, "loss": 8.5905, "step": 150180 }, { "epoch": 0.7500312117655872, "grad_norm": 0.0880652666091919, "learning_rate": 7.517835239930912e-06, "loss": 8.6012, "step": 150190 }, { "epoch": 0.7500811505905266, "grad_norm": 0.09776634722948074, "learning_rate": 7.516333324989362e-06, "loss": 8.5777, "step": 150200 }, { "epoch": 0.750131089415466, "grad_norm": 0.09283629804849625, "learning_rate": 7.5148314100478115e-06, "loss": 8.5807, "step": 150210 }, { "epoch": 0.7501810282404056, "grad_norm": 0.09522504359483719, "learning_rate": 7.513329495106262e-06, "loss": 8.576, "step": 150220 }, { "epoch": 0.750230967065345, "grad_norm": 0.09157579392194748, "learning_rate": 7.511827580164709e-06, "loss": 8.5927, "step": 150230 }, { "epoch": 0.7502809058902844, "grad_norm": 0.09480082988739014, "learning_rate": 7.5103256652231594e-06, "loss": 8.6008, "step": 150240 }, { "epoch": 0.7503308447152238, "grad_norm": 0.0909399539232254, "learning_rate": 7.50882375028161e-06, "loss": 8.5823, "step": 150250 }, { "epoch": 0.7503807835401634, "grad_norm": 0.09307267516851425, "learning_rate": 7.507321835340059e-06, "loss": 8.5907, "step": 150260 }, { "epoch": 0.7504307223651028, "grad_norm": 0.08955024927854538, "learning_rate": 7.505819920398509e-06, "loss": 8.5954, "step": 150270 }, { "epoch": 0.7504806611900422, "grad_norm": 0.09251957386732101, "learning_rate": 7.504318005456957e-06, "loss": 8.5802, "step": 150280 }, { "epoch": 0.7505306000149816, "grad_norm": 0.09843941777944565, "learning_rate": 7.502816090515407e-06, "loss": 8.5834, "step": 150290 }, { "epoch": 0.7505805388399212, "grad_norm": 0.09598127752542496, "learning_rate": 7.501314175573857e-06, "loss": 8.599, "step": 150300 }, { "epoch": 0.7506304776648606, "grad_norm": 0.09445951879024506, "learning_rate": 7.4998122606323065e-06, "loss": 8.5844, "step": 150310 }, { "epoch": 0.7506804164898, "grad_norm": 0.08694125711917877, "learning_rate": 7.498310345690756e-06, "loss": 8.606, "step": 150320 }, { "epoch": 0.7507303553147394, "grad_norm": 0.08730349689722061, "learning_rate": 7.496808430749205e-06, "loss": 8.5941, "step": 150330 }, { "epoch": 0.7507802941396788, "grad_norm": 0.0891655758023262, "learning_rate": 7.495306515807655e-06, "loss": 8.5755, "step": 150340 }, { "epoch": 0.7508302329646184, "grad_norm": 0.08927493542432785, "learning_rate": 7.493804600866105e-06, "loss": 8.5948, "step": 150350 }, { "epoch": 0.7508801717895578, "grad_norm": 0.09330999106168747, "learning_rate": 7.492302685924554e-06, "loss": 8.5993, "step": 150360 }, { "epoch": 0.7509301106144972, "grad_norm": 0.09538490325212479, "learning_rate": 7.490800770983003e-06, "loss": 8.5914, "step": 150370 }, { "epoch": 0.7509800494394366, "grad_norm": 0.09310005605220795, "learning_rate": 7.489298856041453e-06, "loss": 8.5874, "step": 150380 }, { "epoch": 0.7510299882643762, "grad_norm": 0.0930032730102539, "learning_rate": 7.487796941099903e-06, "loss": 8.5895, "step": 150390 }, { "epoch": 0.7510799270893156, "grad_norm": 0.08846841007471085, "learning_rate": 7.486295026158352e-06, "loss": 8.5724, "step": 150400 }, { "epoch": 0.751129865914255, "grad_norm": 0.09779760241508484, "learning_rate": 7.4847931112168015e-06, "loss": 8.5736, "step": 150410 }, { "epoch": 0.7511798047391944, "grad_norm": 0.10576785355806351, "learning_rate": 7.483291196275252e-06, "loss": 8.5726, "step": 150420 }, { "epoch": 0.751229743564134, "grad_norm": 0.09451400488615036, "learning_rate": 7.4817892813337e-06, "loss": 8.5857, "step": 150430 }, { "epoch": 0.7512796823890734, "grad_norm": 0.09466114640235901, "learning_rate": 7.48028736639215e-06, "loss": 8.597, "step": 150440 }, { "epoch": 0.7513296212140128, "grad_norm": 0.08923625946044922, "learning_rate": 7.4787854514506e-06, "loss": 8.6096, "step": 150450 }, { "epoch": 0.7513795600389522, "grad_norm": 0.08818081766366959, "learning_rate": 7.477283536509049e-06, "loss": 8.604, "step": 150460 }, { "epoch": 0.7514294988638918, "grad_norm": 0.08820995688438416, "learning_rate": 7.475781621567499e-06, "loss": 8.5916, "step": 150470 }, { "epoch": 0.7514794376888312, "grad_norm": 0.09002698957920074, "learning_rate": 7.474279706625948e-06, "loss": 8.5879, "step": 150480 }, { "epoch": 0.7515293765137706, "grad_norm": 0.09170237928628922, "learning_rate": 7.472777791684398e-06, "loss": 8.5907, "step": 150490 }, { "epoch": 0.75157931533871, "grad_norm": 0.09593456238508224, "learning_rate": 7.471275876742848e-06, "loss": 8.5872, "step": 150500 }, { "epoch": 0.7516292541636496, "grad_norm": 0.09875084459781647, "learning_rate": 7.4697739618012965e-06, "loss": 8.5823, "step": 150510 }, { "epoch": 0.751679192988589, "grad_norm": 0.09118742495775223, "learning_rate": 7.468272046859747e-06, "loss": 8.5823, "step": 150520 }, { "epoch": 0.7517291318135284, "grad_norm": 0.09258361160755157, "learning_rate": 7.466770131918195e-06, "loss": 8.5972, "step": 150530 }, { "epoch": 0.7517790706384678, "grad_norm": 0.08977973461151123, "learning_rate": 7.465268216976645e-06, "loss": 8.6059, "step": 150540 }, { "epoch": 0.7518290094634074, "grad_norm": 0.092478908598423, "learning_rate": 7.4637663020350955e-06, "loss": 8.5935, "step": 150550 }, { "epoch": 0.7518789482883468, "grad_norm": 0.09068572521209717, "learning_rate": 7.462264387093544e-06, "loss": 8.5762, "step": 150560 }, { "epoch": 0.7519288871132862, "grad_norm": 0.09068078547716141, "learning_rate": 7.460762472151994e-06, "loss": 8.5839, "step": 150570 }, { "epoch": 0.7519788259382256, "grad_norm": 0.09269540011882782, "learning_rate": 7.4592605572104435e-06, "loss": 8.5909, "step": 150580 }, { "epoch": 0.7520287647631652, "grad_norm": 0.0924522653222084, "learning_rate": 7.457758642268893e-06, "loss": 8.5919, "step": 150590 }, { "epoch": 0.7520787035881046, "grad_norm": 0.09281891584396362, "learning_rate": 7.456256727327343e-06, "loss": 8.5784, "step": 150600 }, { "epoch": 0.752128642413044, "grad_norm": 0.08694092184305191, "learning_rate": 7.4547548123857915e-06, "loss": 8.5848, "step": 150610 }, { "epoch": 0.7521785812379834, "grad_norm": 0.10442210733890533, "learning_rate": 7.453252897444242e-06, "loss": 8.5919, "step": 150620 }, { "epoch": 0.752228520062923, "grad_norm": 0.09364094585180283, "learning_rate": 7.451750982502691e-06, "loss": 8.6023, "step": 150630 }, { "epoch": 0.7522784588878624, "grad_norm": 0.08750166743993759, "learning_rate": 7.45024906756114e-06, "loss": 8.5849, "step": 150640 }, { "epoch": 0.7523283977128018, "grad_norm": 0.08991952240467072, "learning_rate": 7.4487471526195905e-06, "loss": 8.5911, "step": 150650 }, { "epoch": 0.7523783365377412, "grad_norm": 0.10013854503631592, "learning_rate": 7.44724523767804e-06, "loss": 8.5982, "step": 150660 }, { "epoch": 0.7524282753626808, "grad_norm": 0.09347521513700485, "learning_rate": 7.445743322736489e-06, "loss": 8.5847, "step": 150670 }, { "epoch": 0.7524782141876202, "grad_norm": 0.09399186819791794, "learning_rate": 7.4442414077949385e-06, "loss": 8.6024, "step": 150680 }, { "epoch": 0.7525281530125596, "grad_norm": 0.09026849269866943, "learning_rate": 7.442739492853388e-06, "loss": 8.5992, "step": 150690 }, { "epoch": 0.752578091837499, "grad_norm": 0.08900460600852966, "learning_rate": 7.441237577911838e-06, "loss": 8.5872, "step": 150700 }, { "epoch": 0.7526280306624386, "grad_norm": 0.09363497048616409, "learning_rate": 7.439735662970287e-06, "loss": 8.5889, "step": 150710 }, { "epoch": 0.752677969487378, "grad_norm": 0.09363177418708801, "learning_rate": 7.438233748028737e-06, "loss": 8.5861, "step": 150720 }, { "epoch": 0.7527279083123174, "grad_norm": 0.08974208682775497, "learning_rate": 7.436731833087186e-06, "loss": 8.5841, "step": 150730 }, { "epoch": 0.7527778471372568, "grad_norm": 0.09006848931312561, "learning_rate": 7.435229918145636e-06, "loss": 8.5754, "step": 150740 }, { "epoch": 0.7528277859621963, "grad_norm": 0.08987919241189957, "learning_rate": 7.4337280032040855e-06, "loss": 8.5911, "step": 150750 }, { "epoch": 0.7528777247871358, "grad_norm": 0.0890379548072815, "learning_rate": 7.432226088262535e-06, "loss": 8.5964, "step": 150760 }, { "epoch": 0.7529276636120752, "grad_norm": 0.08632665872573853, "learning_rate": 7.430724173320984e-06, "loss": 8.5989, "step": 150770 }, { "epoch": 0.7529776024370146, "grad_norm": 0.09325092285871506, "learning_rate": 7.4292222583794335e-06, "loss": 8.5849, "step": 150780 }, { "epoch": 0.7530275412619541, "grad_norm": 0.09268093854188919, "learning_rate": 7.427720343437884e-06, "loss": 8.5921, "step": 150790 }, { "epoch": 0.7530774800868936, "grad_norm": 0.09364005923271179, "learning_rate": 7.426218428496333e-06, "loss": 8.5925, "step": 150800 }, { "epoch": 0.753127418911833, "grad_norm": 0.08780599385499954, "learning_rate": 7.424716513554782e-06, "loss": 8.601, "step": 150810 }, { "epoch": 0.7531773577367724, "grad_norm": 0.0943470448255539, "learning_rate": 7.4232145986132325e-06, "loss": 8.6098, "step": 150820 }, { "epoch": 0.753227296561712, "grad_norm": 0.08829768002033234, "learning_rate": 7.421712683671681e-06, "loss": 8.5951, "step": 150830 }, { "epoch": 0.7532772353866514, "grad_norm": 0.0940447673201561, "learning_rate": 7.420210768730131e-06, "loss": 8.5996, "step": 150840 }, { "epoch": 0.7533271742115908, "grad_norm": 0.09546596556901932, "learning_rate": 7.4187088537885805e-06, "loss": 8.5794, "step": 150850 }, { "epoch": 0.7533771130365302, "grad_norm": 0.09206235408782959, "learning_rate": 7.41720693884703e-06, "loss": 8.5881, "step": 150860 }, { "epoch": 0.7534270518614697, "grad_norm": 0.08884277194738388, "learning_rate": 7.41570502390548e-06, "loss": 8.6059, "step": 150870 }, { "epoch": 0.7534769906864092, "grad_norm": 0.0894254744052887, "learning_rate": 7.414203108963929e-06, "loss": 8.6018, "step": 150880 }, { "epoch": 0.7535269295113486, "grad_norm": 0.08869513869285583, "learning_rate": 7.412701194022379e-06, "loss": 8.5989, "step": 150890 }, { "epoch": 0.753576868336288, "grad_norm": 0.08933467417955399, "learning_rate": 7.411199279080829e-06, "loss": 8.5957, "step": 150900 }, { "epoch": 0.7536268071612275, "grad_norm": 0.09024026989936829, "learning_rate": 7.409697364139277e-06, "loss": 8.5688, "step": 150910 }, { "epoch": 0.753676745986167, "grad_norm": 0.08978093415498734, "learning_rate": 7.4081954491977275e-06, "loss": 8.5984, "step": 150920 }, { "epoch": 0.7537266848111064, "grad_norm": 0.09174445271492004, "learning_rate": 7.406693534256177e-06, "loss": 8.5912, "step": 150930 }, { "epoch": 0.7537766236360458, "grad_norm": 0.09789343923330307, "learning_rate": 7.405191619314626e-06, "loss": 8.5811, "step": 150940 }, { "epoch": 0.7538265624609853, "grad_norm": 0.09015634655952454, "learning_rate": 7.403689704373076e-06, "loss": 8.5899, "step": 150950 }, { "epoch": 0.7538765012859248, "grad_norm": 0.08942896872758865, "learning_rate": 7.402187789431525e-06, "loss": 8.5947, "step": 150960 }, { "epoch": 0.7539264401108642, "grad_norm": 0.09148923307657242, "learning_rate": 7.400685874489975e-06, "loss": 8.5759, "step": 150970 }, { "epoch": 0.7539763789358036, "grad_norm": 0.09370266646146774, "learning_rate": 7.399183959548425e-06, "loss": 8.5746, "step": 150980 }, { "epoch": 0.7540263177607431, "grad_norm": 0.09445227682590485, "learning_rate": 7.397682044606874e-06, "loss": 8.6121, "step": 150990 }, { "epoch": 0.7540762565856826, "grad_norm": 0.08653920888900757, "learning_rate": 7.396180129665324e-06, "loss": 8.5719, "step": 151000 }, { "epoch": 0.754126195410622, "grad_norm": 0.09342148154973984, "learning_rate": 7.394678214723772e-06, "loss": 8.5733, "step": 151010 }, { "epoch": 0.7541761342355614, "grad_norm": 0.09228193759918213, "learning_rate": 7.3931762997822225e-06, "loss": 8.597, "step": 151020 }, { "epoch": 0.7542260730605009, "grad_norm": 0.08755730837583542, "learning_rate": 7.391674384840673e-06, "loss": 8.5821, "step": 151030 }, { "epoch": 0.7542760118854404, "grad_norm": 0.08993065357208252, "learning_rate": 7.390172469899121e-06, "loss": 8.6094, "step": 151040 }, { "epoch": 0.7543259507103798, "grad_norm": 0.08330941945314407, "learning_rate": 7.388670554957571e-06, "loss": 8.6262, "step": 151050 }, { "epoch": 0.7543758895353192, "grad_norm": 0.09145517647266388, "learning_rate": 7.387168640016021e-06, "loss": 8.5803, "step": 151060 }, { "epoch": 0.7544258283602587, "grad_norm": 0.09170622378587723, "learning_rate": 7.38566672507447e-06, "loss": 8.568, "step": 151070 }, { "epoch": 0.7544757671851982, "grad_norm": 0.08807757496833801, "learning_rate": 7.38416481013292e-06, "loss": 8.5906, "step": 151080 }, { "epoch": 0.7545257060101376, "grad_norm": 0.09455586969852448, "learning_rate": 7.382662895191369e-06, "loss": 8.6016, "step": 151090 }, { "epoch": 0.754575644835077, "grad_norm": 0.08837214857339859, "learning_rate": 7.381160980249819e-06, "loss": 8.5958, "step": 151100 }, { "epoch": 0.7546255836600165, "grad_norm": 0.08906712383031845, "learning_rate": 7.379659065308268e-06, "loss": 8.5873, "step": 151110 }, { "epoch": 0.754675522484956, "grad_norm": 0.10366376489400864, "learning_rate": 7.3781571503667175e-06, "loss": 8.5838, "step": 151120 }, { "epoch": 0.7547254613098954, "grad_norm": 0.08967901021242142, "learning_rate": 7.376655235425168e-06, "loss": 8.5893, "step": 151130 }, { "epoch": 0.7547754001348348, "grad_norm": 0.0880189910531044, "learning_rate": 7.375153320483617e-06, "loss": 8.5897, "step": 151140 }, { "epoch": 0.7548253389597743, "grad_norm": 0.09614763408899307, "learning_rate": 7.373651405542066e-06, "loss": 8.5823, "step": 151150 }, { "epoch": 0.7548752777847137, "grad_norm": 0.0998152643442154, "learning_rate": 7.372149490600516e-06, "loss": 8.5901, "step": 151160 }, { "epoch": 0.7549252166096532, "grad_norm": 0.09459684044122696, "learning_rate": 7.370647575658965e-06, "loss": 8.5769, "step": 151170 }, { "epoch": 0.7549751554345926, "grad_norm": 0.09051591157913208, "learning_rate": 7.369145660717415e-06, "loss": 8.593, "step": 151180 }, { "epoch": 0.7550250942595321, "grad_norm": 0.0956014096736908, "learning_rate": 7.3676437457758646e-06, "loss": 8.5867, "step": 151190 }, { "epoch": 0.7550750330844715, "grad_norm": 0.10061989724636078, "learning_rate": 7.366141830834314e-06, "loss": 8.5855, "step": 151200 }, { "epoch": 0.755124971909411, "grad_norm": 0.09272131323814392, "learning_rate": 7.364639915892763e-06, "loss": 8.5966, "step": 151210 }, { "epoch": 0.7551749107343504, "grad_norm": 0.09143613278865814, "learning_rate": 7.363138000951213e-06, "loss": 8.6061, "step": 151220 }, { "epoch": 0.7552248495592899, "grad_norm": 0.09460750222206116, "learning_rate": 7.361636086009663e-06, "loss": 8.5901, "step": 151230 }, { "epoch": 0.7552747883842293, "grad_norm": 0.09221066534519196, "learning_rate": 7.360134171068112e-06, "loss": 8.5844, "step": 151240 }, { "epoch": 0.7553247272091688, "grad_norm": 0.09496848285198212, "learning_rate": 7.358632256126561e-06, "loss": 8.5875, "step": 151250 }, { "epoch": 0.7553746660341082, "grad_norm": 0.0936916247010231, "learning_rate": 7.357130341185011e-06, "loss": 8.5976, "step": 151260 }, { "epoch": 0.7554246048590477, "grad_norm": 0.09380991011857986, "learning_rate": 7.355628426243461e-06, "loss": 8.6017, "step": 151270 }, { "epoch": 0.7554745436839871, "grad_norm": 0.09433276206254959, "learning_rate": 7.35412651130191e-06, "loss": 8.5803, "step": 151280 }, { "epoch": 0.7555244825089266, "grad_norm": 0.08973414450883865, "learning_rate": 7.3526245963603596e-06, "loss": 8.5969, "step": 151290 }, { "epoch": 0.755574421333866, "grad_norm": 0.09096003323793411, "learning_rate": 7.35112268141881e-06, "loss": 8.5919, "step": 151300 }, { "epoch": 0.7556243601588054, "grad_norm": 0.0915827602148056, "learning_rate": 7.349620766477258e-06, "loss": 8.5893, "step": 151310 }, { "epoch": 0.7556742989837449, "grad_norm": 0.09033535420894623, "learning_rate": 7.348118851535708e-06, "loss": 8.5979, "step": 151320 }, { "epoch": 0.7557242378086844, "grad_norm": 0.0904705822467804, "learning_rate": 7.346616936594158e-06, "loss": 8.5985, "step": 151330 }, { "epoch": 0.7557741766336238, "grad_norm": 0.0926397368311882, "learning_rate": 7.345115021652607e-06, "loss": 8.5686, "step": 151340 }, { "epoch": 0.7558241154585632, "grad_norm": 0.09501095861196518, "learning_rate": 7.343613106711057e-06, "loss": 8.5688, "step": 151350 }, { "epoch": 0.7558740542835027, "grad_norm": 0.09006702154874802, "learning_rate": 7.342111191769506e-06, "loss": 8.588, "step": 151360 }, { "epoch": 0.7559239931084422, "grad_norm": 0.09147243946790695, "learning_rate": 7.340609276827956e-06, "loss": 8.5847, "step": 151370 }, { "epoch": 0.7559739319333816, "grad_norm": 0.09361589699983597, "learning_rate": 7.339107361886406e-06, "loss": 8.5841, "step": 151380 }, { "epoch": 0.756023870758321, "grad_norm": 0.0927942767739296, "learning_rate": 7.3376054469448546e-06, "loss": 8.6122, "step": 151390 }, { "epoch": 0.7560738095832605, "grad_norm": 0.0871824249625206, "learning_rate": 7.336103532003305e-06, "loss": 8.5985, "step": 151400 }, { "epoch": 0.7561237484082, "grad_norm": 0.09094393253326416, "learning_rate": 7.334601617061753e-06, "loss": 8.5906, "step": 151410 }, { "epoch": 0.7561736872331394, "grad_norm": 0.08855657279491425, "learning_rate": 7.333099702120203e-06, "loss": 8.5768, "step": 151420 }, { "epoch": 0.7562236260580788, "grad_norm": 0.09399604797363281, "learning_rate": 7.331597787178654e-06, "loss": 8.5941, "step": 151430 }, { "epoch": 0.7562735648830183, "grad_norm": 0.09146516025066376, "learning_rate": 7.330095872237102e-06, "loss": 8.5875, "step": 151440 }, { "epoch": 0.7563235037079578, "grad_norm": 0.09131018072366714, "learning_rate": 7.328593957295552e-06, "loss": 8.5829, "step": 151450 }, { "epoch": 0.7563734425328972, "grad_norm": 0.09400021284818649, "learning_rate": 7.327092042354002e-06, "loss": 8.5939, "step": 151460 }, { "epoch": 0.7564233813578366, "grad_norm": 0.08741091191768646, "learning_rate": 7.325590127412451e-06, "loss": 8.5914, "step": 151470 }, { "epoch": 0.7564733201827761, "grad_norm": 0.09049748629331589, "learning_rate": 7.324088212470901e-06, "loss": 8.5944, "step": 151480 }, { "epoch": 0.7565232590077156, "grad_norm": 0.09107566624879837, "learning_rate": 7.32258629752935e-06, "loss": 8.5838, "step": 151490 }, { "epoch": 0.756573197832655, "grad_norm": 0.08954470604658127, "learning_rate": 7.3210843825878e-06, "loss": 8.5864, "step": 151500 }, { "epoch": 0.7566231366575944, "grad_norm": 0.09204570204019547, "learning_rate": 7.319582467646249e-06, "loss": 8.5808, "step": 151510 }, { "epoch": 0.7566730754825339, "grad_norm": 0.09428434073925018, "learning_rate": 7.318080552704698e-06, "loss": 8.5759, "step": 151520 }, { "epoch": 0.7567230143074734, "grad_norm": 0.08692929893732071, "learning_rate": 7.316578637763149e-06, "loss": 8.5886, "step": 151530 }, { "epoch": 0.7567729531324128, "grad_norm": 0.08495133370161057, "learning_rate": 7.315076722821598e-06, "loss": 8.5822, "step": 151540 }, { "epoch": 0.7568228919573522, "grad_norm": 0.09004715830087662, "learning_rate": 7.313574807880047e-06, "loss": 8.5829, "step": 151550 }, { "epoch": 0.7568728307822917, "grad_norm": 0.09108411520719528, "learning_rate": 7.312072892938497e-06, "loss": 8.5784, "step": 151560 }, { "epoch": 0.7569227696072311, "grad_norm": 0.0907498225569725, "learning_rate": 7.310570977996946e-06, "loss": 8.5694, "step": 151570 }, { "epoch": 0.7569727084321706, "grad_norm": 0.08905670791864395, "learning_rate": 7.309069063055396e-06, "loss": 8.5844, "step": 151580 }, { "epoch": 0.75702264725711, "grad_norm": 0.08316656202077866, "learning_rate": 7.3075671481138454e-06, "loss": 8.5998, "step": 151590 }, { "epoch": 0.7570725860820495, "grad_norm": 0.09294084459543228, "learning_rate": 7.306065233172295e-06, "loss": 8.5846, "step": 151600 }, { "epoch": 0.757122524906989, "grad_norm": 0.08754400908946991, "learning_rate": 7.304563318230744e-06, "loss": 8.5693, "step": 151610 }, { "epoch": 0.7571724637319284, "grad_norm": 0.09371070563793182, "learning_rate": 7.303061403289194e-06, "loss": 8.5871, "step": 151620 }, { "epoch": 0.7572224025568678, "grad_norm": 0.08874963223934174, "learning_rate": 7.301559488347644e-06, "loss": 8.5855, "step": 151630 }, { "epoch": 0.7572723413818073, "grad_norm": 0.09325557202100754, "learning_rate": 7.300057573406093e-06, "loss": 8.5884, "step": 151640 }, { "epoch": 0.7573222802067467, "grad_norm": 0.0924559012055397, "learning_rate": 7.298555658464542e-06, "loss": 8.582, "step": 151650 }, { "epoch": 0.7573722190316862, "grad_norm": 0.09226152300834656, "learning_rate": 7.297053743522992e-06, "loss": 8.5726, "step": 151660 }, { "epoch": 0.7574221578566256, "grad_norm": 0.09244561940431595, "learning_rate": 7.295551828581442e-06, "loss": 8.5807, "step": 151670 }, { "epoch": 0.7574720966815651, "grad_norm": 0.08866026997566223, "learning_rate": 7.294049913639891e-06, "loss": 8.6003, "step": 151680 }, { "epoch": 0.7575220355065045, "grad_norm": 0.09373471140861511, "learning_rate": 7.2925479986983404e-06, "loss": 8.5914, "step": 151690 }, { "epoch": 0.757571974331444, "grad_norm": 0.09084302186965942, "learning_rate": 7.291046083756791e-06, "loss": 8.5745, "step": 151700 }, { "epoch": 0.7576219131563834, "grad_norm": 0.0983499139547348, "learning_rate": 7.289544168815239e-06, "loss": 8.5901, "step": 151710 }, { "epoch": 0.7576718519813229, "grad_norm": 0.09153123944997787, "learning_rate": 7.288042253873689e-06, "loss": 8.5917, "step": 151720 }, { "epoch": 0.7577217908062623, "grad_norm": 0.08924426138401031, "learning_rate": 7.286540338932139e-06, "loss": 8.5917, "step": 151730 }, { "epoch": 0.7577717296312018, "grad_norm": 0.09073387831449509, "learning_rate": 7.285038423990588e-06, "loss": 8.5854, "step": 151740 }, { "epoch": 0.7578216684561412, "grad_norm": 0.09264533221721649, "learning_rate": 7.283536509049038e-06, "loss": 8.5888, "step": 151750 }, { "epoch": 0.7578716072810807, "grad_norm": 0.09185033291578293, "learning_rate": 7.282034594107487e-06, "loss": 8.585, "step": 151760 }, { "epoch": 0.7579215461060201, "grad_norm": 0.09249900281429291, "learning_rate": 7.280532679165937e-06, "loss": 8.5612, "step": 151770 }, { "epoch": 0.7579714849309596, "grad_norm": 0.08698815107345581, "learning_rate": 7.279030764224387e-06, "loss": 8.6089, "step": 151780 }, { "epoch": 0.758021423755899, "grad_norm": 0.09104190021753311, "learning_rate": 7.2775288492828355e-06, "loss": 8.5981, "step": 151790 }, { "epoch": 0.7580713625808385, "grad_norm": 0.08875631541013718, "learning_rate": 7.276026934341286e-06, "loss": 8.5945, "step": 151800 }, { "epoch": 0.7581213014057779, "grad_norm": 0.08652567863464355, "learning_rate": 7.274525019399734e-06, "loss": 8.5882, "step": 151810 }, { "epoch": 0.7581712402307174, "grad_norm": 0.0900021344423294, "learning_rate": 7.273023104458184e-06, "loss": 8.5922, "step": 151820 }, { "epoch": 0.7582211790556568, "grad_norm": 0.09021075814962387, "learning_rate": 7.2715211895166345e-06, "loss": 8.5893, "step": 151830 }, { "epoch": 0.7582711178805963, "grad_norm": 0.09226670861244202, "learning_rate": 7.270019274575083e-06, "loss": 8.5892, "step": 151840 }, { "epoch": 0.7583210567055357, "grad_norm": 0.09223869442939758, "learning_rate": 7.268517359633533e-06, "loss": 8.5831, "step": 151850 }, { "epoch": 0.7583709955304752, "grad_norm": 0.08831837028265, "learning_rate": 7.2670154446919825e-06, "loss": 8.5805, "step": 151860 }, { "epoch": 0.7584209343554146, "grad_norm": 0.0934990644454956, "learning_rate": 7.265513529750432e-06, "loss": 8.5758, "step": 151870 }, { "epoch": 0.7584708731803541, "grad_norm": 0.09644410014152527, "learning_rate": 7.264011614808882e-06, "loss": 8.5791, "step": 151880 }, { "epoch": 0.7585208120052935, "grad_norm": 0.08798360824584961, "learning_rate": 7.2625096998673305e-06, "loss": 8.584, "step": 151890 }, { "epoch": 0.758570750830233, "grad_norm": 0.08095114678144455, "learning_rate": 7.261007784925781e-06, "loss": 8.6034, "step": 151900 }, { "epoch": 0.7586206896551724, "grad_norm": 0.09009264409542084, "learning_rate": 7.25950586998423e-06, "loss": 8.5723, "step": 151910 }, { "epoch": 0.7586706284801119, "grad_norm": 0.08817630261182785, "learning_rate": 7.258003955042679e-06, "loss": 8.5885, "step": 151920 }, { "epoch": 0.7587205673050513, "grad_norm": 0.09514699131250381, "learning_rate": 7.2565020401011295e-06, "loss": 8.5849, "step": 151930 }, { "epoch": 0.7587705061299908, "grad_norm": 0.09281176328659058, "learning_rate": 7.255000125159579e-06, "loss": 8.579, "step": 151940 }, { "epoch": 0.7588204449549302, "grad_norm": 0.08887535333633423, "learning_rate": 7.253498210218028e-06, "loss": 8.6016, "step": 151950 }, { "epoch": 0.7588703837798697, "grad_norm": 0.08661656081676483, "learning_rate": 7.251996295276478e-06, "loss": 8.5982, "step": 151960 }, { "epoch": 0.7589203226048091, "grad_norm": 0.09328266978263855, "learning_rate": 7.250494380334927e-06, "loss": 8.5797, "step": 151970 }, { "epoch": 0.7589702614297485, "grad_norm": 0.09176629781723022, "learning_rate": 7.248992465393377e-06, "loss": 8.5894, "step": 151980 }, { "epoch": 0.759020200254688, "grad_norm": 0.08731918781995773, "learning_rate": 7.247490550451826e-06, "loss": 8.5794, "step": 151990 }, { "epoch": 0.7590701390796275, "grad_norm": 0.088376984000206, "learning_rate": 7.245988635510276e-06, "loss": 8.5798, "step": 152000 }, { "epoch": 0.7591200779045669, "grad_norm": 0.09818810224533081, "learning_rate": 7.244486720568726e-06, "loss": 8.5913, "step": 152010 }, { "epoch": 0.7591700167295063, "grad_norm": 0.08995293080806732, "learning_rate": 7.242984805627175e-06, "loss": 8.5928, "step": 152020 }, { "epoch": 0.7592199555544458, "grad_norm": 0.09495987743139267, "learning_rate": 7.2414828906856245e-06, "loss": 8.5887, "step": 152030 }, { "epoch": 0.7592698943793853, "grad_norm": 0.09125667065382004, "learning_rate": 7.239980975744074e-06, "loss": 8.5905, "step": 152040 }, { "epoch": 0.7593198332043247, "grad_norm": 0.09113696217536926, "learning_rate": 7.238479060802523e-06, "loss": 8.5895, "step": 152050 }, { "epoch": 0.7593697720292641, "grad_norm": 0.09109106659889221, "learning_rate": 7.236977145860973e-06, "loss": 8.5895, "step": 152060 }, { "epoch": 0.7594197108542036, "grad_norm": 0.09017052501440048, "learning_rate": 7.235475230919423e-06, "loss": 8.5818, "step": 152070 }, { "epoch": 0.7594696496791431, "grad_norm": 0.08849617093801498, "learning_rate": 7.233973315977872e-06, "loss": 8.5957, "step": 152080 }, { "epoch": 0.7595195885040825, "grad_norm": 0.09171763062477112, "learning_rate": 7.232471401036321e-06, "loss": 8.5808, "step": 152090 }, { "epoch": 0.7595695273290219, "grad_norm": 0.10110411792993546, "learning_rate": 7.2309694860947715e-06, "loss": 8.5877, "step": 152100 }, { "epoch": 0.7596194661539614, "grad_norm": 0.09818938374519348, "learning_rate": 7.229467571153221e-06, "loss": 8.6031, "step": 152110 }, { "epoch": 0.7596694049789009, "grad_norm": 0.09149213880300522, "learning_rate": 7.22796565621167e-06, "loss": 8.5883, "step": 152120 }, { "epoch": 0.7597193438038403, "grad_norm": 0.09270060062408447, "learning_rate": 7.2264637412701195e-06, "loss": 8.603, "step": 152130 }, { "epoch": 0.7597692826287797, "grad_norm": 0.08630634099245071, "learning_rate": 7.224961826328569e-06, "loss": 8.5836, "step": 152140 }, { "epoch": 0.7598192214537192, "grad_norm": 0.0882948487997055, "learning_rate": 7.223459911387019e-06, "loss": 8.5835, "step": 152150 }, { "epoch": 0.7598691602786587, "grad_norm": 0.09250038117170334, "learning_rate": 7.221957996445468e-06, "loss": 8.5625, "step": 152160 }, { "epoch": 0.7599190991035981, "grad_norm": 0.09016095101833344, "learning_rate": 7.220456081503918e-06, "loss": 8.5824, "step": 152170 }, { "epoch": 0.7599690379285375, "grad_norm": 0.08857563883066177, "learning_rate": 7.218954166562367e-06, "loss": 8.5876, "step": 152180 }, { "epoch": 0.760018976753477, "grad_norm": 0.09116075932979584, "learning_rate": 7.217452251620816e-06, "loss": 8.577, "step": 152190 }, { "epoch": 0.7600689155784165, "grad_norm": 0.09463620185852051, "learning_rate": 7.2159503366792665e-06, "loss": 8.5771, "step": 152200 }, { "epoch": 0.7601188544033559, "grad_norm": 0.09192495793104172, "learning_rate": 7.214448421737716e-06, "loss": 8.5971, "step": 152210 }, { "epoch": 0.7601687932282953, "grad_norm": 0.09425206482410431, "learning_rate": 7.212946506796165e-06, "loss": 8.5846, "step": 152220 }, { "epoch": 0.7602187320532348, "grad_norm": 0.0923011526465416, "learning_rate": 7.211444591854615e-06, "loss": 8.576, "step": 152230 }, { "epoch": 0.7602686708781743, "grad_norm": 0.08811124414205551, "learning_rate": 7.209942676913064e-06, "loss": 8.587, "step": 152240 }, { "epoch": 0.7603186097031137, "grad_norm": 0.08695264905691147, "learning_rate": 7.208440761971514e-06, "loss": 8.59, "step": 152250 }, { "epoch": 0.7603685485280531, "grad_norm": 0.0906856581568718, "learning_rate": 7.206938847029963e-06, "loss": 8.5963, "step": 152260 }, { "epoch": 0.7604184873529926, "grad_norm": 0.09710587561130524, "learning_rate": 7.205436932088413e-06, "loss": 8.5624, "step": 152270 }, { "epoch": 0.7604684261779321, "grad_norm": 0.09217939525842667, "learning_rate": 7.203935017146863e-06, "loss": 8.5973, "step": 152280 }, { "epoch": 0.7605183650028715, "grad_norm": 0.09139391779899597, "learning_rate": 7.202433102205311e-06, "loss": 8.5674, "step": 152290 }, { "epoch": 0.7605683038278109, "grad_norm": 0.08444222062826157, "learning_rate": 7.2009311872637615e-06, "loss": 8.5952, "step": 152300 }, { "epoch": 0.7606182426527504, "grad_norm": 0.09180841594934464, "learning_rate": 7.199429272322212e-06, "loss": 8.596, "step": 152310 }, { "epoch": 0.7606681814776898, "grad_norm": 0.09961362183094025, "learning_rate": 7.19792735738066e-06, "loss": 8.5865, "step": 152320 }, { "epoch": 0.7607181203026293, "grad_norm": 0.09278172999620438, "learning_rate": 7.19642544243911e-06, "loss": 8.5995, "step": 152330 }, { "epoch": 0.7607680591275687, "grad_norm": 0.08981714397668839, "learning_rate": 7.194923527497559e-06, "loss": 8.578, "step": 152340 }, { "epoch": 0.7608179979525082, "grad_norm": 0.09385214000940323, "learning_rate": 7.193421612556009e-06, "loss": 8.5784, "step": 152350 }, { "epoch": 0.7608679367774476, "grad_norm": 0.09259159117937088, "learning_rate": 7.191919697614459e-06, "loss": 8.5798, "step": 152360 }, { "epoch": 0.7609178756023871, "grad_norm": 0.0899551659822464, "learning_rate": 7.190417782672908e-06, "loss": 8.5876, "step": 152370 }, { "epoch": 0.7609678144273265, "grad_norm": 0.09498420357704163, "learning_rate": 7.188915867731358e-06, "loss": 8.5757, "step": 152380 }, { "epoch": 0.761017753252266, "grad_norm": 0.09275230020284653, "learning_rate": 7.187413952789807e-06, "loss": 8.5825, "step": 152390 }, { "epoch": 0.7610676920772054, "grad_norm": 0.09266764670610428, "learning_rate": 7.1859120378482565e-06, "loss": 8.5912, "step": 152400 }, { "epoch": 0.7611176309021449, "grad_norm": 0.08858975768089294, "learning_rate": 7.184410122906707e-06, "loss": 8.5878, "step": 152410 }, { "epoch": 0.7611675697270843, "grad_norm": 0.09748895466327667, "learning_rate": 7.182908207965155e-06, "loss": 8.5744, "step": 152420 }, { "epoch": 0.7612175085520237, "grad_norm": 0.09607774019241333, "learning_rate": 7.181406293023605e-06, "loss": 8.5716, "step": 152430 }, { "epoch": 0.7612674473769632, "grad_norm": 0.09278061985969543, "learning_rate": 7.179904378082055e-06, "loss": 8.5937, "step": 152440 }, { "epoch": 0.7613173862019027, "grad_norm": 0.0908476933836937, "learning_rate": 7.178402463140504e-06, "loss": 8.5842, "step": 152450 }, { "epoch": 0.7613673250268421, "grad_norm": 0.0958239734172821, "learning_rate": 7.176900548198954e-06, "loss": 8.593, "step": 152460 }, { "epoch": 0.7614172638517815, "grad_norm": 0.08807183057069778, "learning_rate": 7.1753986332574035e-06, "loss": 8.5842, "step": 152470 }, { "epoch": 0.761467202676721, "grad_norm": 0.09366779029369354, "learning_rate": 7.173896718315853e-06, "loss": 8.5819, "step": 152480 }, { "epoch": 0.7615171415016605, "grad_norm": 0.09066038578748703, "learning_rate": 7.172394803374302e-06, "loss": 8.5982, "step": 152490 }, { "epoch": 0.7615670803265999, "grad_norm": 0.08774998039007187, "learning_rate": 7.1708928884327515e-06, "loss": 8.5932, "step": 152500 }, { "epoch": 0.7616170191515393, "grad_norm": 0.09205503761768341, "learning_rate": 7.169390973491202e-06, "loss": 8.5773, "step": 152510 }, { "epoch": 0.7616669579764788, "grad_norm": 0.08959227055311203, "learning_rate": 7.167889058549651e-06, "loss": 8.586, "step": 152520 }, { "epoch": 0.7617168968014183, "grad_norm": 0.09033005684614182, "learning_rate": 7.1663871436081e-06, "loss": 8.5832, "step": 152530 }, { "epoch": 0.7617668356263577, "grad_norm": 0.08551044017076492, "learning_rate": 7.16488522866655e-06, "loss": 8.5845, "step": 152540 }, { "epoch": 0.7618167744512971, "grad_norm": 0.09518376737833023, "learning_rate": 7.163383313725e-06, "loss": 8.5857, "step": 152550 }, { "epoch": 0.7618667132762366, "grad_norm": 0.0905361995100975, "learning_rate": 7.161881398783449e-06, "loss": 8.5745, "step": 152560 }, { "epoch": 0.7619166521011761, "grad_norm": 0.09344843029975891, "learning_rate": 7.1603794838418985e-06, "loss": 8.5731, "step": 152570 }, { "epoch": 0.7619665909261155, "grad_norm": 0.08621740341186523, "learning_rate": 7.158877568900348e-06, "loss": 8.582, "step": 152580 }, { "epoch": 0.7620165297510549, "grad_norm": 0.09143950790166855, "learning_rate": 7.157375653958797e-06, "loss": 8.5648, "step": 152590 }, { "epoch": 0.7620664685759944, "grad_norm": 0.09078672528266907, "learning_rate": 7.155873739017247e-06, "loss": 8.5895, "step": 152600 }, { "epoch": 0.7621164074009339, "grad_norm": 0.09113630652427673, "learning_rate": 7.154371824075697e-06, "loss": 8.5785, "step": 152610 }, { "epoch": 0.7621663462258733, "grad_norm": 0.09150314331054688, "learning_rate": 7.152869909134146e-06, "loss": 8.5799, "step": 152620 }, { "epoch": 0.7622162850508127, "grad_norm": 0.08845186978578568, "learning_rate": 7.151367994192596e-06, "loss": 8.5894, "step": 152630 }, { "epoch": 0.7622662238757522, "grad_norm": 0.09440822899341583, "learning_rate": 7.149866079251045e-06, "loss": 8.5837, "step": 152640 }, { "epoch": 0.7623161627006917, "grad_norm": 0.08900156617164612, "learning_rate": 7.148364164309495e-06, "loss": 8.5828, "step": 152650 }, { "epoch": 0.7623661015256311, "grad_norm": 0.09400945901870728, "learning_rate": 7.146862249367944e-06, "loss": 8.576, "step": 152660 }, { "epoch": 0.7624160403505705, "grad_norm": 0.09294036775827408, "learning_rate": 7.1453603344263935e-06, "loss": 8.5688, "step": 152670 }, { "epoch": 0.76246597917551, "grad_norm": 0.09247752279043198, "learning_rate": 7.143858419484844e-06, "loss": 8.5811, "step": 152680 }, { "epoch": 0.7625159180004495, "grad_norm": 0.09127040207386017, "learning_rate": 7.142356504543292e-06, "loss": 8.5837, "step": 152690 }, { "epoch": 0.7625658568253889, "grad_norm": 0.0945616066455841, "learning_rate": 7.140854589601742e-06, "loss": 8.5798, "step": 152700 }, { "epoch": 0.7626157956503283, "grad_norm": 0.08728938549757004, "learning_rate": 7.1393526746601926e-06, "loss": 8.5811, "step": 152710 }, { "epoch": 0.7626657344752678, "grad_norm": 0.09225558489561081, "learning_rate": 7.137850759718641e-06, "loss": 8.6107, "step": 152720 }, { "epoch": 0.7627156733002073, "grad_norm": 0.09513969719409943, "learning_rate": 7.136348844777091e-06, "loss": 8.5764, "step": 152730 }, { "epoch": 0.7627656121251467, "grad_norm": 0.09618328511714935, "learning_rate": 7.13484692983554e-06, "loss": 8.5874, "step": 152740 }, { "epoch": 0.7628155509500861, "grad_norm": 0.09063924103975296, "learning_rate": 7.13334501489399e-06, "loss": 8.5823, "step": 152750 }, { "epoch": 0.7628654897750256, "grad_norm": 0.09331314265727997, "learning_rate": 7.13184309995244e-06, "loss": 8.5897, "step": 152760 }, { "epoch": 0.7629154285999651, "grad_norm": 0.09065219014883041, "learning_rate": 7.1303411850108886e-06, "loss": 8.5748, "step": 152770 }, { "epoch": 0.7629653674249045, "grad_norm": 0.09267207235097885, "learning_rate": 7.128839270069339e-06, "loss": 8.5953, "step": 152780 }, { "epoch": 0.7630153062498439, "grad_norm": 0.094063401222229, "learning_rate": 7.127337355127788e-06, "loss": 8.5751, "step": 152790 }, { "epoch": 0.7630652450747833, "grad_norm": 0.09036116302013397, "learning_rate": 7.125835440186237e-06, "loss": 8.6051, "step": 152800 }, { "epoch": 0.7631151838997229, "grad_norm": 0.08873546123504639, "learning_rate": 7.1243335252446876e-06, "loss": 8.5893, "step": 152810 }, { "epoch": 0.7631651227246623, "grad_norm": 0.0934579074382782, "learning_rate": 7.122831610303136e-06, "loss": 8.5921, "step": 152820 }, { "epoch": 0.7632150615496017, "grad_norm": 0.08765194565057755, "learning_rate": 7.121329695361586e-06, "loss": 8.5853, "step": 152830 }, { "epoch": 0.7632650003745411, "grad_norm": 0.09874917566776276, "learning_rate": 7.1198277804200356e-06, "loss": 8.6031, "step": 152840 }, { "epoch": 0.7633149391994807, "grad_norm": 0.10156397521495819, "learning_rate": 7.118325865478485e-06, "loss": 8.5753, "step": 152850 }, { "epoch": 0.7633648780244201, "grad_norm": 0.0943882167339325, "learning_rate": 7.116823950536935e-06, "loss": 8.5921, "step": 152860 }, { "epoch": 0.7634148168493595, "grad_norm": 0.0905870795249939, "learning_rate": 7.115322035595384e-06, "loss": 8.5748, "step": 152870 }, { "epoch": 0.763464755674299, "grad_norm": 0.09174995124340057, "learning_rate": 7.113820120653834e-06, "loss": 8.5743, "step": 152880 }, { "epoch": 0.7635146944992385, "grad_norm": 0.08916423469781876, "learning_rate": 7.112318205712283e-06, "loss": 8.5986, "step": 152890 }, { "epoch": 0.7635646333241779, "grad_norm": 0.0889330729842186, "learning_rate": 7.110816290770732e-06, "loss": 8.5888, "step": 152900 }, { "epoch": 0.7636145721491173, "grad_norm": 0.08929458260536194, "learning_rate": 7.109314375829183e-06, "loss": 8.5861, "step": 152910 }, { "epoch": 0.7636645109740567, "grad_norm": 0.08744913339614868, "learning_rate": 7.107812460887632e-06, "loss": 8.5891, "step": 152920 }, { "epoch": 0.7637144497989963, "grad_norm": 0.08869093656539917, "learning_rate": 7.106310545946081e-06, "loss": 8.5786, "step": 152930 }, { "epoch": 0.7637643886239357, "grad_norm": 0.09083859622478485, "learning_rate": 7.1048086310045306e-06, "loss": 8.5703, "step": 152940 }, { "epoch": 0.7638143274488751, "grad_norm": 0.09250235557556152, "learning_rate": 7.103306716062981e-06, "loss": 8.5856, "step": 152950 }, { "epoch": 0.7638642662738145, "grad_norm": 0.09155931323766708, "learning_rate": 7.10180480112143e-06, "loss": 8.5781, "step": 152960 }, { "epoch": 0.7639142050987541, "grad_norm": 0.08976297080516815, "learning_rate": 7.100302886179879e-06, "loss": 8.5806, "step": 152970 }, { "epoch": 0.7639641439236935, "grad_norm": 0.0865217074751854, "learning_rate": 7.098800971238329e-06, "loss": 8.5913, "step": 152980 }, { "epoch": 0.7640140827486329, "grad_norm": 0.09239277988672256, "learning_rate": 7.097299056296778e-06, "loss": 8.5603, "step": 152990 }, { "epoch": 0.7640640215735723, "grad_norm": 0.09613004326820374, "learning_rate": 7.095797141355228e-06, "loss": 8.5824, "step": 153000 }, { "epoch": 0.7641139603985119, "grad_norm": 0.09154446423053741, "learning_rate": 7.094295226413678e-06, "loss": 8.5709, "step": 153010 }, { "epoch": 0.7641638992234513, "grad_norm": 0.08664754033088684, "learning_rate": 7.092793311472127e-06, "loss": 8.5846, "step": 153020 }, { "epoch": 0.7642138380483907, "grad_norm": 0.08845943212509155, "learning_rate": 7.091291396530577e-06, "loss": 8.5786, "step": 153030 }, { "epoch": 0.7642637768733301, "grad_norm": 0.09169842302799225, "learning_rate": 7.0897894815890264e-06, "loss": 8.5761, "step": 153040 }, { "epoch": 0.7643137156982697, "grad_norm": 0.09660103172063828, "learning_rate": 7.088287566647476e-06, "loss": 8.5811, "step": 153050 }, { "epoch": 0.7643636545232091, "grad_norm": 0.09721507132053375, "learning_rate": 7.086785651705925e-06, "loss": 8.564, "step": 153060 }, { "epoch": 0.7644135933481485, "grad_norm": 0.08967658877372742, "learning_rate": 7.0852837367643744e-06, "loss": 8.5768, "step": 153070 }, { "epoch": 0.7644635321730879, "grad_norm": 0.08986034244298935, "learning_rate": 7.083781821822825e-06, "loss": 8.5778, "step": 153080 }, { "epoch": 0.7645134709980275, "grad_norm": 0.09163659065961838, "learning_rate": 7.082279906881274e-06, "loss": 8.5838, "step": 153090 }, { "epoch": 0.7645634098229669, "grad_norm": 0.0884322002530098, "learning_rate": 7.080777991939723e-06, "loss": 8.5956, "step": 153100 }, { "epoch": 0.7646133486479063, "grad_norm": 0.08686332404613495, "learning_rate": 7.0792760769981734e-06, "loss": 8.5683, "step": 153110 }, { "epoch": 0.7646632874728457, "grad_norm": 0.09294196218252182, "learning_rate": 7.077774162056622e-06, "loss": 8.5796, "step": 153120 }, { "epoch": 0.7647132262977853, "grad_norm": 0.08890484273433685, "learning_rate": 7.076272247115072e-06, "loss": 8.5687, "step": 153130 }, { "epoch": 0.7647631651227247, "grad_norm": 0.08904290199279785, "learning_rate": 7.0747703321735214e-06, "loss": 8.5894, "step": 153140 }, { "epoch": 0.7648131039476641, "grad_norm": 0.09268071502447128, "learning_rate": 7.073268417231971e-06, "loss": 8.5896, "step": 153150 }, { "epoch": 0.7648630427726035, "grad_norm": 0.08970651030540466, "learning_rate": 7.071766502290421e-06, "loss": 8.6045, "step": 153160 }, { "epoch": 0.7649129815975431, "grad_norm": 0.09089753031730652, "learning_rate": 7.0702645873488694e-06, "loss": 8.5732, "step": 153170 }, { "epoch": 0.7649629204224825, "grad_norm": 0.0952310562133789, "learning_rate": 7.06876267240732e-06, "loss": 8.5883, "step": 153180 }, { "epoch": 0.7650128592474219, "grad_norm": 0.09604635834693909, "learning_rate": 7.06726075746577e-06, "loss": 8.5674, "step": 153190 }, { "epoch": 0.7650627980723613, "grad_norm": 0.0942385122179985, "learning_rate": 7.065758842524218e-06, "loss": 8.5867, "step": 153200 }, { "epoch": 0.7651127368973009, "grad_norm": 0.08863166719675064, "learning_rate": 7.0642569275826685e-06, "loss": 8.5591, "step": 153210 }, { "epoch": 0.7651626757222403, "grad_norm": 0.09197341650724411, "learning_rate": 7.062755012641117e-06, "loss": 8.5931, "step": 153220 }, { "epoch": 0.7652126145471797, "grad_norm": 0.09086757153272629, "learning_rate": 7.061253097699567e-06, "loss": 8.578, "step": 153230 }, { "epoch": 0.7652625533721191, "grad_norm": 0.08695393055677414, "learning_rate": 7.059751182758017e-06, "loss": 8.5728, "step": 153240 }, { "epoch": 0.7653124921970587, "grad_norm": 0.09496577829122543, "learning_rate": 7.058249267816466e-06, "loss": 8.5701, "step": 153250 }, { "epoch": 0.7653624310219981, "grad_norm": 0.09787900000810623, "learning_rate": 7.056747352874916e-06, "loss": 8.5682, "step": 153260 }, { "epoch": 0.7654123698469375, "grad_norm": 0.0917130559682846, "learning_rate": 7.055245437933365e-06, "loss": 8.57, "step": 153270 }, { "epoch": 0.7654623086718769, "grad_norm": 0.08664479851722717, "learning_rate": 7.053743522991815e-06, "loss": 8.5829, "step": 153280 }, { "epoch": 0.7655122474968163, "grad_norm": 0.08745972812175751, "learning_rate": 7.052241608050265e-06, "loss": 8.564, "step": 153290 }, { "epoch": 0.7655621863217559, "grad_norm": 0.09336210042238235, "learning_rate": 7.050739693108713e-06, "loss": 8.5767, "step": 153300 }, { "epoch": 0.7656121251466953, "grad_norm": 0.0911019816994667, "learning_rate": 7.0492377781671635e-06, "loss": 8.5855, "step": 153310 }, { "epoch": 0.7656620639716347, "grad_norm": 0.09164589643478394, "learning_rate": 7.047735863225613e-06, "loss": 8.5782, "step": 153320 }, { "epoch": 0.7657120027965741, "grad_norm": 0.09078686684370041, "learning_rate": 7.046233948284062e-06, "loss": 8.5899, "step": 153330 }, { "epoch": 0.7657619416215137, "grad_norm": 0.09011105448007584, "learning_rate": 7.044732033342512e-06, "loss": 8.5706, "step": 153340 }, { "epoch": 0.7658118804464531, "grad_norm": 0.08931349217891693, "learning_rate": 7.043230118400962e-06, "loss": 8.5779, "step": 153350 }, { "epoch": 0.7658618192713925, "grad_norm": 0.09002659469842911, "learning_rate": 7.041728203459411e-06, "loss": 8.5732, "step": 153360 }, { "epoch": 0.7659117580963319, "grad_norm": 0.0962076261639595, "learning_rate": 7.04022628851786e-06, "loss": 8.5705, "step": 153370 }, { "epoch": 0.7659616969212715, "grad_norm": 0.0866306945681572, "learning_rate": 7.03872437357631e-06, "loss": 8.5906, "step": 153380 }, { "epoch": 0.7660116357462109, "grad_norm": 0.09287164360284805, "learning_rate": 7.03722245863476e-06, "loss": 8.5881, "step": 153390 }, { "epoch": 0.7660615745711503, "grad_norm": 0.09079669415950775, "learning_rate": 7.035720543693209e-06, "loss": 8.5676, "step": 153400 }, { "epoch": 0.7661115133960897, "grad_norm": 0.08933369070291519, "learning_rate": 7.0342186287516585e-06, "loss": 8.5854, "step": 153410 }, { "epoch": 0.7661614522210293, "grad_norm": 0.09217403084039688, "learning_rate": 7.032716713810108e-06, "loss": 8.5743, "step": 153420 }, { "epoch": 0.7662113910459687, "grad_norm": 0.08750222623348236, "learning_rate": 7.031214798868558e-06, "loss": 8.5838, "step": 153430 }, { "epoch": 0.7662613298709081, "grad_norm": 0.09187041968107224, "learning_rate": 7.029712883927007e-06, "loss": 8.5779, "step": 153440 }, { "epoch": 0.7663112686958475, "grad_norm": 0.09300150722265244, "learning_rate": 7.028210968985457e-06, "loss": 8.5867, "step": 153450 }, { "epoch": 0.7663612075207871, "grad_norm": 0.08883293718099594, "learning_rate": 7.026709054043906e-06, "loss": 8.5779, "step": 153460 }, { "epoch": 0.7664111463457265, "grad_norm": 0.09035582095384598, "learning_rate": 7.025207139102355e-06, "loss": 8.5666, "step": 153470 }, { "epoch": 0.7664610851706659, "grad_norm": 0.09275439381599426, "learning_rate": 7.0237052241608055e-06, "loss": 8.5823, "step": 153480 }, { "epoch": 0.7665110239956053, "grad_norm": 0.08916142582893372, "learning_rate": 7.022203309219255e-06, "loss": 8.5674, "step": 153490 }, { "epoch": 0.7665609628205449, "grad_norm": 0.09958826005458832, "learning_rate": 7.020701394277704e-06, "loss": 8.5855, "step": 153500 }, { "epoch": 0.7666109016454843, "grad_norm": 0.09693287312984467, "learning_rate": 7.019199479336154e-06, "loss": 8.6037, "step": 153510 }, { "epoch": 0.7666608404704237, "grad_norm": 0.09419762343168259, "learning_rate": 7.017697564394603e-06, "loss": 8.6014, "step": 153520 }, { "epoch": 0.7667107792953631, "grad_norm": 0.08924181014299393, "learning_rate": 7.016195649453053e-06, "loss": 8.5692, "step": 153530 }, { "epoch": 0.7667607181203027, "grad_norm": 0.09446694701910019, "learning_rate": 7.014693734511502e-06, "loss": 8.5536, "step": 153540 }, { "epoch": 0.7668106569452421, "grad_norm": 0.09108909219503403, "learning_rate": 7.013191819569952e-06, "loss": 8.572, "step": 153550 }, { "epoch": 0.7668605957701815, "grad_norm": 0.09171731770038605, "learning_rate": 7.011689904628402e-06, "loss": 8.5945, "step": 153560 }, { "epoch": 0.7669105345951209, "grad_norm": 0.08465012907981873, "learning_rate": 7.01018798968685e-06, "loss": 8.5643, "step": 153570 }, { "epoch": 0.7669604734200605, "grad_norm": 0.09661739319562912, "learning_rate": 7.0086860747453005e-06, "loss": 8.5638, "step": 153580 }, { "epoch": 0.7670104122449999, "grad_norm": 0.0985170304775238, "learning_rate": 7.007184159803751e-06, "loss": 8.5805, "step": 153590 }, { "epoch": 0.7670603510699393, "grad_norm": 0.08951466530561447, "learning_rate": 7.005682244862199e-06, "loss": 8.5677, "step": 153600 }, { "epoch": 0.7671102898948787, "grad_norm": 0.08966083079576492, "learning_rate": 7.004180329920649e-06, "loss": 8.5769, "step": 153610 }, { "epoch": 0.7671602287198183, "grad_norm": 0.09198177605867386, "learning_rate": 7.002678414979098e-06, "loss": 8.5674, "step": 153620 }, { "epoch": 0.7672101675447577, "grad_norm": 0.0966639444231987, "learning_rate": 7.001176500037548e-06, "loss": 8.5847, "step": 153630 }, { "epoch": 0.7672601063696971, "grad_norm": 0.10365894436836243, "learning_rate": 6.999674585095998e-06, "loss": 8.5755, "step": 153640 }, { "epoch": 0.7673100451946365, "grad_norm": 0.08775070309638977, "learning_rate": 6.998172670154447e-06, "loss": 8.5752, "step": 153650 }, { "epoch": 0.7673599840195761, "grad_norm": 0.09237588942050934, "learning_rate": 6.996670755212897e-06, "loss": 8.5767, "step": 153660 }, { "epoch": 0.7674099228445155, "grad_norm": 0.0916270911693573, "learning_rate": 6.995168840271346e-06, "loss": 8.5828, "step": 153670 }, { "epoch": 0.7674598616694549, "grad_norm": 0.08937940746545792, "learning_rate": 6.9936669253297955e-06, "loss": 8.5851, "step": 153680 }, { "epoch": 0.7675098004943943, "grad_norm": 0.08998386561870575, "learning_rate": 6.992165010388246e-06, "loss": 8.5821, "step": 153690 }, { "epoch": 0.7675597393193339, "grad_norm": 0.09344659745693207, "learning_rate": 6.990663095446694e-06, "loss": 8.5799, "step": 153700 }, { "epoch": 0.7676096781442733, "grad_norm": 0.08797649294137955, "learning_rate": 6.989161180505144e-06, "loss": 8.5644, "step": 153710 }, { "epoch": 0.7676596169692127, "grad_norm": 0.08696374297142029, "learning_rate": 6.987659265563594e-06, "loss": 8.575, "step": 153720 }, { "epoch": 0.7677095557941521, "grad_norm": 0.08707814663648605, "learning_rate": 6.986157350622043e-06, "loss": 8.5779, "step": 153730 }, { "epoch": 0.7677594946190917, "grad_norm": 0.0909426361322403, "learning_rate": 6.984655435680493e-06, "loss": 8.581, "step": 153740 }, { "epoch": 0.7678094334440311, "grad_norm": 0.09398233890533447, "learning_rate": 6.9831535207389425e-06, "loss": 8.5727, "step": 153750 }, { "epoch": 0.7678593722689705, "grad_norm": 0.09371063113212585, "learning_rate": 6.981651605797392e-06, "loss": 8.5808, "step": 153760 }, { "epoch": 0.7679093110939099, "grad_norm": 0.09734338521957397, "learning_rate": 6.980149690855841e-06, "loss": 8.5737, "step": 153770 }, { "epoch": 0.7679592499188495, "grad_norm": 0.09083466976881027, "learning_rate": 6.9786477759142905e-06, "loss": 8.5716, "step": 153780 }, { "epoch": 0.7680091887437889, "grad_norm": 0.08842228353023529, "learning_rate": 6.977145860972741e-06, "loss": 8.5752, "step": 153790 }, { "epoch": 0.7680591275687283, "grad_norm": 0.08697518706321716, "learning_rate": 6.97564394603119e-06, "loss": 8.5822, "step": 153800 }, { "epoch": 0.7681090663936677, "grad_norm": 0.08936888724565506, "learning_rate": 6.974142031089639e-06, "loss": 8.582, "step": 153810 }, { "epoch": 0.7681590052186072, "grad_norm": 0.08924106508493423, "learning_rate": 6.972640116148089e-06, "loss": 8.5737, "step": 153820 }, { "epoch": 0.7682089440435467, "grad_norm": 0.08949612826108932, "learning_rate": 6.971138201206539e-06, "loss": 8.5744, "step": 153830 }, { "epoch": 0.7682588828684861, "grad_norm": 0.08947469294071198, "learning_rate": 6.969636286264988e-06, "loss": 8.5754, "step": 153840 }, { "epoch": 0.7683088216934255, "grad_norm": 0.09205352514982224, "learning_rate": 6.9681343713234375e-06, "loss": 8.5822, "step": 153850 }, { "epoch": 0.768358760518365, "grad_norm": 0.09942862391471863, "learning_rate": 6.966632456381887e-06, "loss": 8.582, "step": 153860 }, { "epoch": 0.7684086993433045, "grad_norm": 0.090413898229599, "learning_rate": 6.965130541440336e-06, "loss": 8.5727, "step": 153870 }, { "epoch": 0.7684586381682439, "grad_norm": 0.09462569653987885, "learning_rate": 6.963628626498786e-06, "loss": 8.5756, "step": 153880 }, { "epoch": 0.7685085769931833, "grad_norm": 0.09226179867982864, "learning_rate": 6.962126711557236e-06, "loss": 8.5864, "step": 153890 }, { "epoch": 0.7685585158181228, "grad_norm": 0.08182681351900101, "learning_rate": 6.960624796615685e-06, "loss": 8.5841, "step": 153900 }, { "epoch": 0.7686084546430623, "grad_norm": 0.09245304763317108, "learning_rate": 6.959122881674135e-06, "loss": 8.5817, "step": 153910 }, { "epoch": 0.7686583934680017, "grad_norm": 0.09626985341310501, "learning_rate": 6.957620966732584e-06, "loss": 8.595, "step": 153920 }, { "epoch": 0.7687083322929411, "grad_norm": 0.08915554732084274, "learning_rate": 6.956119051791034e-06, "loss": 8.5715, "step": 153930 }, { "epoch": 0.7687582711178806, "grad_norm": 0.09438993036746979, "learning_rate": 6.954617136849483e-06, "loss": 8.5903, "step": 153940 }, { "epoch": 0.7688082099428201, "grad_norm": 0.08598021417856216, "learning_rate": 6.9531152219079325e-06, "loss": 8.5767, "step": 153950 }, { "epoch": 0.7688581487677595, "grad_norm": 0.09764537960290909, "learning_rate": 6.951613306966383e-06, "loss": 8.5781, "step": 153960 }, { "epoch": 0.7689080875926989, "grad_norm": 0.09817574918270111, "learning_rate": 6.950111392024831e-06, "loss": 8.5746, "step": 153970 }, { "epoch": 0.7689580264176384, "grad_norm": 0.08399667590856552, "learning_rate": 6.948609477083281e-06, "loss": 8.5705, "step": 153980 }, { "epoch": 0.7690079652425779, "grad_norm": 0.09401653707027435, "learning_rate": 6.9471075621417315e-06, "loss": 8.5782, "step": 153990 }, { "epoch": 0.7690579040675173, "grad_norm": 0.0929185077548027, "learning_rate": 6.94560564720018e-06, "loss": 8.5698, "step": 154000 }, { "epoch": 0.7691078428924567, "grad_norm": 0.08887774497270584, "learning_rate": 6.94410373225863e-06, "loss": 8.5957, "step": 154010 }, { "epoch": 0.7691577817173962, "grad_norm": 0.08719068765640259, "learning_rate": 6.942601817317079e-06, "loss": 8.5963, "step": 154020 }, { "epoch": 0.7692077205423357, "grad_norm": 0.09064088016748428, "learning_rate": 6.941099902375529e-06, "loss": 8.5823, "step": 154030 }, { "epoch": 0.7692576593672751, "grad_norm": 0.09441380947828293, "learning_rate": 6.939597987433979e-06, "loss": 8.5988, "step": 154040 }, { "epoch": 0.7693075981922145, "grad_norm": 0.08752386271953583, "learning_rate": 6.9380960724924275e-06, "loss": 8.5738, "step": 154050 }, { "epoch": 0.769357537017154, "grad_norm": 0.09293375164270401, "learning_rate": 6.936594157550878e-06, "loss": 8.581, "step": 154060 }, { "epoch": 0.7694074758420935, "grad_norm": 0.09073223173618317, "learning_rate": 6.935092242609327e-06, "loss": 8.586, "step": 154070 }, { "epoch": 0.7694574146670329, "grad_norm": 0.09582027792930603, "learning_rate": 6.933590327667776e-06, "loss": 8.5888, "step": 154080 }, { "epoch": 0.7695073534919723, "grad_norm": 0.09688831865787506, "learning_rate": 6.9320884127262265e-06, "loss": 8.5683, "step": 154090 }, { "epoch": 0.7695572923169118, "grad_norm": 0.09381963312625885, "learning_rate": 6.930586497784675e-06, "loss": 8.5728, "step": 154100 }, { "epoch": 0.7696072311418513, "grad_norm": 0.0936012864112854, "learning_rate": 6.929084582843125e-06, "loss": 8.5665, "step": 154110 }, { "epoch": 0.7696571699667907, "grad_norm": 0.08793763071298599, "learning_rate": 6.9275826679015745e-06, "loss": 8.6069, "step": 154120 }, { "epoch": 0.7697071087917301, "grad_norm": 0.08785630762577057, "learning_rate": 6.926080752960024e-06, "loss": 8.581, "step": 154130 }, { "epoch": 0.7697570476166696, "grad_norm": 0.09242880344390869, "learning_rate": 6.924578838018474e-06, "loss": 8.5786, "step": 154140 }, { "epoch": 0.769806986441609, "grad_norm": 0.08848867565393448, "learning_rate": 6.923076923076923e-06, "loss": 8.578, "step": 154150 }, { "epoch": 0.7698569252665485, "grad_norm": 0.09217596054077148, "learning_rate": 6.921575008135373e-06, "loss": 8.578, "step": 154160 }, { "epoch": 0.7699068640914879, "grad_norm": 0.08778587728738785, "learning_rate": 6.920073093193823e-06, "loss": 8.5806, "step": 154170 }, { "epoch": 0.7699568029164274, "grad_norm": 0.09108664095401764, "learning_rate": 6.918571178252271e-06, "loss": 8.5815, "step": 154180 }, { "epoch": 0.7700067417413669, "grad_norm": 0.09245019406080246, "learning_rate": 6.9170692633107216e-06, "loss": 8.58, "step": 154190 }, { "epoch": 0.7700566805663063, "grad_norm": 0.09027982503175735, "learning_rate": 6.915567348369171e-06, "loss": 8.5821, "step": 154200 }, { "epoch": 0.7701066193912457, "grad_norm": 0.093569815158844, "learning_rate": 6.91406543342762e-06, "loss": 8.562, "step": 154210 }, { "epoch": 0.7701565582161852, "grad_norm": 0.09079516679048538, "learning_rate": 6.91256351848607e-06, "loss": 8.5822, "step": 154220 }, { "epoch": 0.7702064970411246, "grad_norm": 0.09518477320671082, "learning_rate": 6.91106160354452e-06, "loss": 8.5833, "step": 154230 }, { "epoch": 0.7702564358660641, "grad_norm": 0.08532573282718658, "learning_rate": 6.909559688602969e-06, "loss": 8.5973, "step": 154240 }, { "epoch": 0.7703063746910035, "grad_norm": 0.09336396306753159, "learning_rate": 6.908057773661418e-06, "loss": 8.5737, "step": 154250 }, { "epoch": 0.770356313515943, "grad_norm": 0.09063505381345749, "learning_rate": 6.906555858719868e-06, "loss": 8.5763, "step": 154260 }, { "epoch": 0.7704062523408824, "grad_norm": 0.08470286428928375, "learning_rate": 6.905053943778318e-06, "loss": 8.5924, "step": 154270 }, { "epoch": 0.7704561911658219, "grad_norm": 0.09841927140951157, "learning_rate": 6.903552028836767e-06, "loss": 8.5754, "step": 154280 }, { "epoch": 0.7705061299907613, "grad_norm": 0.09492678195238113, "learning_rate": 6.9020501138952166e-06, "loss": 8.5874, "step": 154290 }, { "epoch": 0.7705560688157007, "grad_norm": 0.09044993668794632, "learning_rate": 6.900548198953666e-06, "loss": 8.5628, "step": 154300 }, { "epoch": 0.7706060076406402, "grad_norm": 0.09086602926254272, "learning_rate": 6.899046284012116e-06, "loss": 8.5546, "step": 154310 }, { "epoch": 0.7706559464655797, "grad_norm": 0.08962773531675339, "learning_rate": 6.897544369070565e-06, "loss": 8.5762, "step": 154320 }, { "epoch": 0.7707058852905191, "grad_norm": 0.09127478301525116, "learning_rate": 6.896042454129015e-06, "loss": 8.5902, "step": 154330 }, { "epoch": 0.7707558241154585, "grad_norm": 0.09498311579227448, "learning_rate": 6.894540539187464e-06, "loss": 8.574, "step": 154340 }, { "epoch": 0.770805762940398, "grad_norm": 0.09378572553396225, "learning_rate": 6.893038624245913e-06, "loss": 8.5666, "step": 154350 }, { "epoch": 0.7708557017653375, "grad_norm": 0.09132121503353119, "learning_rate": 6.8915367093043636e-06, "loss": 8.5664, "step": 154360 }, { "epoch": 0.7709056405902769, "grad_norm": 0.0891404077410698, "learning_rate": 6.890034794362813e-06, "loss": 8.5671, "step": 154370 }, { "epoch": 0.7709555794152163, "grad_norm": 0.0928260087966919, "learning_rate": 6.888532879421262e-06, "loss": 8.5717, "step": 154380 }, { "epoch": 0.7710055182401558, "grad_norm": 0.092970110476017, "learning_rate": 6.887030964479712e-06, "loss": 8.5745, "step": 154390 }, { "epoch": 0.7710554570650953, "grad_norm": 0.09026780724525452, "learning_rate": 6.885529049538161e-06, "loss": 8.5715, "step": 154400 }, { "epoch": 0.7711053958900347, "grad_norm": 0.09131123125553131, "learning_rate": 6.884027134596611e-06, "loss": 8.5806, "step": 154410 }, { "epoch": 0.7711553347149741, "grad_norm": 0.09628612548112869, "learning_rate": 6.88252521965506e-06, "loss": 8.5872, "step": 154420 }, { "epoch": 0.7712052735399136, "grad_norm": 0.10040068626403809, "learning_rate": 6.88102330471351e-06, "loss": 8.5681, "step": 154430 }, { "epoch": 0.7712552123648531, "grad_norm": 0.09062188118696213, "learning_rate": 6.87952138977196e-06, "loss": 8.5869, "step": 154440 }, { "epoch": 0.7713051511897925, "grad_norm": 0.09356309473514557, "learning_rate": 6.878019474830408e-06, "loss": 8.58, "step": 154450 }, { "epoch": 0.7713550900147319, "grad_norm": 0.09048853069543839, "learning_rate": 6.876517559888859e-06, "loss": 8.5787, "step": 154460 }, { "epoch": 0.7714050288396714, "grad_norm": 0.08828842639923096, "learning_rate": 6.875015644947309e-06, "loss": 8.5799, "step": 154470 }, { "epoch": 0.7714549676646109, "grad_norm": 0.0911807268857956, "learning_rate": 6.873513730005757e-06, "loss": 8.5726, "step": 154480 }, { "epoch": 0.7715049064895503, "grad_norm": 0.08841972798109055, "learning_rate": 6.8720118150642074e-06, "loss": 8.5756, "step": 154490 }, { "epoch": 0.7715548453144897, "grad_norm": 0.09796998649835587, "learning_rate": 6.870509900122656e-06, "loss": 8.559, "step": 154500 }, { "epoch": 0.7716047841394292, "grad_norm": 0.09058402478694916, "learning_rate": 6.869007985181106e-06, "loss": 8.5675, "step": 154510 }, { "epoch": 0.7716547229643687, "grad_norm": 0.08772189915180206, "learning_rate": 6.867506070239556e-06, "loss": 8.5848, "step": 154520 }, { "epoch": 0.7717046617893081, "grad_norm": 0.08951723575592041, "learning_rate": 6.866004155298005e-06, "loss": 8.5791, "step": 154530 }, { "epoch": 0.7717546006142475, "grad_norm": 0.08602496981620789, "learning_rate": 6.864502240356455e-06, "loss": 8.5794, "step": 154540 }, { "epoch": 0.771804539439187, "grad_norm": 0.08945851027965546, "learning_rate": 6.863000325414904e-06, "loss": 8.5841, "step": 154550 }, { "epoch": 0.7718544782641265, "grad_norm": 0.09233725816011429, "learning_rate": 6.861498410473354e-06, "loss": 8.5541, "step": 154560 }, { "epoch": 0.7719044170890659, "grad_norm": 0.09467992186546326, "learning_rate": 6.859996495531804e-06, "loss": 8.5744, "step": 154570 }, { "epoch": 0.7719543559140053, "grad_norm": 0.08942775428295135, "learning_rate": 6.858494580590252e-06, "loss": 8.5864, "step": 154580 }, { "epoch": 0.7720042947389448, "grad_norm": 0.08920110762119293, "learning_rate": 6.8569926656487024e-06, "loss": 8.5959, "step": 154590 }, { "epoch": 0.7720542335638843, "grad_norm": 0.08930312097072601, "learning_rate": 6.855490750707152e-06, "loss": 8.5837, "step": 154600 }, { "epoch": 0.7721041723888237, "grad_norm": 0.09447375684976578, "learning_rate": 6.853988835765601e-06, "loss": 8.58, "step": 154610 }, { "epoch": 0.7721541112137631, "grad_norm": 0.08980347961187363, "learning_rate": 6.852486920824051e-06, "loss": 8.5727, "step": 154620 }, { "epoch": 0.7722040500387026, "grad_norm": 0.09173062443733215, "learning_rate": 6.850985005882501e-06, "loss": 8.5604, "step": 154630 }, { "epoch": 0.772253988863642, "grad_norm": 0.08936478197574615, "learning_rate": 6.84948309094095e-06, "loss": 8.5684, "step": 154640 }, { "epoch": 0.7723039276885815, "grad_norm": 0.09296960383653641, "learning_rate": 6.847981175999399e-06, "loss": 8.5477, "step": 154650 }, { "epoch": 0.7723538665135209, "grad_norm": 0.09330233186483383, "learning_rate": 6.846479261057849e-06, "loss": 8.5855, "step": 154660 }, { "epoch": 0.7724038053384604, "grad_norm": 0.08701977878808975, "learning_rate": 6.844977346116299e-06, "loss": 8.5892, "step": 154670 }, { "epoch": 0.7724537441633998, "grad_norm": 0.10141323506832123, "learning_rate": 6.843475431174748e-06, "loss": 8.5798, "step": 154680 }, { "epoch": 0.7725036829883393, "grad_norm": 0.08847815543413162, "learning_rate": 6.8419735162331974e-06, "loss": 8.579, "step": 154690 }, { "epoch": 0.7725536218132787, "grad_norm": 0.10151761770248413, "learning_rate": 6.840471601291647e-06, "loss": 8.5819, "step": 154700 }, { "epoch": 0.7726035606382182, "grad_norm": 0.09666801989078522, "learning_rate": 6.838969686350097e-06, "loss": 8.5728, "step": 154710 }, { "epoch": 0.7726534994631576, "grad_norm": 0.09449943155050278, "learning_rate": 6.837467771408546e-06, "loss": 8.5817, "step": 154720 }, { "epoch": 0.7727034382880971, "grad_norm": 0.09444770961999893, "learning_rate": 6.835965856466996e-06, "loss": 8.5654, "step": 154730 }, { "epoch": 0.7727533771130365, "grad_norm": 0.09364951401948929, "learning_rate": 6.834463941525445e-06, "loss": 8.5647, "step": 154740 }, { "epoch": 0.772803315937976, "grad_norm": 0.09371913969516754, "learning_rate": 6.832962026583894e-06, "loss": 8.5827, "step": 154750 }, { "epoch": 0.7728532547629154, "grad_norm": 0.08978746831417084, "learning_rate": 6.8314601116423445e-06, "loss": 8.5917, "step": 154760 }, { "epoch": 0.7729031935878549, "grad_norm": 0.0957767516374588, "learning_rate": 6.829958196700794e-06, "loss": 8.566, "step": 154770 }, { "epoch": 0.7729531324127943, "grad_norm": 0.09105443209409714, "learning_rate": 6.828456281759243e-06, "loss": 8.5709, "step": 154780 }, { "epoch": 0.7730030712377338, "grad_norm": 0.09461674094200134, "learning_rate": 6.826954366817693e-06, "loss": 8.5735, "step": 154790 }, { "epoch": 0.7730530100626732, "grad_norm": 0.08995918929576874, "learning_rate": 6.825452451876142e-06, "loss": 8.5817, "step": 154800 }, { "epoch": 0.7731029488876127, "grad_norm": 0.08911199867725372, "learning_rate": 6.823950536934592e-06, "loss": 8.578, "step": 154810 }, { "epoch": 0.7731528877125521, "grad_norm": 0.09105192124843597, "learning_rate": 6.822448621993041e-06, "loss": 8.5733, "step": 154820 }, { "epoch": 0.7732028265374916, "grad_norm": 0.08885302394628525, "learning_rate": 6.820946707051491e-06, "loss": 8.5597, "step": 154830 }, { "epoch": 0.773252765362431, "grad_norm": 0.09799888730049133, "learning_rate": 6.819444792109941e-06, "loss": 8.5674, "step": 154840 }, { "epoch": 0.7733027041873705, "grad_norm": 0.0921357050538063, "learning_rate": 6.817942877168389e-06, "loss": 8.5727, "step": 154850 }, { "epoch": 0.7733526430123099, "grad_norm": 0.0890340581536293, "learning_rate": 6.8164409622268395e-06, "loss": 8.5742, "step": 154860 }, { "epoch": 0.7734025818372494, "grad_norm": 0.09299721568822861, "learning_rate": 6.81493904728529e-06, "loss": 8.6013, "step": 154870 }, { "epoch": 0.7734525206621888, "grad_norm": 0.09847550094127655, "learning_rate": 6.813437132343738e-06, "loss": 8.5676, "step": 154880 }, { "epoch": 0.7735024594871283, "grad_norm": 0.09080928564071655, "learning_rate": 6.811935217402188e-06, "loss": 8.5686, "step": 154890 }, { "epoch": 0.7735523983120677, "grad_norm": 0.09545505046844482, "learning_rate": 6.810433302460637e-06, "loss": 8.5719, "step": 154900 }, { "epoch": 0.7736023371370072, "grad_norm": 0.08688024431467056, "learning_rate": 6.808931387519087e-06, "loss": 8.5832, "step": 154910 }, { "epoch": 0.7736522759619466, "grad_norm": 0.0897180363535881, "learning_rate": 6.807429472577537e-06, "loss": 8.5695, "step": 154920 }, { "epoch": 0.7737022147868861, "grad_norm": 0.0968603864312172, "learning_rate": 6.805927557635986e-06, "loss": 8.5708, "step": 154930 }, { "epoch": 0.7737521536118255, "grad_norm": 0.09534350782632828, "learning_rate": 6.804425642694436e-06, "loss": 8.5643, "step": 154940 }, { "epoch": 0.773802092436765, "grad_norm": 0.09181123226881027, "learning_rate": 6.802923727752885e-06, "loss": 8.5733, "step": 154950 }, { "epoch": 0.7738520312617044, "grad_norm": 0.08840048313140869, "learning_rate": 6.8014218128113345e-06, "loss": 8.56, "step": 154960 }, { "epoch": 0.7739019700866439, "grad_norm": 0.09602469950914383, "learning_rate": 6.799919897869785e-06, "loss": 8.582, "step": 154970 }, { "epoch": 0.7739519089115833, "grad_norm": 0.08890822529792786, "learning_rate": 6.798417982928233e-06, "loss": 8.5752, "step": 154980 }, { "epoch": 0.7740018477365228, "grad_norm": 0.0951463058590889, "learning_rate": 6.796916067986683e-06, "loss": 8.5782, "step": 154990 }, { "epoch": 0.7740517865614622, "grad_norm": 0.08842669427394867, "learning_rate": 6.795414153045133e-06, "loss": 8.6005, "step": 155000 }, { "epoch": 0.7741017253864017, "grad_norm": 0.0843082070350647, "learning_rate": 6.793912238103582e-06, "loss": 8.5825, "step": 155010 }, { "epoch": 0.7741516642113411, "grad_norm": 0.09507893025875092, "learning_rate": 6.792410323162032e-06, "loss": 8.5753, "step": 155020 }, { "epoch": 0.7742016030362806, "grad_norm": 0.09141889959573746, "learning_rate": 6.7909084082204815e-06, "loss": 8.5755, "step": 155030 }, { "epoch": 0.77425154186122, "grad_norm": 0.09243453294038773, "learning_rate": 6.789406493278931e-06, "loss": 8.5904, "step": 155040 }, { "epoch": 0.7743014806861594, "grad_norm": 0.09155929833650589, "learning_rate": 6.78790457833738e-06, "loss": 8.5654, "step": 155050 }, { "epoch": 0.7743514195110989, "grad_norm": 0.09300145506858826, "learning_rate": 6.7864026633958295e-06, "loss": 8.5482, "step": 155060 }, { "epoch": 0.7744013583360384, "grad_norm": 0.09293139725923538, "learning_rate": 6.78490074845428e-06, "loss": 8.5743, "step": 155070 }, { "epoch": 0.7744512971609778, "grad_norm": 0.08827711641788483, "learning_rate": 6.783398833512729e-06, "loss": 8.5664, "step": 155080 }, { "epoch": 0.7745012359859172, "grad_norm": 0.08957238495349884, "learning_rate": 6.781896918571178e-06, "loss": 8.586, "step": 155090 }, { "epoch": 0.7745511748108567, "grad_norm": 0.08923155814409256, "learning_rate": 6.780395003629628e-06, "loss": 8.5718, "step": 155100 }, { "epoch": 0.7746011136357962, "grad_norm": 0.0950160101056099, "learning_rate": 6.778893088688077e-06, "loss": 8.585, "step": 155110 }, { "epoch": 0.7746510524607356, "grad_norm": 0.09716098755598068, "learning_rate": 6.777391173746527e-06, "loss": 8.5609, "step": 155120 }, { "epoch": 0.774700991285675, "grad_norm": 0.09193146228790283, "learning_rate": 6.7758892588049765e-06, "loss": 8.5826, "step": 155130 }, { "epoch": 0.7747509301106145, "grad_norm": 0.08806554228067398, "learning_rate": 6.774387343863426e-06, "loss": 8.5824, "step": 155140 }, { "epoch": 0.774800868935554, "grad_norm": 0.09546437114477158, "learning_rate": 6.772885428921875e-06, "loss": 8.5459, "step": 155150 }, { "epoch": 0.7748508077604934, "grad_norm": 0.08918742090463638, "learning_rate": 6.771383513980325e-06, "loss": 8.5828, "step": 155160 }, { "epoch": 0.7749007465854328, "grad_norm": 0.0883205235004425, "learning_rate": 6.769881599038775e-06, "loss": 8.5882, "step": 155170 }, { "epoch": 0.7749506854103723, "grad_norm": 0.09442654252052307, "learning_rate": 6.768379684097224e-06, "loss": 8.5788, "step": 155180 }, { "epoch": 0.7750006242353118, "grad_norm": 0.08732815831899643, "learning_rate": 6.766877769155673e-06, "loss": 8.5597, "step": 155190 }, { "epoch": 0.7750505630602512, "grad_norm": 0.09169010072946548, "learning_rate": 6.7653758542141235e-06, "loss": 8.5722, "step": 155200 }, { "epoch": 0.7751005018851906, "grad_norm": 0.09443099051713943, "learning_rate": 6.763873939272573e-06, "loss": 8.59, "step": 155210 }, { "epoch": 0.7751504407101301, "grad_norm": 0.09280900657176971, "learning_rate": 6.762372024331022e-06, "loss": 8.5831, "step": 155220 }, { "epoch": 0.7752003795350696, "grad_norm": 0.09292498975992203, "learning_rate": 6.7608701093894715e-06, "loss": 8.5737, "step": 155230 }, { "epoch": 0.775250318360009, "grad_norm": 0.08915520459413528, "learning_rate": 6.759368194447922e-06, "loss": 8.5831, "step": 155240 }, { "epoch": 0.7753002571849484, "grad_norm": 0.08977535367012024, "learning_rate": 6.757866279506371e-06, "loss": 8.5632, "step": 155250 }, { "epoch": 0.7753501960098879, "grad_norm": 0.09135992079973221, "learning_rate": 6.75636436456482e-06, "loss": 8.5602, "step": 155260 }, { "epoch": 0.7754001348348273, "grad_norm": 0.0918012335896492, "learning_rate": 6.75486244962327e-06, "loss": 8.5713, "step": 155270 }, { "epoch": 0.7754500736597668, "grad_norm": 0.0947796106338501, "learning_rate": 6.753360534681719e-06, "loss": 8.5765, "step": 155280 }, { "epoch": 0.7755000124847062, "grad_norm": 0.09258890151977539, "learning_rate": 6.751858619740169e-06, "loss": 8.5802, "step": 155290 }, { "epoch": 0.7755499513096457, "grad_norm": 0.09028926491737366, "learning_rate": 6.7503567047986185e-06, "loss": 8.5838, "step": 155300 }, { "epoch": 0.7755998901345851, "grad_norm": 0.09300950914621353, "learning_rate": 6.748854789857068e-06, "loss": 8.5698, "step": 155310 }, { "epoch": 0.7756498289595246, "grad_norm": 0.09121584892272949, "learning_rate": 6.747352874915518e-06, "loss": 8.5736, "step": 155320 }, { "epoch": 0.775699767784464, "grad_norm": 0.0916307121515274, "learning_rate": 6.7458509599739665e-06, "loss": 8.5664, "step": 155330 }, { "epoch": 0.7757497066094035, "grad_norm": 0.09273859858512878, "learning_rate": 6.744349045032417e-06, "loss": 8.576, "step": 155340 }, { "epoch": 0.7757996454343429, "grad_norm": 0.08911219984292984, "learning_rate": 6.742847130090866e-06, "loss": 8.5848, "step": 155350 }, { "epoch": 0.7758495842592824, "grad_norm": 0.0911405086517334, "learning_rate": 6.741345215149315e-06, "loss": 8.5869, "step": 155360 }, { "epoch": 0.7758995230842218, "grad_norm": 0.09100606292486191, "learning_rate": 6.7398433002077655e-06, "loss": 8.5476, "step": 155370 }, { "epoch": 0.7759494619091613, "grad_norm": 0.09772035479545593, "learning_rate": 6.738341385266214e-06, "loss": 8.5863, "step": 155380 }, { "epoch": 0.7759994007341007, "grad_norm": 0.08929920196533203, "learning_rate": 6.736839470324664e-06, "loss": 8.5799, "step": 155390 }, { "epoch": 0.7760493395590402, "grad_norm": 0.0926336944103241, "learning_rate": 6.735337555383114e-06, "loss": 8.5804, "step": 155400 }, { "epoch": 0.7760992783839796, "grad_norm": 0.09426475316286087, "learning_rate": 6.733835640441563e-06, "loss": 8.5662, "step": 155410 }, { "epoch": 0.776149217208919, "grad_norm": 0.08994784206151962, "learning_rate": 6.732333725500013e-06, "loss": 8.568, "step": 155420 }, { "epoch": 0.7761991560338585, "grad_norm": 0.093609519302845, "learning_rate": 6.7308318105584615e-06, "loss": 8.5902, "step": 155430 }, { "epoch": 0.776249094858798, "grad_norm": 0.09885302931070328, "learning_rate": 6.729329895616912e-06, "loss": 8.5656, "step": 155440 }, { "epoch": 0.7762990336837374, "grad_norm": 0.08882102370262146, "learning_rate": 6.727827980675362e-06, "loss": 8.5813, "step": 155450 }, { "epoch": 0.7763489725086768, "grad_norm": 0.09155222773551941, "learning_rate": 6.72632606573381e-06, "loss": 8.5743, "step": 155460 }, { "epoch": 0.7763989113336163, "grad_norm": 0.09071797877550125, "learning_rate": 6.7248241507922605e-06, "loss": 8.5807, "step": 155470 }, { "epoch": 0.7764488501585558, "grad_norm": 0.09299448877573013, "learning_rate": 6.72332223585071e-06, "loss": 8.555, "step": 155480 }, { "epoch": 0.7764987889834952, "grad_norm": 0.09560713171958923, "learning_rate": 6.721820320909159e-06, "loss": 8.5618, "step": 155490 }, { "epoch": 0.7765487278084346, "grad_norm": 0.08749803900718689, "learning_rate": 6.720318405967609e-06, "loss": 8.5805, "step": 155500 }, { "epoch": 0.7765986666333741, "grad_norm": 0.09022912383079529, "learning_rate": 6.718816491026058e-06, "loss": 8.5955, "step": 155510 }, { "epoch": 0.7766486054583136, "grad_norm": 0.09331797063350677, "learning_rate": 6.717314576084508e-06, "loss": 8.5657, "step": 155520 }, { "epoch": 0.776698544283253, "grad_norm": 0.09521540254354477, "learning_rate": 6.715812661142957e-06, "loss": 8.5662, "step": 155530 }, { "epoch": 0.7767484831081924, "grad_norm": 0.09175549447536469, "learning_rate": 6.714310746201407e-06, "loss": 8.5795, "step": 155540 }, { "epoch": 0.7767984219331319, "grad_norm": 0.09057381004095078, "learning_rate": 6.712808831259857e-06, "loss": 8.5605, "step": 155550 }, { "epoch": 0.7768483607580714, "grad_norm": 0.09255360811948776, "learning_rate": 6.711306916318306e-06, "loss": 8.5666, "step": 155560 }, { "epoch": 0.7768982995830108, "grad_norm": 0.08889876306056976, "learning_rate": 6.7098050013767555e-06, "loss": 8.5632, "step": 155570 }, { "epoch": 0.7769482384079502, "grad_norm": 0.09821149706840515, "learning_rate": 6.708303086435205e-06, "loss": 8.5697, "step": 155580 }, { "epoch": 0.7769981772328897, "grad_norm": 0.09199816733598709, "learning_rate": 6.706801171493654e-06, "loss": 8.5783, "step": 155590 }, { "epoch": 0.7770481160578292, "grad_norm": 0.08835037052631378, "learning_rate": 6.705299256552104e-06, "loss": 8.5675, "step": 155600 }, { "epoch": 0.7770980548827686, "grad_norm": 0.09240995347499847, "learning_rate": 6.703797341610554e-06, "loss": 8.586, "step": 155610 }, { "epoch": 0.777147993707708, "grad_norm": 0.08770235627889633, "learning_rate": 6.702295426669003e-06, "loss": 8.5867, "step": 155620 }, { "epoch": 0.7771979325326475, "grad_norm": 0.0896248146891594, "learning_rate": 6.700793511727452e-06, "loss": 8.5803, "step": 155630 }, { "epoch": 0.777247871357587, "grad_norm": 0.09114081412553787, "learning_rate": 6.6992915967859025e-06, "loss": 8.5682, "step": 155640 }, { "epoch": 0.7772978101825264, "grad_norm": 0.08897022902965546, "learning_rate": 6.697789681844352e-06, "loss": 8.589, "step": 155650 }, { "epoch": 0.7773477490074658, "grad_norm": 0.09216853976249695, "learning_rate": 6.696287766902801e-06, "loss": 8.5517, "step": 155660 }, { "epoch": 0.7773976878324053, "grad_norm": 0.0946219190955162, "learning_rate": 6.6947858519612505e-06, "loss": 8.5782, "step": 155670 }, { "epoch": 0.7774476266573448, "grad_norm": 0.09455423057079315, "learning_rate": 6.6932839370197e-06, "loss": 8.5608, "step": 155680 }, { "epoch": 0.7774975654822842, "grad_norm": 0.0957740917801857, "learning_rate": 6.69178202207815e-06, "loss": 8.5761, "step": 155690 }, { "epoch": 0.7775475043072236, "grad_norm": 0.09181936085224152, "learning_rate": 6.690280107136599e-06, "loss": 8.582, "step": 155700 }, { "epoch": 0.7775974431321631, "grad_norm": 0.09328048676252365, "learning_rate": 6.688778192195049e-06, "loss": 8.5598, "step": 155710 }, { "epoch": 0.7776473819571026, "grad_norm": 0.08828567713499069, "learning_rate": 6.687276277253499e-06, "loss": 8.5677, "step": 155720 }, { "epoch": 0.777697320782042, "grad_norm": 0.0937914326786995, "learning_rate": 6.685774362311947e-06, "loss": 8.5704, "step": 155730 }, { "epoch": 0.7777472596069814, "grad_norm": 0.09564419835805893, "learning_rate": 6.6842724473703976e-06, "loss": 8.5762, "step": 155740 }, { "epoch": 0.7777971984319209, "grad_norm": 0.08568717539310455, "learning_rate": 6.682770532428847e-06, "loss": 8.558, "step": 155750 }, { "epoch": 0.7778471372568604, "grad_norm": 0.09092507511377335, "learning_rate": 6.681268617487296e-06, "loss": 8.5532, "step": 155760 }, { "epoch": 0.7778970760817998, "grad_norm": 0.09010069072246552, "learning_rate": 6.679766702545746e-06, "loss": 8.5674, "step": 155770 }, { "epoch": 0.7779470149067392, "grad_norm": 0.0927618071436882, "learning_rate": 6.678264787604195e-06, "loss": 8.5879, "step": 155780 }, { "epoch": 0.7779969537316787, "grad_norm": 0.09050939232110977, "learning_rate": 6.676762872662645e-06, "loss": 8.5588, "step": 155790 }, { "epoch": 0.7780468925566182, "grad_norm": 0.10684484988451004, "learning_rate": 6.675260957721095e-06, "loss": 8.5721, "step": 155800 }, { "epoch": 0.7780968313815576, "grad_norm": 0.08730565011501312, "learning_rate": 6.673759042779544e-06, "loss": 8.5582, "step": 155810 }, { "epoch": 0.778146770206497, "grad_norm": 0.089006207883358, "learning_rate": 6.672257127837994e-06, "loss": 8.5687, "step": 155820 }, { "epoch": 0.7781967090314365, "grad_norm": 0.09139891713857651, "learning_rate": 6.670755212896442e-06, "loss": 8.5781, "step": 155830 }, { "epoch": 0.778246647856376, "grad_norm": 0.09247538447380066, "learning_rate": 6.6692532979548926e-06, "loss": 8.578, "step": 155840 }, { "epoch": 0.7782965866813154, "grad_norm": 0.09350184351205826, "learning_rate": 6.667751383013343e-06, "loss": 8.5866, "step": 155850 }, { "epoch": 0.7783465255062548, "grad_norm": 0.08985615521669388, "learning_rate": 6.666249468071791e-06, "loss": 8.5793, "step": 155860 }, { "epoch": 0.7783964643311942, "grad_norm": 0.08648446202278137, "learning_rate": 6.664747553130241e-06, "loss": 8.5698, "step": 155870 }, { "epoch": 0.7784464031561338, "grad_norm": 0.0879853144288063, "learning_rate": 6.663245638188691e-06, "loss": 8.5803, "step": 155880 }, { "epoch": 0.7784963419810732, "grad_norm": 0.09268853813409805, "learning_rate": 6.66174372324714e-06, "loss": 8.5734, "step": 155890 }, { "epoch": 0.7785462808060126, "grad_norm": 0.09160283207893372, "learning_rate": 6.66024180830559e-06, "loss": 8.5861, "step": 155900 }, { "epoch": 0.778596219630952, "grad_norm": 0.09253548830747604, "learning_rate": 6.658739893364039e-06, "loss": 8.5787, "step": 155910 }, { "epoch": 0.7786461584558916, "grad_norm": 0.09596210718154907, "learning_rate": 6.657237978422489e-06, "loss": 8.5661, "step": 155920 }, { "epoch": 0.778696097280831, "grad_norm": 0.08908601105213165, "learning_rate": 6.655736063480938e-06, "loss": 8.5649, "step": 155930 }, { "epoch": 0.7787460361057704, "grad_norm": 0.08889824151992798, "learning_rate": 6.6542341485393876e-06, "loss": 8.55, "step": 155940 }, { "epoch": 0.7787959749307098, "grad_norm": 0.09235508739948273, "learning_rate": 6.652732233597838e-06, "loss": 8.581, "step": 155950 }, { "epoch": 0.7788459137556494, "grad_norm": 0.09276876598596573, "learning_rate": 6.651230318656287e-06, "loss": 8.5593, "step": 155960 }, { "epoch": 0.7788958525805888, "grad_norm": 0.08646731078624725, "learning_rate": 6.649728403714736e-06, "loss": 8.5816, "step": 155970 }, { "epoch": 0.7789457914055282, "grad_norm": 0.08740647882223129, "learning_rate": 6.648226488773186e-06, "loss": 8.5732, "step": 155980 }, { "epoch": 0.7789957302304676, "grad_norm": 0.09212644398212433, "learning_rate": 6.646724573831635e-06, "loss": 8.5731, "step": 155990 }, { "epoch": 0.7790456690554072, "grad_norm": 0.0925680547952652, "learning_rate": 6.645222658890085e-06, "loss": 8.5776, "step": 156000 }, { "epoch": 0.7790956078803466, "grad_norm": 0.08844732493162155, "learning_rate": 6.643720743948535e-06, "loss": 8.5941, "step": 156010 }, { "epoch": 0.779145546705286, "grad_norm": 0.08517491817474365, "learning_rate": 6.642218829006984e-06, "loss": 8.5737, "step": 156020 }, { "epoch": 0.7791954855302254, "grad_norm": 0.09461086988449097, "learning_rate": 6.640716914065433e-06, "loss": 8.5723, "step": 156030 }, { "epoch": 0.779245424355165, "grad_norm": 0.09351557493209839, "learning_rate": 6.6392149991238834e-06, "loss": 8.5885, "step": 156040 }, { "epoch": 0.7792953631801044, "grad_norm": 0.09356269985437393, "learning_rate": 6.637713084182333e-06, "loss": 8.5521, "step": 156050 }, { "epoch": 0.7793453020050438, "grad_norm": 0.09160886704921722, "learning_rate": 6.636211169240782e-06, "loss": 8.5711, "step": 156060 }, { "epoch": 0.7793952408299832, "grad_norm": 0.09123864024877548, "learning_rate": 6.634709254299231e-06, "loss": 8.5658, "step": 156070 }, { "epoch": 0.7794451796549228, "grad_norm": 0.08630036562681198, "learning_rate": 6.633207339357681e-06, "loss": 8.5817, "step": 156080 }, { "epoch": 0.7794951184798622, "grad_norm": 0.08908306062221527, "learning_rate": 6.631705424416131e-06, "loss": 8.5747, "step": 156090 }, { "epoch": 0.7795450573048016, "grad_norm": 0.0923098474740982, "learning_rate": 6.63020350947458e-06, "loss": 8.5524, "step": 156100 }, { "epoch": 0.779594996129741, "grad_norm": 0.09396307915449142, "learning_rate": 6.62870159453303e-06, "loss": 8.5752, "step": 156110 }, { "epoch": 0.7796449349546806, "grad_norm": 0.09391772747039795, "learning_rate": 6.62719967959148e-06, "loss": 8.5701, "step": 156120 }, { "epoch": 0.77969487377962, "grad_norm": 0.0857236236333847, "learning_rate": 6.625697764649928e-06, "loss": 8.5592, "step": 156130 }, { "epoch": 0.7797448126045594, "grad_norm": 0.09074844419956207, "learning_rate": 6.6241958497083784e-06, "loss": 8.5821, "step": 156140 }, { "epoch": 0.7797947514294988, "grad_norm": 0.08897120505571365, "learning_rate": 6.622693934766828e-06, "loss": 8.5788, "step": 156150 }, { "epoch": 0.7798446902544384, "grad_norm": 0.08881950378417969, "learning_rate": 6.621192019825277e-06, "loss": 8.572, "step": 156160 }, { "epoch": 0.7798946290793778, "grad_norm": 0.09620756655931473, "learning_rate": 6.619690104883727e-06, "loss": 8.5728, "step": 156170 }, { "epoch": 0.7799445679043172, "grad_norm": 0.08694388717412949, "learning_rate": 6.618188189942176e-06, "loss": 8.5895, "step": 156180 }, { "epoch": 0.7799945067292566, "grad_norm": 0.08693565428256989, "learning_rate": 6.616686275000626e-06, "loss": 8.5849, "step": 156190 }, { "epoch": 0.7800444455541962, "grad_norm": 0.08743081241846085, "learning_rate": 6.615184360059076e-06, "loss": 8.579, "step": 156200 }, { "epoch": 0.7800943843791356, "grad_norm": 0.09228560328483582, "learning_rate": 6.613682445117525e-06, "loss": 8.5556, "step": 156210 }, { "epoch": 0.780144323204075, "grad_norm": 0.09084375947713852, "learning_rate": 6.612180530175975e-06, "loss": 8.5772, "step": 156220 }, { "epoch": 0.7801942620290144, "grad_norm": 0.08933619409799576, "learning_rate": 6.610678615234423e-06, "loss": 8.5722, "step": 156230 }, { "epoch": 0.780244200853954, "grad_norm": 0.09434707462787628, "learning_rate": 6.6091767002928734e-06, "loss": 8.5676, "step": 156240 }, { "epoch": 0.7802941396788934, "grad_norm": 0.08935230225324631, "learning_rate": 6.607674785351324e-06, "loss": 8.5802, "step": 156250 }, { "epoch": 0.7803440785038328, "grad_norm": 0.09848465025424957, "learning_rate": 6.606172870409772e-06, "loss": 8.555, "step": 156260 }, { "epoch": 0.7803940173287722, "grad_norm": 0.09197591245174408, "learning_rate": 6.604670955468222e-06, "loss": 8.5815, "step": 156270 }, { "epoch": 0.7804439561537116, "grad_norm": 0.0875692144036293, "learning_rate": 6.603169040526672e-06, "loss": 8.5706, "step": 156280 }, { "epoch": 0.7804938949786512, "grad_norm": 0.09515390545129776, "learning_rate": 6.601667125585121e-06, "loss": 8.5762, "step": 156290 }, { "epoch": 0.7805438338035906, "grad_norm": 0.0905672088265419, "learning_rate": 6.600165210643571e-06, "loss": 8.5627, "step": 156300 }, { "epoch": 0.78059377262853, "grad_norm": 0.08882583677768707, "learning_rate": 6.59866329570202e-06, "loss": 8.5586, "step": 156310 }, { "epoch": 0.7806437114534694, "grad_norm": 0.08932114392518997, "learning_rate": 6.59716138076047e-06, "loss": 8.5591, "step": 156320 }, { "epoch": 0.780693650278409, "grad_norm": 0.09061101078987122, "learning_rate": 6.59565946581892e-06, "loss": 8.5806, "step": 156330 }, { "epoch": 0.7807435891033484, "grad_norm": 0.08474583178758621, "learning_rate": 6.5941575508773684e-06, "loss": 8.5841, "step": 156340 }, { "epoch": 0.7807935279282878, "grad_norm": 0.09312687814235687, "learning_rate": 6.592655635935819e-06, "loss": 8.5691, "step": 156350 }, { "epoch": 0.7808434667532272, "grad_norm": 0.09034864604473114, "learning_rate": 6.591153720994268e-06, "loss": 8.5726, "step": 156360 }, { "epoch": 0.7808934055781668, "grad_norm": 0.09045044332742691, "learning_rate": 6.589651806052717e-06, "loss": 8.5599, "step": 156370 }, { "epoch": 0.7809433444031062, "grad_norm": 0.09198865294456482, "learning_rate": 6.5881498911111675e-06, "loss": 8.5653, "step": 156380 }, { "epoch": 0.7809932832280456, "grad_norm": 0.08959676325321198, "learning_rate": 6.586647976169616e-06, "loss": 8.5642, "step": 156390 }, { "epoch": 0.781043222052985, "grad_norm": 0.09269116818904877, "learning_rate": 6.585146061228066e-06, "loss": 8.559, "step": 156400 }, { "epoch": 0.7810931608779246, "grad_norm": 0.08859947323799133, "learning_rate": 6.5836441462865155e-06, "loss": 8.5707, "step": 156410 }, { "epoch": 0.781143099702864, "grad_norm": 0.08951582759618759, "learning_rate": 6.582142231344965e-06, "loss": 8.5724, "step": 156420 }, { "epoch": 0.7811930385278034, "grad_norm": 0.08770976960659027, "learning_rate": 6.580640316403415e-06, "loss": 8.5788, "step": 156430 }, { "epoch": 0.7812429773527428, "grad_norm": 0.09240131080150604, "learning_rate": 6.579138401461864e-06, "loss": 8.5691, "step": 156440 }, { "epoch": 0.7812929161776824, "grad_norm": 0.08868956565856934, "learning_rate": 6.577636486520314e-06, "loss": 8.5805, "step": 156450 }, { "epoch": 0.7813428550026218, "grad_norm": 0.09283566474914551, "learning_rate": 6.576134571578763e-06, "loss": 8.5591, "step": 156460 }, { "epoch": 0.7813927938275612, "grad_norm": 0.09575477242469788, "learning_rate": 6.574632656637212e-06, "loss": 8.562, "step": 156470 }, { "epoch": 0.7814427326525006, "grad_norm": 0.09500584006309509, "learning_rate": 6.5731307416956625e-06, "loss": 8.5661, "step": 156480 }, { "epoch": 0.7814926714774402, "grad_norm": 0.0926256999373436, "learning_rate": 6.571628826754112e-06, "loss": 8.5676, "step": 156490 }, { "epoch": 0.7815426103023796, "grad_norm": 0.09180507063865662, "learning_rate": 6.570126911812561e-06, "loss": 8.5845, "step": 156500 }, { "epoch": 0.781592549127319, "grad_norm": 0.08600914478302002, "learning_rate": 6.5686249968710105e-06, "loss": 8.5769, "step": 156510 }, { "epoch": 0.7816424879522584, "grad_norm": 0.0883520096540451, "learning_rate": 6.567123081929461e-06, "loss": 8.5766, "step": 156520 }, { "epoch": 0.781692426777198, "grad_norm": 0.0899481549859047, "learning_rate": 6.56562116698791e-06, "loss": 8.5516, "step": 156530 }, { "epoch": 0.7817423656021374, "grad_norm": 0.09373979270458221, "learning_rate": 6.564119252046359e-06, "loss": 8.5745, "step": 156540 }, { "epoch": 0.7817923044270768, "grad_norm": 0.08799232542514801, "learning_rate": 6.562617337104809e-06, "loss": 8.5713, "step": 156550 }, { "epoch": 0.7818422432520162, "grad_norm": 0.09024485945701599, "learning_rate": 6.561115422163258e-06, "loss": 8.5852, "step": 156560 }, { "epoch": 0.7818921820769558, "grad_norm": 0.0885784775018692, "learning_rate": 6.559613507221708e-06, "loss": 8.5742, "step": 156570 }, { "epoch": 0.7819421209018952, "grad_norm": 0.09604980051517487, "learning_rate": 6.5581115922801575e-06, "loss": 8.5856, "step": 156580 }, { "epoch": 0.7819920597268346, "grad_norm": 0.08925112336874008, "learning_rate": 6.556609677338607e-06, "loss": 8.577, "step": 156590 }, { "epoch": 0.782041998551774, "grad_norm": 0.08953628689050674, "learning_rate": 6.555107762397057e-06, "loss": 8.5945, "step": 156600 }, { "epoch": 0.7820919373767136, "grad_norm": 0.08811146020889282, "learning_rate": 6.5536058474555055e-06, "loss": 8.5769, "step": 156610 }, { "epoch": 0.782141876201653, "grad_norm": 0.08883983641862869, "learning_rate": 6.552103932513956e-06, "loss": 8.5647, "step": 156620 }, { "epoch": 0.7821918150265924, "grad_norm": 0.09047286212444305, "learning_rate": 6.550602017572405e-06, "loss": 8.5689, "step": 156630 }, { "epoch": 0.7822417538515318, "grad_norm": 0.091248519718647, "learning_rate": 6.549100102630854e-06, "loss": 8.5746, "step": 156640 }, { "epoch": 0.7822916926764714, "grad_norm": 0.0949409082531929, "learning_rate": 6.5475981876893045e-06, "loss": 8.5657, "step": 156650 }, { "epoch": 0.7823416315014108, "grad_norm": 0.09160256385803223, "learning_rate": 6.546096272747753e-06, "loss": 8.5602, "step": 156660 }, { "epoch": 0.7823915703263502, "grad_norm": 0.09412173181772232, "learning_rate": 6.544594357806203e-06, "loss": 8.5698, "step": 156670 }, { "epoch": 0.7824415091512896, "grad_norm": 0.08734364062547684, "learning_rate": 6.543092442864653e-06, "loss": 8.5699, "step": 156680 }, { "epoch": 0.7824914479762292, "grad_norm": 0.08681227266788483, "learning_rate": 6.541590527923102e-06, "loss": 8.5734, "step": 156690 }, { "epoch": 0.7825413868011686, "grad_norm": 0.09137750416994095, "learning_rate": 6.540088612981552e-06, "loss": 8.5507, "step": 156700 }, { "epoch": 0.782591325626108, "grad_norm": 0.08570120483636856, "learning_rate": 6.5385866980400005e-06, "loss": 8.5657, "step": 156710 }, { "epoch": 0.7826412644510474, "grad_norm": 0.0939328595995903, "learning_rate": 6.537084783098451e-06, "loss": 8.5711, "step": 156720 }, { "epoch": 0.782691203275987, "grad_norm": 0.08831031620502472, "learning_rate": 6.535582868156901e-06, "loss": 8.5667, "step": 156730 }, { "epoch": 0.7827411421009264, "grad_norm": 0.09072265774011612, "learning_rate": 6.534080953215349e-06, "loss": 8.5528, "step": 156740 }, { "epoch": 0.7827910809258658, "grad_norm": 0.09154432266950607, "learning_rate": 6.5325790382737995e-06, "loss": 8.5787, "step": 156750 }, { "epoch": 0.7828410197508052, "grad_norm": 0.0947096049785614, "learning_rate": 6.531077123332249e-06, "loss": 8.5638, "step": 156760 }, { "epoch": 0.7828909585757448, "grad_norm": 0.09594990313053131, "learning_rate": 6.529575208390698e-06, "loss": 8.5761, "step": 156770 }, { "epoch": 0.7829408974006842, "grad_norm": 0.0905158668756485, "learning_rate": 6.528073293449148e-06, "loss": 8.558, "step": 156780 }, { "epoch": 0.7829908362256236, "grad_norm": 0.0911213606595993, "learning_rate": 6.526571378507597e-06, "loss": 8.5628, "step": 156790 }, { "epoch": 0.783040775050563, "grad_norm": 0.08714745938777924, "learning_rate": 6.525069463566047e-06, "loss": 8.5658, "step": 156800 }, { "epoch": 0.7830907138755026, "grad_norm": 0.09068936109542847, "learning_rate": 6.523567548624496e-06, "loss": 8.5696, "step": 156810 }, { "epoch": 0.783140652700442, "grad_norm": 0.09490951150655746, "learning_rate": 6.522065633682946e-06, "loss": 8.5601, "step": 156820 }, { "epoch": 0.7831905915253814, "grad_norm": 0.08792884647846222, "learning_rate": 6.520563718741396e-06, "loss": 8.5703, "step": 156830 }, { "epoch": 0.7832405303503208, "grad_norm": 0.08713288605213165, "learning_rate": 6.519061803799845e-06, "loss": 8.5737, "step": 156840 }, { "epoch": 0.7832904691752604, "grad_norm": 0.0946747288107872, "learning_rate": 6.5175598888582945e-06, "loss": 8.5595, "step": 156850 }, { "epoch": 0.7833404080001998, "grad_norm": 0.08821689337491989, "learning_rate": 6.516057973916744e-06, "loss": 8.5619, "step": 156860 }, { "epoch": 0.7833903468251392, "grad_norm": 0.09473678469657898, "learning_rate": 6.514556058975193e-06, "loss": 8.5639, "step": 156870 }, { "epoch": 0.7834402856500786, "grad_norm": 0.09480735659599304, "learning_rate": 6.513054144033643e-06, "loss": 8.5744, "step": 156880 }, { "epoch": 0.7834902244750181, "grad_norm": 0.09333161264657974, "learning_rate": 6.511552229092093e-06, "loss": 8.5615, "step": 156890 }, { "epoch": 0.7835401632999576, "grad_norm": 0.09537094086408615, "learning_rate": 6.510050314150542e-06, "loss": 8.5697, "step": 156900 }, { "epoch": 0.783590102124897, "grad_norm": 0.09100158512592316, "learning_rate": 6.508548399208991e-06, "loss": 8.5796, "step": 156910 }, { "epoch": 0.7836400409498364, "grad_norm": 0.0884363129734993, "learning_rate": 6.5070464842674415e-06, "loss": 8.5578, "step": 156920 }, { "epoch": 0.783689979774776, "grad_norm": 0.09135287255048752, "learning_rate": 6.505544569325891e-06, "loss": 8.5509, "step": 156930 }, { "epoch": 0.7837399185997154, "grad_norm": 0.09011730551719666, "learning_rate": 6.50404265438434e-06, "loss": 8.562, "step": 156940 }, { "epoch": 0.7837898574246548, "grad_norm": 0.08869774639606476, "learning_rate": 6.5025407394427895e-06, "loss": 8.5689, "step": 156950 }, { "epoch": 0.7838397962495942, "grad_norm": 0.09371919184923172, "learning_rate": 6.501038824501239e-06, "loss": 8.5672, "step": 156960 }, { "epoch": 0.7838897350745337, "grad_norm": 0.0950884222984314, "learning_rate": 6.499536909559689e-06, "loss": 8.5585, "step": 156970 }, { "epoch": 0.7839396738994732, "grad_norm": 0.09123662114143372, "learning_rate": 6.498034994618138e-06, "loss": 8.5601, "step": 156980 }, { "epoch": 0.7839896127244126, "grad_norm": 0.0944250077009201, "learning_rate": 6.496533079676588e-06, "loss": 8.5766, "step": 156990 }, { "epoch": 0.784039551549352, "grad_norm": 0.08941183984279633, "learning_rate": 6.495031164735038e-06, "loss": 8.5673, "step": 157000 }, { "epoch": 0.7840894903742915, "grad_norm": 0.0950687900185585, "learning_rate": 6.493529249793486e-06, "loss": 8.5736, "step": 157010 }, { "epoch": 0.784139429199231, "grad_norm": 0.09286126494407654, "learning_rate": 6.4920273348519365e-06, "loss": 8.5549, "step": 157020 }, { "epoch": 0.7841893680241704, "grad_norm": 0.09739340096712112, "learning_rate": 6.490525419910386e-06, "loss": 8.5606, "step": 157030 }, { "epoch": 0.7842393068491098, "grad_norm": 0.09119179099798203, "learning_rate": 6.489023504968835e-06, "loss": 8.5585, "step": 157040 }, { "epoch": 0.7842892456740493, "grad_norm": 0.09187173843383789, "learning_rate": 6.487521590027285e-06, "loss": 8.5867, "step": 157050 }, { "epoch": 0.7843391844989888, "grad_norm": 0.08924529701471329, "learning_rate": 6.486019675085734e-06, "loss": 8.5617, "step": 157060 }, { "epoch": 0.7843891233239282, "grad_norm": 0.08665250241756439, "learning_rate": 6.484517760144184e-06, "loss": 8.5643, "step": 157070 }, { "epoch": 0.7844390621488676, "grad_norm": 0.08721933513879776, "learning_rate": 6.483015845202634e-06, "loss": 8.5747, "step": 157080 }, { "epoch": 0.7844890009738071, "grad_norm": 0.09293626993894577, "learning_rate": 6.481513930261083e-06, "loss": 8.5616, "step": 157090 }, { "epoch": 0.7845389397987466, "grad_norm": 0.09339628368616104, "learning_rate": 6.480012015319533e-06, "loss": 8.5674, "step": 157100 }, { "epoch": 0.784588878623686, "grad_norm": 0.09324491769075394, "learning_rate": 6.478510100377981e-06, "loss": 8.561, "step": 157110 }, { "epoch": 0.7846388174486254, "grad_norm": 0.08938504010438919, "learning_rate": 6.4770081854364315e-06, "loss": 8.5575, "step": 157120 }, { "epoch": 0.7846887562735649, "grad_norm": 0.09114962816238403, "learning_rate": 6.475506270494882e-06, "loss": 8.5692, "step": 157130 }, { "epoch": 0.7847386950985044, "grad_norm": 0.0890054702758789, "learning_rate": 6.47400435555333e-06, "loss": 8.5646, "step": 157140 }, { "epoch": 0.7847886339234438, "grad_norm": 0.09435494989156723, "learning_rate": 6.47250244061178e-06, "loss": 8.5797, "step": 157150 }, { "epoch": 0.7848385727483832, "grad_norm": 0.09404738992452621, "learning_rate": 6.47100052567023e-06, "loss": 8.5541, "step": 157160 }, { "epoch": 0.7848885115733227, "grad_norm": 0.09190955758094788, "learning_rate": 6.469498610728679e-06, "loss": 8.5693, "step": 157170 }, { "epoch": 0.7849384503982622, "grad_norm": 0.08809108287096024, "learning_rate": 6.467996695787129e-06, "loss": 8.5717, "step": 157180 }, { "epoch": 0.7849883892232016, "grad_norm": 0.0869806781411171, "learning_rate": 6.466494780845578e-06, "loss": 8.5623, "step": 157190 }, { "epoch": 0.785038328048141, "grad_norm": 0.093222476541996, "learning_rate": 6.464992865904028e-06, "loss": 8.5687, "step": 157200 }, { "epoch": 0.7850882668730805, "grad_norm": 0.08760460466146469, "learning_rate": 6.463490950962477e-06, "loss": 8.5537, "step": 157210 }, { "epoch": 0.78513820569802, "grad_norm": 0.09468575567007065, "learning_rate": 6.4619890360209265e-06, "loss": 8.5678, "step": 157220 }, { "epoch": 0.7851881445229594, "grad_norm": 0.08602230995893478, "learning_rate": 6.460487121079377e-06, "loss": 8.5842, "step": 157230 }, { "epoch": 0.7852380833478988, "grad_norm": 0.093845434486866, "learning_rate": 6.458985206137826e-06, "loss": 8.5626, "step": 157240 }, { "epoch": 0.7852880221728382, "grad_norm": 0.08826538175344467, "learning_rate": 6.457483291196275e-06, "loss": 8.5479, "step": 157250 }, { "epoch": 0.7853379609977778, "grad_norm": 0.09430468082427979, "learning_rate": 6.455981376254725e-06, "loss": 8.5857, "step": 157260 }, { "epoch": 0.7853878998227172, "grad_norm": 0.08807472139596939, "learning_rate": 6.454479461313174e-06, "loss": 8.5541, "step": 157270 }, { "epoch": 0.7854378386476566, "grad_norm": 0.09787096083164215, "learning_rate": 6.452977546371624e-06, "loss": 8.5621, "step": 157280 }, { "epoch": 0.785487777472596, "grad_norm": 0.09540273249149323, "learning_rate": 6.4514756314300736e-06, "loss": 8.5687, "step": 157290 }, { "epoch": 0.7855377162975355, "grad_norm": 0.09057477116584778, "learning_rate": 6.449973716488523e-06, "loss": 8.5679, "step": 157300 }, { "epoch": 0.785587655122475, "grad_norm": 0.08641599118709564, "learning_rate": 6.448471801546972e-06, "loss": 8.5723, "step": 157310 }, { "epoch": 0.7856375939474144, "grad_norm": 0.0898127630352974, "learning_rate": 6.446969886605422e-06, "loss": 8.5729, "step": 157320 }, { "epoch": 0.7856875327723538, "grad_norm": 0.09772838652133942, "learning_rate": 6.445467971663872e-06, "loss": 8.5604, "step": 157330 }, { "epoch": 0.7857374715972933, "grad_norm": 0.09075041115283966, "learning_rate": 6.443966056722321e-06, "loss": 8.5621, "step": 157340 }, { "epoch": 0.7857874104222328, "grad_norm": 0.09176818281412125, "learning_rate": 6.44246414178077e-06, "loss": 8.5765, "step": 157350 }, { "epoch": 0.7858373492471722, "grad_norm": 0.09130357950925827, "learning_rate": 6.44096222683922e-06, "loss": 8.5659, "step": 157360 }, { "epoch": 0.7858872880721116, "grad_norm": 0.08570962399244308, "learning_rate": 6.43946031189767e-06, "loss": 8.5623, "step": 157370 }, { "epoch": 0.7859372268970511, "grad_norm": 0.08622989058494568, "learning_rate": 6.437958396956119e-06, "loss": 8.5748, "step": 157380 }, { "epoch": 0.7859871657219906, "grad_norm": 0.09150289744138718, "learning_rate": 6.4364564820145686e-06, "loss": 8.5404, "step": 157390 }, { "epoch": 0.78603710454693, "grad_norm": 0.09090328216552734, "learning_rate": 6.434954567073019e-06, "loss": 8.5627, "step": 157400 }, { "epoch": 0.7860870433718694, "grad_norm": 0.09460791200399399, "learning_rate": 6.433452652131468e-06, "loss": 8.5751, "step": 157410 }, { "epoch": 0.7861369821968089, "grad_norm": 0.09171260893344879, "learning_rate": 6.431950737189917e-06, "loss": 8.5494, "step": 157420 }, { "epoch": 0.7861869210217484, "grad_norm": 0.0896756649017334, "learning_rate": 6.430448822248367e-06, "loss": 8.5658, "step": 157430 }, { "epoch": 0.7862368598466878, "grad_norm": 0.08946090936660767, "learning_rate": 6.428946907306816e-06, "loss": 8.5715, "step": 157440 }, { "epoch": 0.7862867986716272, "grad_norm": 0.08953197300434113, "learning_rate": 6.427444992365266e-06, "loss": 8.5723, "step": 157450 }, { "epoch": 0.7863367374965667, "grad_norm": 0.08706686645746231, "learning_rate": 6.425943077423716e-06, "loss": 8.5687, "step": 157460 }, { "epoch": 0.7863866763215062, "grad_norm": 0.09402570128440857, "learning_rate": 6.424441162482165e-06, "loss": 8.5646, "step": 157470 }, { "epoch": 0.7864366151464456, "grad_norm": 0.09386756271123886, "learning_rate": 6.422939247540615e-06, "loss": 8.5686, "step": 157480 }, { "epoch": 0.786486553971385, "grad_norm": 0.09243669360876083, "learning_rate": 6.4214373325990636e-06, "loss": 8.5747, "step": 157490 }, { "epoch": 0.7865364927963245, "grad_norm": 0.08882700651884079, "learning_rate": 6.419935417657514e-06, "loss": 8.553, "step": 157500 }, { "epoch": 0.786586431621264, "grad_norm": 0.091353639960289, "learning_rate": 6.418433502715963e-06, "loss": 8.5603, "step": 157510 }, { "epoch": 0.7866363704462034, "grad_norm": 0.09125305712223053, "learning_rate": 6.416931587774412e-06, "loss": 8.5857, "step": 157520 }, { "epoch": 0.7866863092711428, "grad_norm": 0.09622442722320557, "learning_rate": 6.415429672832863e-06, "loss": 8.554, "step": 157530 }, { "epoch": 0.7867362480960823, "grad_norm": 0.09017577767372131, "learning_rate": 6.413927757891311e-06, "loss": 8.5527, "step": 157540 }, { "epoch": 0.7867861869210218, "grad_norm": 0.09028837084770203, "learning_rate": 6.412425842949761e-06, "loss": 8.5776, "step": 157550 }, { "epoch": 0.7868361257459612, "grad_norm": 0.0956842452287674, "learning_rate": 6.4109239280082114e-06, "loss": 8.5637, "step": 157560 }, { "epoch": 0.7868860645709006, "grad_norm": 0.09367352724075317, "learning_rate": 6.40942201306666e-06, "loss": 8.5664, "step": 157570 }, { "epoch": 0.7869360033958401, "grad_norm": 0.0918663740158081, "learning_rate": 6.40792009812511e-06, "loss": 8.5806, "step": 157580 }, { "epoch": 0.7869859422207796, "grad_norm": 0.08775880932807922, "learning_rate": 6.406418183183559e-06, "loss": 8.5442, "step": 157590 }, { "epoch": 0.787035881045719, "grad_norm": 0.09422892332077026, "learning_rate": 6.404916268242009e-06, "loss": 8.5819, "step": 157600 }, { "epoch": 0.7870858198706584, "grad_norm": 0.0917881578207016, "learning_rate": 6.403414353300459e-06, "loss": 8.56, "step": 157610 }, { "epoch": 0.7871357586955979, "grad_norm": 0.08879712224006653, "learning_rate": 6.401912438358907e-06, "loss": 8.5751, "step": 157620 }, { "epoch": 0.7871856975205374, "grad_norm": 0.0947839617729187, "learning_rate": 6.400410523417358e-06, "loss": 8.5617, "step": 157630 }, { "epoch": 0.7872356363454768, "grad_norm": 0.09528858214616776, "learning_rate": 6.398908608475807e-06, "loss": 8.5535, "step": 157640 }, { "epoch": 0.7872855751704162, "grad_norm": 0.08856210112571716, "learning_rate": 6.397406693534256e-06, "loss": 8.5532, "step": 157650 }, { "epoch": 0.7873355139953557, "grad_norm": 0.08869683742523193, "learning_rate": 6.3959047785927064e-06, "loss": 8.5521, "step": 157660 }, { "epoch": 0.7873854528202952, "grad_norm": 0.09273868054151535, "learning_rate": 6.394402863651155e-06, "loss": 8.5656, "step": 157670 }, { "epoch": 0.7874353916452346, "grad_norm": 0.09226199239492416, "learning_rate": 6.392900948709605e-06, "loss": 8.5631, "step": 157680 }, { "epoch": 0.787485330470174, "grad_norm": 0.09002158790826797, "learning_rate": 6.3913990337680544e-06, "loss": 8.5679, "step": 157690 }, { "epoch": 0.7875352692951135, "grad_norm": 0.09723229706287384, "learning_rate": 6.389897118826504e-06, "loss": 8.5763, "step": 157700 }, { "epoch": 0.787585208120053, "grad_norm": 0.0909029170870781, "learning_rate": 6.388395203884954e-06, "loss": 8.5659, "step": 157710 }, { "epoch": 0.7876351469449924, "grad_norm": 0.08461030572652817, "learning_rate": 6.386893288943403e-06, "loss": 8.5652, "step": 157720 }, { "epoch": 0.7876850857699318, "grad_norm": 0.0874033197760582, "learning_rate": 6.385391374001853e-06, "loss": 8.553, "step": 157730 }, { "epoch": 0.7877350245948713, "grad_norm": 0.09039891511201859, "learning_rate": 6.383889459060302e-06, "loss": 8.5708, "step": 157740 }, { "epoch": 0.7877849634198107, "grad_norm": 0.09345263242721558, "learning_rate": 6.382387544118751e-06, "loss": 8.5643, "step": 157750 }, { "epoch": 0.7878349022447502, "grad_norm": 0.08719782531261444, "learning_rate": 6.3808856291772014e-06, "loss": 8.5788, "step": 157760 }, { "epoch": 0.7878848410696896, "grad_norm": 0.09255023300647736, "learning_rate": 6.379383714235651e-06, "loss": 8.5487, "step": 157770 }, { "epoch": 0.7879347798946291, "grad_norm": 0.08840210735797882, "learning_rate": 6.3778817992941e-06, "loss": 8.5636, "step": 157780 }, { "epoch": 0.7879847187195685, "grad_norm": 0.08876688033342361, "learning_rate": 6.3763798843525494e-06, "loss": 8.5884, "step": 157790 }, { "epoch": 0.788034657544508, "grad_norm": 0.0919869989156723, "learning_rate": 6.374877969411e-06, "loss": 8.5666, "step": 157800 }, { "epoch": 0.7880845963694474, "grad_norm": 0.08744458109140396, "learning_rate": 6.373376054469449e-06, "loss": 8.5712, "step": 157810 }, { "epoch": 0.7881345351943869, "grad_norm": 0.0894647017121315, "learning_rate": 6.371874139527898e-06, "loss": 8.5758, "step": 157820 }, { "epoch": 0.7881844740193263, "grad_norm": 0.08984792977571487, "learning_rate": 6.370372224586348e-06, "loss": 8.5658, "step": 157830 }, { "epoch": 0.7882344128442658, "grad_norm": 0.08753778785467148, "learning_rate": 6.368870309644797e-06, "loss": 8.5606, "step": 157840 }, { "epoch": 0.7882843516692052, "grad_norm": 0.09227602183818817, "learning_rate": 6.367368394703247e-06, "loss": 8.5656, "step": 157850 }, { "epoch": 0.7883342904941447, "grad_norm": 0.09825395792722702, "learning_rate": 6.3658664797616965e-06, "loss": 8.5529, "step": 157860 }, { "epoch": 0.7883842293190841, "grad_norm": 0.08933484554290771, "learning_rate": 6.364364564820146e-06, "loss": 8.5791, "step": 157870 }, { "epoch": 0.7884341681440236, "grad_norm": 0.09111601114273071, "learning_rate": 6.362862649878596e-06, "loss": 8.5454, "step": 157880 }, { "epoch": 0.788484106968963, "grad_norm": 0.09824613481760025, "learning_rate": 6.3613607349370444e-06, "loss": 8.5477, "step": 157890 }, { "epoch": 0.7885340457939025, "grad_norm": 0.09107904136180878, "learning_rate": 6.359858819995495e-06, "loss": 8.5625, "step": 157900 }, { "epoch": 0.7885839846188419, "grad_norm": 0.08801267296075821, "learning_rate": 6.358356905053944e-06, "loss": 8.5432, "step": 157910 }, { "epoch": 0.7886339234437814, "grad_norm": 0.09051348268985748, "learning_rate": 6.356854990112393e-06, "loss": 8.5402, "step": 157920 }, { "epoch": 0.7886838622687208, "grad_norm": 0.0885823667049408, "learning_rate": 6.3553530751708435e-06, "loss": 8.5557, "step": 157930 }, { "epoch": 0.7887338010936603, "grad_norm": 0.09226511418819427, "learning_rate": 6.353851160229292e-06, "loss": 8.5563, "step": 157940 }, { "epoch": 0.7887837399185997, "grad_norm": 0.09105234593153, "learning_rate": 6.352349245287742e-06, "loss": 8.572, "step": 157950 }, { "epoch": 0.7888336787435392, "grad_norm": 0.08796839416027069, "learning_rate": 6.350847330346192e-06, "loss": 8.5714, "step": 157960 }, { "epoch": 0.7888836175684786, "grad_norm": 0.08775161951780319, "learning_rate": 6.349345415404641e-06, "loss": 8.5559, "step": 157970 }, { "epoch": 0.7889335563934181, "grad_norm": 0.09324196726083755, "learning_rate": 6.347843500463091e-06, "loss": 8.5625, "step": 157980 }, { "epoch": 0.7889834952183575, "grad_norm": 0.08922797441482544, "learning_rate": 6.3463415855215395e-06, "loss": 8.5759, "step": 157990 }, { "epoch": 0.789033434043297, "grad_norm": 0.09467015415430069, "learning_rate": 6.34483967057999e-06, "loss": 8.5574, "step": 158000 }, { "epoch": 0.7890833728682364, "grad_norm": 0.09067735075950623, "learning_rate": 6.34333775563844e-06, "loss": 8.5545, "step": 158010 }, { "epoch": 0.7891333116931759, "grad_norm": 0.09173320233821869, "learning_rate": 6.341835840696888e-06, "loss": 8.5527, "step": 158020 }, { "epoch": 0.7891832505181153, "grad_norm": 0.08913468569517136, "learning_rate": 6.3403339257553385e-06, "loss": 8.567, "step": 158030 }, { "epoch": 0.7892331893430548, "grad_norm": 0.088861383497715, "learning_rate": 6.338832010813787e-06, "loss": 8.5653, "step": 158040 }, { "epoch": 0.7892831281679942, "grad_norm": 0.08768237382173538, "learning_rate": 6.337330095872237e-06, "loss": 8.5644, "step": 158050 }, { "epoch": 0.7893330669929337, "grad_norm": 0.09068477898836136, "learning_rate": 6.335828180930687e-06, "loss": 8.5581, "step": 158060 }, { "epoch": 0.7893830058178731, "grad_norm": 0.09171970933675766, "learning_rate": 6.334326265989136e-06, "loss": 8.5701, "step": 158070 }, { "epoch": 0.7894329446428126, "grad_norm": 0.09282483905553818, "learning_rate": 6.332824351047586e-06, "loss": 8.5821, "step": 158080 }, { "epoch": 0.789482883467752, "grad_norm": 0.09467281401157379, "learning_rate": 6.331322436106035e-06, "loss": 8.5509, "step": 158090 }, { "epoch": 0.7895328222926915, "grad_norm": 0.09098406136035919, "learning_rate": 6.329820521164485e-06, "loss": 8.5573, "step": 158100 }, { "epoch": 0.7895827611176309, "grad_norm": 0.08324146270751953, "learning_rate": 6.328318606222935e-06, "loss": 8.557, "step": 158110 }, { "epoch": 0.7896326999425703, "grad_norm": 0.09208779036998749, "learning_rate": 6.326816691281383e-06, "loss": 8.576, "step": 158120 }, { "epoch": 0.7896826387675098, "grad_norm": 0.0910223051905632, "learning_rate": 6.3253147763398335e-06, "loss": 8.5419, "step": 158130 }, { "epoch": 0.7897325775924493, "grad_norm": 0.0862598568201065, "learning_rate": 6.323812861398283e-06, "loss": 8.5799, "step": 158140 }, { "epoch": 0.7897825164173887, "grad_norm": 0.0855773463845253, "learning_rate": 6.322310946456732e-06, "loss": 8.5602, "step": 158150 }, { "epoch": 0.7898324552423281, "grad_norm": 0.09082646667957306, "learning_rate": 6.320809031515182e-06, "loss": 8.5751, "step": 158160 }, { "epoch": 0.7898823940672676, "grad_norm": 0.09423871338367462, "learning_rate": 6.319307116573632e-06, "loss": 8.5738, "step": 158170 }, { "epoch": 0.7899323328922071, "grad_norm": 0.08389170467853546, "learning_rate": 6.317805201632081e-06, "loss": 8.579, "step": 158180 }, { "epoch": 0.7899822717171465, "grad_norm": 0.08935504406690598, "learning_rate": 6.31630328669053e-06, "loss": 8.5623, "step": 158190 }, { "epoch": 0.7900322105420859, "grad_norm": 0.09032989293336868, "learning_rate": 6.31480137174898e-06, "loss": 8.5733, "step": 158200 }, { "epoch": 0.7900821493670254, "grad_norm": 0.08665549755096436, "learning_rate": 6.31329945680743e-06, "loss": 8.5566, "step": 158210 }, { "epoch": 0.7901320881919648, "grad_norm": 0.0938415452837944, "learning_rate": 6.311797541865879e-06, "loss": 8.5668, "step": 158220 }, { "epoch": 0.7901820270169043, "grad_norm": 0.08663594722747803, "learning_rate": 6.3102956269243285e-06, "loss": 8.5642, "step": 158230 }, { "epoch": 0.7902319658418437, "grad_norm": 0.08731528371572495, "learning_rate": 6.308793711982778e-06, "loss": 8.5803, "step": 158240 }, { "epoch": 0.7902819046667832, "grad_norm": 0.08931776881217957, "learning_rate": 6.307291797041228e-06, "loss": 8.5432, "step": 158250 }, { "epoch": 0.7903318434917226, "grad_norm": 0.09380349516868591, "learning_rate": 6.305789882099677e-06, "loss": 8.5696, "step": 158260 }, { "epoch": 0.7903817823166621, "grad_norm": 0.09893126040697098, "learning_rate": 6.304287967158127e-06, "loss": 8.5641, "step": 158270 }, { "epoch": 0.7904317211416015, "grad_norm": 0.0894632488489151, "learning_rate": 6.302786052216576e-06, "loss": 8.5541, "step": 158280 }, { "epoch": 0.790481659966541, "grad_norm": 0.08917111158370972, "learning_rate": 6.301284137275025e-06, "loss": 8.5575, "step": 158290 }, { "epoch": 0.7905315987914804, "grad_norm": 0.09342500567436218, "learning_rate": 6.2997822223334755e-06, "loss": 8.5596, "step": 158300 }, { "epoch": 0.7905815376164199, "grad_norm": 0.0935325175523758, "learning_rate": 6.298280307391925e-06, "loss": 8.5477, "step": 158310 }, { "epoch": 0.7906314764413593, "grad_norm": 0.09898251295089722, "learning_rate": 6.296778392450374e-06, "loss": 8.5758, "step": 158320 }, { "epoch": 0.7906814152662988, "grad_norm": 0.09187868982553482, "learning_rate": 6.295276477508824e-06, "loss": 8.5561, "step": 158330 }, { "epoch": 0.7907313540912382, "grad_norm": 0.08766231685876846, "learning_rate": 6.293774562567273e-06, "loss": 8.5648, "step": 158340 }, { "epoch": 0.7907812929161777, "grad_norm": 0.09430361539125443, "learning_rate": 6.292272647625723e-06, "loss": 8.5563, "step": 158350 }, { "epoch": 0.7908312317411171, "grad_norm": 0.08926588296890259, "learning_rate": 6.290770732684172e-06, "loss": 8.5654, "step": 158360 }, { "epoch": 0.7908811705660566, "grad_norm": 0.08842246234416962, "learning_rate": 6.289268817742622e-06, "loss": 8.568, "step": 158370 }, { "epoch": 0.790931109390996, "grad_norm": 0.08935972303152084, "learning_rate": 6.287766902801072e-06, "loss": 8.556, "step": 158380 }, { "epoch": 0.7909810482159355, "grad_norm": 0.09105094522237778, "learning_rate": 6.28626498785952e-06, "loss": 8.5633, "step": 158390 }, { "epoch": 0.7910309870408749, "grad_norm": 0.09042556583881378, "learning_rate": 6.2847630729179705e-06, "loss": 8.5534, "step": 158400 }, { "epoch": 0.7910809258658144, "grad_norm": 0.09127331525087357, "learning_rate": 6.283261157976421e-06, "loss": 8.5687, "step": 158410 }, { "epoch": 0.7911308646907538, "grad_norm": 0.09261275827884674, "learning_rate": 6.281759243034869e-06, "loss": 8.5473, "step": 158420 }, { "epoch": 0.7911808035156933, "grad_norm": 0.08710244297981262, "learning_rate": 6.280257328093319e-06, "loss": 8.578, "step": 158430 }, { "epoch": 0.7912307423406327, "grad_norm": 0.10005831718444824, "learning_rate": 6.278755413151769e-06, "loss": 8.5261, "step": 158440 }, { "epoch": 0.7912806811655722, "grad_norm": 0.09465691447257996, "learning_rate": 6.277253498210218e-06, "loss": 8.5627, "step": 158450 }, { "epoch": 0.7913306199905116, "grad_norm": 0.09323757886886597, "learning_rate": 6.275751583268668e-06, "loss": 8.5629, "step": 158460 }, { "epoch": 0.7913805588154511, "grad_norm": 0.09241240471601486, "learning_rate": 6.274249668327117e-06, "loss": 8.564, "step": 158470 }, { "epoch": 0.7914304976403905, "grad_norm": 0.09037698805332184, "learning_rate": 6.272747753385567e-06, "loss": 8.5478, "step": 158480 }, { "epoch": 0.79148043646533, "grad_norm": 0.09861491620540619, "learning_rate": 6.271245838444017e-06, "loss": 8.5623, "step": 158490 }, { "epoch": 0.7915303752902694, "grad_norm": 0.09813782572746277, "learning_rate": 6.2697439235024655e-06, "loss": 8.5839, "step": 158500 }, { "epoch": 0.7915803141152089, "grad_norm": 0.08585695922374725, "learning_rate": 6.268242008560916e-06, "loss": 8.5453, "step": 158510 }, { "epoch": 0.7916302529401483, "grad_norm": 0.09625940024852753, "learning_rate": 6.266740093619364e-06, "loss": 8.5728, "step": 158520 }, { "epoch": 0.7916801917650877, "grad_norm": 0.09219558537006378, "learning_rate": 6.265238178677814e-06, "loss": 8.5559, "step": 158530 }, { "epoch": 0.7917301305900272, "grad_norm": 0.0883294865489006, "learning_rate": 6.2637362637362645e-06, "loss": 8.5629, "step": 158540 }, { "epoch": 0.7917800694149667, "grad_norm": 0.09103575348854065, "learning_rate": 6.262234348794713e-06, "loss": 8.5675, "step": 158550 }, { "epoch": 0.7918300082399061, "grad_norm": 0.09251187741756439, "learning_rate": 6.260732433853163e-06, "loss": 8.5794, "step": 158560 }, { "epoch": 0.7918799470648455, "grad_norm": 0.09562096744775772, "learning_rate": 6.2592305189116125e-06, "loss": 8.5676, "step": 158570 }, { "epoch": 0.791929885889785, "grad_norm": 0.08727453649044037, "learning_rate": 6.257728603970062e-06, "loss": 8.5519, "step": 158580 }, { "epoch": 0.7919798247147245, "grad_norm": 0.08689242601394653, "learning_rate": 6.256226689028512e-06, "loss": 8.561, "step": 158590 }, { "epoch": 0.7920297635396639, "grad_norm": 0.09087180346250534, "learning_rate": 6.2547247740869605e-06, "loss": 8.5644, "step": 158600 }, { "epoch": 0.7920797023646033, "grad_norm": 0.09163371473550797, "learning_rate": 6.253222859145411e-06, "loss": 8.5527, "step": 158610 }, { "epoch": 0.7921296411895428, "grad_norm": 0.0966818630695343, "learning_rate": 6.25172094420386e-06, "loss": 8.5677, "step": 158620 }, { "epoch": 0.7921795800144823, "grad_norm": 0.0895446240901947, "learning_rate": 6.250219029262309e-06, "loss": 8.5491, "step": 158630 }, { "epoch": 0.7922295188394217, "grad_norm": 0.08966691046953201, "learning_rate": 6.2487171143207595e-06, "loss": 8.5715, "step": 158640 }, { "epoch": 0.7922794576643611, "grad_norm": 0.09029648452997208, "learning_rate": 6.247215199379209e-06, "loss": 8.5655, "step": 158650 }, { "epoch": 0.7923293964893006, "grad_norm": 0.09223111718893051, "learning_rate": 6.245713284437658e-06, "loss": 8.5537, "step": 158660 }, { "epoch": 0.7923793353142401, "grad_norm": 0.09418982267379761, "learning_rate": 6.2442113694961075e-06, "loss": 8.5509, "step": 158670 }, { "epoch": 0.7924292741391795, "grad_norm": 0.09183704107999802, "learning_rate": 6.242709454554557e-06, "loss": 8.5811, "step": 158680 }, { "epoch": 0.7924792129641189, "grad_norm": 0.09103389829397202, "learning_rate": 6.241207539613007e-06, "loss": 8.5531, "step": 158690 }, { "epoch": 0.7925291517890584, "grad_norm": 0.09130579978227615, "learning_rate": 6.239705624671456e-06, "loss": 8.5477, "step": 158700 }, { "epoch": 0.7925790906139979, "grad_norm": 0.0948038399219513, "learning_rate": 6.238203709729906e-06, "loss": 8.5581, "step": 158710 }, { "epoch": 0.7926290294389373, "grad_norm": 0.08801276236772537, "learning_rate": 6.236701794788355e-06, "loss": 8.5652, "step": 158720 }, { "epoch": 0.7926789682638767, "grad_norm": 0.09371495991945267, "learning_rate": 6.235199879846805e-06, "loss": 8.54, "step": 158730 }, { "epoch": 0.7927289070888162, "grad_norm": 0.09850714355707169, "learning_rate": 6.2336979649052546e-06, "loss": 8.5497, "step": 158740 }, { "epoch": 0.7927788459137557, "grad_norm": 0.08900289982557297, "learning_rate": 6.232196049963704e-06, "loss": 8.5674, "step": 158750 }, { "epoch": 0.7928287847386951, "grad_norm": 0.09315358847379684, "learning_rate": 6.230694135022153e-06, "loss": 8.566, "step": 158760 }, { "epoch": 0.7928787235636345, "grad_norm": 0.09045889228582382, "learning_rate": 6.2291922200806025e-06, "loss": 8.5793, "step": 158770 }, { "epoch": 0.792928662388574, "grad_norm": 0.0957542434334755, "learning_rate": 6.227690305139053e-06, "loss": 8.5479, "step": 158780 }, { "epoch": 0.7929786012135135, "grad_norm": 0.08995579183101654, "learning_rate": 6.226188390197502e-06, "loss": 8.5627, "step": 158790 }, { "epoch": 0.7930285400384529, "grad_norm": 0.09000235050916672, "learning_rate": 6.224686475255951e-06, "loss": 8.5479, "step": 158800 }, { "epoch": 0.7930784788633923, "grad_norm": 0.08959494531154633, "learning_rate": 6.2231845603144016e-06, "loss": 8.5524, "step": 158810 }, { "epoch": 0.7931284176883318, "grad_norm": 0.0925586149096489, "learning_rate": 6.22168264537285e-06, "loss": 8.566, "step": 158820 }, { "epoch": 0.7931783565132713, "grad_norm": 0.08586516976356506, "learning_rate": 6.2201807304313e-06, "loss": 8.5513, "step": 158830 }, { "epoch": 0.7932282953382107, "grad_norm": 0.0888560339808464, "learning_rate": 6.2186788154897496e-06, "loss": 8.5655, "step": 158840 }, { "epoch": 0.7932782341631501, "grad_norm": 0.08735716342926025, "learning_rate": 6.217176900548199e-06, "loss": 8.5603, "step": 158850 }, { "epoch": 0.7933281729880896, "grad_norm": 0.08948913961648941, "learning_rate": 6.215674985606649e-06, "loss": 8.5602, "step": 158860 }, { "epoch": 0.7933781118130291, "grad_norm": 0.09545059502124786, "learning_rate": 6.2141730706650976e-06, "loss": 8.5611, "step": 158870 }, { "epoch": 0.7934280506379685, "grad_norm": 0.08870154619216919, "learning_rate": 6.212671155723548e-06, "loss": 8.5613, "step": 158880 }, { "epoch": 0.7934779894629079, "grad_norm": 0.08690696954727173, "learning_rate": 6.211169240781998e-06, "loss": 8.5648, "step": 158890 }, { "epoch": 0.7935279282878474, "grad_norm": 0.09118922054767609, "learning_rate": 6.209667325840446e-06, "loss": 8.5562, "step": 158900 }, { "epoch": 0.7935778671127869, "grad_norm": 0.09019847214221954, "learning_rate": 6.2081654108988966e-06, "loss": 8.5698, "step": 158910 }, { "epoch": 0.7936278059377263, "grad_norm": 0.09406841546297073, "learning_rate": 6.206663495957345e-06, "loss": 8.5572, "step": 158920 }, { "epoch": 0.7936777447626657, "grad_norm": 0.09367325156927109, "learning_rate": 6.205161581015795e-06, "loss": 8.5657, "step": 158930 }, { "epoch": 0.7937276835876051, "grad_norm": 0.09416260570287704, "learning_rate": 6.203659666074245e-06, "loss": 8.5811, "step": 158940 }, { "epoch": 0.7937776224125447, "grad_norm": 0.09471859037876129, "learning_rate": 6.202157751132694e-06, "loss": 8.5597, "step": 158950 }, { "epoch": 0.7938275612374841, "grad_norm": 0.09237445890903473, "learning_rate": 6.200655836191144e-06, "loss": 8.5647, "step": 158960 }, { "epoch": 0.7938775000624235, "grad_norm": 0.09338158369064331, "learning_rate": 6.199153921249593e-06, "loss": 8.5622, "step": 158970 }, { "epoch": 0.793927438887363, "grad_norm": 0.08678948134183884, "learning_rate": 6.197652006308043e-06, "loss": 8.5693, "step": 158980 }, { "epoch": 0.7939773777123025, "grad_norm": 0.09783153980970383, "learning_rate": 6.196150091366493e-06, "loss": 8.5626, "step": 158990 }, { "epoch": 0.7940273165372419, "grad_norm": 0.09704387933015823, "learning_rate": 6.194648176424941e-06, "loss": 8.5705, "step": 159000 }, { "epoch": 0.7940772553621813, "grad_norm": 0.09087003022432327, "learning_rate": 6.193146261483392e-06, "loss": 8.5679, "step": 159010 }, { "epoch": 0.7941271941871207, "grad_norm": 0.09224209189414978, "learning_rate": 6.191644346541841e-06, "loss": 8.5447, "step": 159020 }, { "epoch": 0.7941771330120603, "grad_norm": 0.08932413905858994, "learning_rate": 6.19014243160029e-06, "loss": 8.5509, "step": 159030 }, { "epoch": 0.7942270718369997, "grad_norm": 0.09286679327487946, "learning_rate": 6.18864051665874e-06, "loss": 8.5673, "step": 159040 }, { "epoch": 0.7942770106619391, "grad_norm": 0.09589332342147827, "learning_rate": 6.18713860171719e-06, "loss": 8.5603, "step": 159050 }, { "epoch": 0.7943269494868785, "grad_norm": 0.10077871382236481, "learning_rate": 6.185636686775639e-06, "loss": 8.5626, "step": 159060 }, { "epoch": 0.7943768883118181, "grad_norm": 0.0931212455034256, "learning_rate": 6.184134771834088e-06, "loss": 8.5467, "step": 159070 }, { "epoch": 0.7944268271367575, "grad_norm": 0.09118203818798065, "learning_rate": 6.182632856892538e-06, "loss": 8.5558, "step": 159080 }, { "epoch": 0.7944767659616969, "grad_norm": 0.08937732130289078, "learning_rate": 6.181130941950988e-06, "loss": 8.56, "step": 159090 }, { "epoch": 0.7945267047866363, "grad_norm": 0.09185148030519485, "learning_rate": 6.179629027009437e-06, "loss": 8.5669, "step": 159100 }, { "epoch": 0.7945766436115759, "grad_norm": 0.08831166476011276, "learning_rate": 6.178127112067887e-06, "loss": 8.5614, "step": 159110 }, { "epoch": 0.7946265824365153, "grad_norm": 0.08398475497961044, "learning_rate": 6.176625197126336e-06, "loss": 8.5808, "step": 159120 }, { "epoch": 0.7946765212614547, "grad_norm": 0.08763767033815384, "learning_rate": 6.175123282184786e-06, "loss": 8.567, "step": 159130 }, { "epoch": 0.7947264600863941, "grad_norm": 0.10390178859233856, "learning_rate": 6.1736213672432354e-06, "loss": 8.542, "step": 159140 }, { "epoch": 0.7947763989113337, "grad_norm": 0.08661069720983505, "learning_rate": 6.172119452301685e-06, "loss": 8.5623, "step": 159150 }, { "epoch": 0.7948263377362731, "grad_norm": 0.0880892425775528, "learning_rate": 6.170617537360134e-06, "loss": 8.5611, "step": 159160 }, { "epoch": 0.7948762765612125, "grad_norm": 0.0939859077334404, "learning_rate": 6.169115622418583e-06, "loss": 8.5483, "step": 159170 }, { "epoch": 0.7949262153861519, "grad_norm": 0.09007950127124786, "learning_rate": 6.167613707477034e-06, "loss": 8.5587, "step": 159180 }, { "epoch": 0.7949761542110915, "grad_norm": 0.09516777098178864, "learning_rate": 6.166111792535483e-06, "loss": 8.574, "step": 159190 }, { "epoch": 0.7950260930360309, "grad_norm": 0.08972634375095367, "learning_rate": 6.164609877593932e-06, "loss": 8.5578, "step": 159200 }, { "epoch": 0.7950760318609703, "grad_norm": 0.09445880353450775, "learning_rate": 6.1631079626523824e-06, "loss": 8.5576, "step": 159210 }, { "epoch": 0.7951259706859097, "grad_norm": 0.08887441456317902, "learning_rate": 6.161606047710831e-06, "loss": 8.5647, "step": 159220 }, { "epoch": 0.7951759095108492, "grad_norm": 0.09141810983419418, "learning_rate": 6.160104132769281e-06, "loss": 8.5579, "step": 159230 }, { "epoch": 0.7952258483357887, "grad_norm": 0.0876648873090744, "learning_rate": 6.1586022178277304e-06, "loss": 8.5538, "step": 159240 }, { "epoch": 0.7952757871607281, "grad_norm": 0.08809340745210648, "learning_rate": 6.15710030288618e-06, "loss": 8.5455, "step": 159250 }, { "epoch": 0.7953257259856675, "grad_norm": 0.09779562056064606, "learning_rate": 6.15559838794463e-06, "loss": 8.5439, "step": 159260 }, { "epoch": 0.795375664810607, "grad_norm": 0.08831503242254257, "learning_rate": 6.1540964730030784e-06, "loss": 8.5677, "step": 159270 }, { "epoch": 0.7954256036355465, "grad_norm": 0.09317151457071304, "learning_rate": 6.152594558061529e-06, "loss": 8.551, "step": 159280 }, { "epoch": 0.7954755424604859, "grad_norm": 0.08679518103599548, "learning_rate": 6.151092643119979e-06, "loss": 8.577, "step": 159290 }, { "epoch": 0.7955254812854253, "grad_norm": 0.09124913066625595, "learning_rate": 6.149590728178427e-06, "loss": 8.5719, "step": 159300 }, { "epoch": 0.7955754201103648, "grad_norm": 0.08849890530109406, "learning_rate": 6.1480888132368774e-06, "loss": 8.5562, "step": 159310 }, { "epoch": 0.7956253589353043, "grad_norm": 0.09584712982177734, "learning_rate": 6.146586898295326e-06, "loss": 8.5612, "step": 159320 }, { "epoch": 0.7956752977602437, "grad_norm": 0.09756402671337128, "learning_rate": 6.145084983353776e-06, "loss": 8.5604, "step": 159330 }, { "epoch": 0.7957252365851831, "grad_norm": 0.09176575392484665, "learning_rate": 6.143583068412226e-06, "loss": 8.5578, "step": 159340 }, { "epoch": 0.7957751754101225, "grad_norm": 0.09086544811725616, "learning_rate": 6.142081153470675e-06, "loss": 8.5625, "step": 159350 }, { "epoch": 0.7958251142350621, "grad_norm": 0.09206107258796692, "learning_rate": 6.140579238529125e-06, "loss": 8.5577, "step": 159360 }, { "epoch": 0.7958750530600015, "grad_norm": 0.09471509605646133, "learning_rate": 6.139077323587574e-06, "loss": 8.5724, "step": 159370 }, { "epoch": 0.7959249918849409, "grad_norm": 0.09322207421064377, "learning_rate": 6.137575408646024e-06, "loss": 8.5488, "step": 159380 }, { "epoch": 0.7959749307098803, "grad_norm": 0.09167961776256561, "learning_rate": 6.136073493704474e-06, "loss": 8.5676, "step": 159390 }, { "epoch": 0.7960248695348199, "grad_norm": 0.09485690295696259, "learning_rate": 6.134571578762922e-06, "loss": 8.5436, "step": 159400 }, { "epoch": 0.7960748083597593, "grad_norm": 0.08992945402860641, "learning_rate": 6.1330696638213725e-06, "loss": 8.5585, "step": 159410 }, { "epoch": 0.7961247471846987, "grad_norm": 0.08964666724205017, "learning_rate": 6.131567748879822e-06, "loss": 8.5553, "step": 159420 }, { "epoch": 0.7961746860096381, "grad_norm": 0.09216075390577316, "learning_rate": 6.130065833938271e-06, "loss": 8.5633, "step": 159430 }, { "epoch": 0.7962246248345777, "grad_norm": 0.08898287266492844, "learning_rate": 6.128563918996721e-06, "loss": 8.5607, "step": 159440 }, { "epoch": 0.7962745636595171, "grad_norm": 0.08977809548377991, "learning_rate": 6.127062004055171e-06, "loss": 8.5637, "step": 159450 }, { "epoch": 0.7963245024844565, "grad_norm": 0.08928190171718597, "learning_rate": 6.12556008911362e-06, "loss": 8.5715, "step": 159460 }, { "epoch": 0.7963744413093959, "grad_norm": 0.09225127846002579, "learning_rate": 6.124058174172069e-06, "loss": 8.5857, "step": 159470 }, { "epoch": 0.7964243801343355, "grad_norm": 0.09755774587392807, "learning_rate": 6.122556259230519e-06, "loss": 8.5431, "step": 159480 }, { "epoch": 0.7964743189592749, "grad_norm": 0.09840092062950134, "learning_rate": 6.121054344288969e-06, "loss": 8.5594, "step": 159490 }, { "epoch": 0.7965242577842143, "grad_norm": 0.09154287725687027, "learning_rate": 6.119552429347418e-06, "loss": 8.5609, "step": 159500 }, { "epoch": 0.7965741966091537, "grad_norm": 0.08567217737436295, "learning_rate": 6.1180505144058675e-06, "loss": 8.5555, "step": 159510 }, { "epoch": 0.7966241354340933, "grad_norm": 0.08857017010450363, "learning_rate": 6.116548599464317e-06, "loss": 8.5614, "step": 159520 }, { "epoch": 0.7966740742590327, "grad_norm": 0.08761531114578247, "learning_rate": 6.115046684522767e-06, "loss": 8.5644, "step": 159530 }, { "epoch": 0.7967240130839721, "grad_norm": 0.0912371501326561, "learning_rate": 6.113544769581216e-06, "loss": 8.5736, "step": 159540 }, { "epoch": 0.7967739519089115, "grad_norm": 0.09295840561389923, "learning_rate": 6.112042854639666e-06, "loss": 8.5424, "step": 159550 }, { "epoch": 0.7968238907338511, "grad_norm": 0.09031003713607788, "learning_rate": 6.110540939698115e-06, "loss": 8.554, "step": 159560 }, { "epoch": 0.7968738295587905, "grad_norm": 0.0904209241271019, "learning_rate": 6.109039024756565e-06, "loss": 8.5746, "step": 159570 }, { "epoch": 0.7969237683837299, "grad_norm": 0.08807667344808578, "learning_rate": 6.1075371098150145e-06, "loss": 8.5526, "step": 159580 }, { "epoch": 0.7969737072086693, "grad_norm": 0.09461508691310883, "learning_rate": 6.106035194873464e-06, "loss": 8.557, "step": 159590 }, { "epoch": 0.7970236460336089, "grad_norm": 0.08692062646150589, "learning_rate": 6.104533279931913e-06, "loss": 8.5529, "step": 159600 }, { "epoch": 0.7970735848585483, "grad_norm": 0.09569226950407028, "learning_rate": 6.103031364990363e-06, "loss": 8.5602, "step": 159610 }, { "epoch": 0.7971235236834877, "grad_norm": 0.09210385382175446, "learning_rate": 6.101529450048813e-06, "loss": 8.5649, "step": 159620 }, { "epoch": 0.7971734625084271, "grad_norm": 0.09162899106740952, "learning_rate": 6.100027535107262e-06, "loss": 8.5509, "step": 159630 }, { "epoch": 0.7972234013333667, "grad_norm": 0.09765185415744781, "learning_rate": 6.098525620165711e-06, "loss": 8.5527, "step": 159640 }, { "epoch": 0.7972733401583061, "grad_norm": 0.09412235766649246, "learning_rate": 6.097023705224161e-06, "loss": 8.5699, "step": 159650 }, { "epoch": 0.7973232789832455, "grad_norm": 0.09401925653219223, "learning_rate": 6.095521790282611e-06, "loss": 8.5554, "step": 159660 }, { "epoch": 0.7973732178081849, "grad_norm": 0.09170075505971909, "learning_rate": 6.09401987534106e-06, "loss": 8.5666, "step": 159670 }, { "epoch": 0.7974231566331245, "grad_norm": 0.0880158469080925, "learning_rate": 6.0925179603995095e-06, "loss": 8.554, "step": 159680 }, { "epoch": 0.7974730954580639, "grad_norm": 0.09159617871046066, "learning_rate": 6.09101604545796e-06, "loss": 8.5581, "step": 159690 }, { "epoch": 0.7975230342830033, "grad_norm": 0.09276486188173294, "learning_rate": 6.089514130516408e-06, "loss": 8.5436, "step": 159700 }, { "epoch": 0.7975729731079427, "grad_norm": 0.08525187522172928, "learning_rate": 6.088012215574858e-06, "loss": 8.5508, "step": 159710 }, { "epoch": 0.7976229119328823, "grad_norm": 0.08781725913286209, "learning_rate": 6.086510300633308e-06, "loss": 8.5792, "step": 159720 }, { "epoch": 0.7976728507578217, "grad_norm": 0.09643100947141647, "learning_rate": 6.085008385691757e-06, "loss": 8.5551, "step": 159730 }, { "epoch": 0.7977227895827611, "grad_norm": 0.08715363591909409, "learning_rate": 6.083506470750207e-06, "loss": 8.5656, "step": 159740 }, { "epoch": 0.7977727284077005, "grad_norm": 0.09139413386583328, "learning_rate": 6.082004555808656e-06, "loss": 8.5401, "step": 159750 }, { "epoch": 0.7978226672326401, "grad_norm": 0.09375610202550888, "learning_rate": 6.080502640867106e-06, "loss": 8.5596, "step": 159760 }, { "epoch": 0.7978726060575795, "grad_norm": 0.09116285294294357, "learning_rate": 6.079000725925556e-06, "loss": 8.5359, "step": 159770 }, { "epoch": 0.7979225448825189, "grad_norm": 0.08968761563301086, "learning_rate": 6.0774988109840045e-06, "loss": 8.5511, "step": 159780 }, { "epoch": 0.7979724837074583, "grad_norm": 0.09019223600625992, "learning_rate": 6.075996896042455e-06, "loss": 8.5646, "step": 159790 }, { "epoch": 0.7980224225323979, "grad_norm": 0.09245903789997101, "learning_rate": 6.074494981100903e-06, "loss": 8.5769, "step": 159800 }, { "epoch": 0.7980723613573373, "grad_norm": 0.08857943117618561, "learning_rate": 6.072993066159353e-06, "loss": 8.5494, "step": 159810 }, { "epoch": 0.7981223001822767, "grad_norm": 0.09926991164684296, "learning_rate": 6.0714911512178035e-06, "loss": 8.569, "step": 159820 }, { "epoch": 0.7981722390072161, "grad_norm": 0.09205451607704163, "learning_rate": 6.069989236276252e-06, "loss": 8.5501, "step": 159830 }, { "epoch": 0.7982221778321557, "grad_norm": 0.09034121781587601, "learning_rate": 6.068487321334702e-06, "loss": 8.5465, "step": 159840 }, { "epoch": 0.7982721166570951, "grad_norm": 0.08652927726507187, "learning_rate": 6.0669854063931515e-06, "loss": 8.5484, "step": 159850 }, { "epoch": 0.7983220554820345, "grad_norm": 0.09696269780397415, "learning_rate": 6.065483491451601e-06, "loss": 8.5509, "step": 159860 }, { "epoch": 0.7983719943069739, "grad_norm": 0.09118789434432983, "learning_rate": 6.063981576510051e-06, "loss": 8.5379, "step": 159870 }, { "epoch": 0.7984219331319135, "grad_norm": 0.08937663584947586, "learning_rate": 6.0624796615684995e-06, "loss": 8.5392, "step": 159880 }, { "epoch": 0.7984718719568529, "grad_norm": 0.08873353898525238, "learning_rate": 6.06097774662695e-06, "loss": 8.5635, "step": 159890 }, { "epoch": 0.7985218107817923, "grad_norm": 0.0940752923488617, "learning_rate": 6.059475831685399e-06, "loss": 8.554, "step": 159900 }, { "epoch": 0.7985717496067317, "grad_norm": 0.09297428280115128, "learning_rate": 6.057973916743848e-06, "loss": 8.5504, "step": 159910 }, { "epoch": 0.7986216884316713, "grad_norm": 0.09415644407272339, "learning_rate": 6.0564720018022985e-06, "loss": 8.5671, "step": 159920 }, { "epoch": 0.7986716272566107, "grad_norm": 0.08840908855199814, "learning_rate": 6.054970086860748e-06, "loss": 8.55, "step": 159930 }, { "epoch": 0.7987215660815501, "grad_norm": 0.0910872295498848, "learning_rate": 6.053468171919197e-06, "loss": 8.5604, "step": 159940 }, { "epoch": 0.7987715049064895, "grad_norm": 0.09535114467144012, "learning_rate": 6.0519662569776465e-06, "loss": 8.5668, "step": 159950 }, { "epoch": 0.798821443731429, "grad_norm": 0.0967821255326271, "learning_rate": 6.050464342036096e-06, "loss": 8.5682, "step": 159960 }, { "epoch": 0.7988713825563685, "grad_norm": 0.0884976014494896, "learning_rate": 6.048962427094546e-06, "loss": 8.5515, "step": 159970 }, { "epoch": 0.7989213213813079, "grad_norm": 0.09149619191884995, "learning_rate": 6.047460512152995e-06, "loss": 8.5623, "step": 159980 }, { "epoch": 0.7989712602062473, "grad_norm": 0.08907512575387955, "learning_rate": 6.045958597211445e-06, "loss": 8.558, "step": 159990 }, { "epoch": 0.7990211990311868, "grad_norm": 0.0927252545952797, "learning_rate": 6.044456682269894e-06, "loss": 8.5367, "step": 160000 }, { "epoch": 0.7990711378561263, "grad_norm": 0.09032353013753891, "learning_rate": 6.042954767328344e-06, "loss": 8.5554, "step": 160010 }, { "epoch": 0.7991210766810657, "grad_norm": 0.09220920503139496, "learning_rate": 6.0414528523867935e-06, "loss": 8.5655, "step": 160020 }, { "epoch": 0.7991710155060051, "grad_norm": 0.09536270052194595, "learning_rate": 6.039950937445243e-06, "loss": 8.5547, "step": 160030 }, { "epoch": 0.7992209543309446, "grad_norm": 0.08931788057088852, "learning_rate": 6.038449022503692e-06, "loss": 8.5624, "step": 160040 }, { "epoch": 0.7992708931558841, "grad_norm": 0.09477946907281876, "learning_rate": 6.0369471075621415e-06, "loss": 8.5688, "step": 160050 }, { "epoch": 0.7993208319808235, "grad_norm": 0.09368328005075455, "learning_rate": 6.035445192620592e-06, "loss": 8.5624, "step": 160060 }, { "epoch": 0.7993707708057629, "grad_norm": 0.09561215341091156, "learning_rate": 6.033943277679041e-06, "loss": 8.557, "step": 160070 }, { "epoch": 0.7994207096307024, "grad_norm": 0.08733969926834106, "learning_rate": 6.03244136273749e-06, "loss": 8.5325, "step": 160080 }, { "epoch": 0.7994706484556419, "grad_norm": 0.09188752621412277, "learning_rate": 6.0309394477959405e-06, "loss": 8.5459, "step": 160090 }, { "epoch": 0.7995205872805813, "grad_norm": 0.09306297451257706, "learning_rate": 6.029437532854389e-06, "loss": 8.5527, "step": 160100 }, { "epoch": 0.7995705261055207, "grad_norm": 0.09068245440721512, "learning_rate": 6.027935617912839e-06, "loss": 8.5579, "step": 160110 }, { "epoch": 0.7996204649304602, "grad_norm": 0.09240369498729706, "learning_rate": 6.0264337029712885e-06, "loss": 8.5459, "step": 160120 }, { "epoch": 0.7996704037553997, "grad_norm": 0.08835700154304504, "learning_rate": 6.024931788029738e-06, "loss": 8.552, "step": 160130 }, { "epoch": 0.7997203425803391, "grad_norm": 0.08920862525701523, "learning_rate": 6.023429873088188e-06, "loss": 8.5368, "step": 160140 }, { "epoch": 0.7997702814052785, "grad_norm": 0.09194939583539963, "learning_rate": 6.0219279581466365e-06, "loss": 8.5641, "step": 160150 }, { "epoch": 0.799820220230218, "grad_norm": 0.09264218807220459, "learning_rate": 6.020426043205087e-06, "loss": 8.5531, "step": 160160 }, { "epoch": 0.7998701590551575, "grad_norm": 0.08757354319095612, "learning_rate": 6.018924128263537e-06, "loss": 8.5538, "step": 160170 }, { "epoch": 0.7999200978800969, "grad_norm": 0.09101276844739914, "learning_rate": 6.017422213321985e-06, "loss": 8.5525, "step": 160180 }, { "epoch": 0.7999700367050363, "grad_norm": 0.09623536467552185, "learning_rate": 6.0159202983804355e-06, "loss": 8.5407, "step": 160190 }, { "epoch": 0.8000199755299757, "grad_norm": 0.09045930206775665, "learning_rate": 6.014418383438884e-06, "loss": 8.5333, "step": 160200 }, { "epoch": 0.8000699143549153, "grad_norm": 0.08761759847402573, "learning_rate": 6.012916468497334e-06, "loss": 8.552, "step": 160210 }, { "epoch": 0.8001198531798547, "grad_norm": 0.09209952503442764, "learning_rate": 6.011414553555784e-06, "loss": 8.5461, "step": 160220 }, { "epoch": 0.8001697920047941, "grad_norm": 0.08897832036018372, "learning_rate": 6.009912638614233e-06, "loss": 8.5743, "step": 160230 }, { "epoch": 0.8002197308297335, "grad_norm": 0.09238340705633163, "learning_rate": 6.008410723672683e-06, "loss": 8.5463, "step": 160240 }, { "epoch": 0.800269669654673, "grad_norm": 0.09207829087972641, "learning_rate": 6.006908808731132e-06, "loss": 8.5396, "step": 160250 }, { "epoch": 0.8003196084796125, "grad_norm": 0.0933920294046402, "learning_rate": 6.005406893789582e-06, "loss": 8.5405, "step": 160260 }, { "epoch": 0.8003695473045519, "grad_norm": 0.09098160266876221, "learning_rate": 6.003904978848032e-06, "loss": 8.548, "step": 160270 }, { "epoch": 0.8004194861294913, "grad_norm": 0.0946149155497551, "learning_rate": 6.00240306390648e-06, "loss": 8.5488, "step": 160280 }, { "epoch": 0.8004694249544309, "grad_norm": 0.09164818376302719, "learning_rate": 6.0009011489649306e-06, "loss": 8.5535, "step": 160290 }, { "epoch": 0.8005193637793703, "grad_norm": 0.09267935901880264, "learning_rate": 5.99939923402338e-06, "loss": 8.571, "step": 160300 }, { "epoch": 0.8005693026043097, "grad_norm": 0.0906684473156929, "learning_rate": 5.997897319081829e-06, "loss": 8.5541, "step": 160310 }, { "epoch": 0.8006192414292491, "grad_norm": 0.09602951258420944, "learning_rate": 5.996395404140279e-06, "loss": 8.5577, "step": 160320 }, { "epoch": 0.8006691802541887, "grad_norm": 0.09331289678812027, "learning_rate": 5.994893489198729e-06, "loss": 8.5534, "step": 160330 }, { "epoch": 0.8007191190791281, "grad_norm": 0.0883229672908783, "learning_rate": 5.993391574257178e-06, "loss": 8.576, "step": 160340 }, { "epoch": 0.8007690579040675, "grad_norm": 0.09256965667009354, "learning_rate": 5.991889659315627e-06, "loss": 8.5416, "step": 160350 }, { "epoch": 0.8008189967290069, "grad_norm": 0.09165097028017044, "learning_rate": 5.990387744374077e-06, "loss": 8.5723, "step": 160360 }, { "epoch": 0.8008689355539464, "grad_norm": 0.09180103242397308, "learning_rate": 5.988885829432527e-06, "loss": 8.5441, "step": 160370 }, { "epoch": 0.8009188743788859, "grad_norm": 0.09208500385284424, "learning_rate": 5.987383914490976e-06, "loss": 8.5447, "step": 160380 }, { "epoch": 0.8009688132038253, "grad_norm": 0.09202751517295837, "learning_rate": 5.9858819995494256e-06, "loss": 8.5482, "step": 160390 }, { "epoch": 0.8010187520287647, "grad_norm": 0.09229252487421036, "learning_rate": 5.984380084607875e-06, "loss": 8.5692, "step": 160400 }, { "epoch": 0.8010686908537042, "grad_norm": 0.09639665484428406, "learning_rate": 5.982878169666325e-06, "loss": 8.5544, "step": 160410 }, { "epoch": 0.8011186296786437, "grad_norm": 0.08971371501684189, "learning_rate": 5.981376254724774e-06, "loss": 8.5706, "step": 160420 }, { "epoch": 0.8011685685035831, "grad_norm": 0.08654212206602097, "learning_rate": 5.979874339783224e-06, "loss": 8.5672, "step": 160430 }, { "epoch": 0.8012185073285225, "grad_norm": 0.09568259119987488, "learning_rate": 5.978372424841673e-06, "loss": 8.5543, "step": 160440 }, { "epoch": 0.801268446153462, "grad_norm": 0.08771758526563644, "learning_rate": 5.976870509900122e-06, "loss": 8.5576, "step": 160450 }, { "epoch": 0.8013183849784015, "grad_norm": 0.0924898236989975, "learning_rate": 5.9753685949585726e-06, "loss": 8.5352, "step": 160460 }, { "epoch": 0.8013683238033409, "grad_norm": 0.08928176760673523, "learning_rate": 5.973866680017022e-06, "loss": 8.5789, "step": 160470 }, { "epoch": 0.8014182626282803, "grad_norm": 0.08957140892744064, "learning_rate": 5.972364765075471e-06, "loss": 8.5642, "step": 160480 }, { "epoch": 0.8014682014532198, "grad_norm": 0.0887003242969513, "learning_rate": 5.970862850133921e-06, "loss": 8.5642, "step": 160490 }, { "epoch": 0.8015181402781593, "grad_norm": 0.09041450172662735, "learning_rate": 5.96936093519237e-06, "loss": 8.5659, "step": 160500 }, { "epoch": 0.8015680791030987, "grad_norm": 0.08909320086240768, "learning_rate": 5.96785902025082e-06, "loss": 8.5378, "step": 160510 }, { "epoch": 0.8016180179280381, "grad_norm": 0.09449964016675949, "learning_rate": 5.966357105309269e-06, "loss": 8.5644, "step": 160520 }, { "epoch": 0.8016679567529776, "grad_norm": 0.0875280573964119, "learning_rate": 5.964855190367719e-06, "loss": 8.5552, "step": 160530 }, { "epoch": 0.8017178955779171, "grad_norm": 0.09800990670919418, "learning_rate": 5.963353275426169e-06, "loss": 8.5692, "step": 160540 }, { "epoch": 0.8017678344028565, "grad_norm": 0.10497017949819565, "learning_rate": 5.961851360484617e-06, "loss": 8.5514, "step": 160550 }, { "epoch": 0.8018177732277959, "grad_norm": 0.09578979760408401, "learning_rate": 5.960349445543068e-06, "loss": 8.5585, "step": 160560 }, { "epoch": 0.8018677120527354, "grad_norm": 0.0902516171336174, "learning_rate": 5.958847530601518e-06, "loss": 8.5426, "step": 160570 }, { "epoch": 0.8019176508776749, "grad_norm": 0.087619349360466, "learning_rate": 5.957345615659966e-06, "loss": 8.5467, "step": 160580 }, { "epoch": 0.8019675897026143, "grad_norm": 0.08661577105522156, "learning_rate": 5.955843700718416e-06, "loss": 8.5649, "step": 160590 }, { "epoch": 0.8020175285275537, "grad_norm": 0.0935804471373558, "learning_rate": 5.954341785776866e-06, "loss": 8.558, "step": 160600 }, { "epoch": 0.8020674673524932, "grad_norm": 0.08383012562990189, "learning_rate": 5.952839870835315e-06, "loss": 8.5579, "step": 160610 }, { "epoch": 0.8021174061774327, "grad_norm": 0.09680390357971191, "learning_rate": 5.951337955893765e-06, "loss": 8.5733, "step": 160620 }, { "epoch": 0.8021673450023721, "grad_norm": 0.0891953781247139, "learning_rate": 5.949836040952214e-06, "loss": 8.5736, "step": 160630 }, { "epoch": 0.8022172838273115, "grad_norm": 0.0887405052781105, "learning_rate": 5.948334126010664e-06, "loss": 8.5685, "step": 160640 }, { "epoch": 0.802267222652251, "grad_norm": 0.092610664665699, "learning_rate": 5.946832211069113e-06, "loss": 8.5667, "step": 160650 }, { "epoch": 0.8023171614771905, "grad_norm": 0.08894860744476318, "learning_rate": 5.945330296127563e-06, "loss": 8.5653, "step": 160660 }, { "epoch": 0.8023671003021299, "grad_norm": 0.09235876053571701, "learning_rate": 5.943828381186013e-06, "loss": 8.5525, "step": 160670 }, { "epoch": 0.8024170391270693, "grad_norm": 0.0946565493941307, "learning_rate": 5.942326466244461e-06, "loss": 8.5589, "step": 160680 }, { "epoch": 0.8024669779520088, "grad_norm": 0.08634993433952332, "learning_rate": 5.9408245513029114e-06, "loss": 8.5665, "step": 160690 }, { "epoch": 0.8025169167769483, "grad_norm": 0.08891087025403976, "learning_rate": 5.939322636361362e-06, "loss": 8.5576, "step": 160700 }, { "epoch": 0.8025668556018877, "grad_norm": 0.09388010948896408, "learning_rate": 5.93782072141981e-06, "loss": 8.5506, "step": 160710 }, { "epoch": 0.8026167944268271, "grad_norm": 0.09189348667860031, "learning_rate": 5.93631880647826e-06, "loss": 8.554, "step": 160720 }, { "epoch": 0.8026667332517666, "grad_norm": 0.08890991657972336, "learning_rate": 5.93481689153671e-06, "loss": 8.5491, "step": 160730 }, { "epoch": 0.802716672076706, "grad_norm": 0.0901801586151123, "learning_rate": 5.933314976595159e-06, "loss": 8.5757, "step": 160740 }, { "epoch": 0.8027666109016455, "grad_norm": 0.09369862824678421, "learning_rate": 5.931813061653609e-06, "loss": 8.5404, "step": 160750 }, { "epoch": 0.8028165497265849, "grad_norm": 0.09540555626153946, "learning_rate": 5.930311146712058e-06, "loss": 8.552, "step": 160760 }, { "epoch": 0.8028664885515244, "grad_norm": 0.09585050493478775, "learning_rate": 5.928809231770508e-06, "loss": 8.5557, "step": 160770 }, { "epoch": 0.8029164273764638, "grad_norm": 0.088337741792202, "learning_rate": 5.927307316828957e-06, "loss": 8.5585, "step": 160780 }, { "epoch": 0.8029663662014033, "grad_norm": 0.08791206032037735, "learning_rate": 5.9258054018874064e-06, "loss": 8.5687, "step": 160790 }, { "epoch": 0.8030163050263427, "grad_norm": 0.09240417182445526, "learning_rate": 5.924303486945857e-06, "loss": 8.5421, "step": 160800 }, { "epoch": 0.8030662438512822, "grad_norm": 0.08927211910486221, "learning_rate": 5.922801572004306e-06, "loss": 8.5463, "step": 160810 }, { "epoch": 0.8031161826762216, "grad_norm": 0.09254935383796692, "learning_rate": 5.921299657062755e-06, "loss": 8.5417, "step": 160820 }, { "epoch": 0.8031661215011611, "grad_norm": 0.09360701590776443, "learning_rate": 5.919797742121205e-06, "loss": 8.5466, "step": 160830 }, { "epoch": 0.8032160603261005, "grad_norm": 0.09263437986373901, "learning_rate": 5.918295827179654e-06, "loss": 8.5576, "step": 160840 }, { "epoch": 0.80326599915104, "grad_norm": 0.08811653405427933, "learning_rate": 5.916793912238104e-06, "loss": 8.5447, "step": 160850 }, { "epoch": 0.8033159379759794, "grad_norm": 0.09161476045846939, "learning_rate": 5.9152919972965535e-06, "loss": 8.5664, "step": 160860 }, { "epoch": 0.8033658768009189, "grad_norm": 0.08815915882587433, "learning_rate": 5.913790082355003e-06, "loss": 8.5393, "step": 160870 }, { "epoch": 0.8034158156258583, "grad_norm": 0.08650946617126465, "learning_rate": 5.912288167413452e-06, "loss": 8.561, "step": 160880 }, { "epoch": 0.8034657544507978, "grad_norm": 0.09026594460010529, "learning_rate": 5.9107862524719014e-06, "loss": 8.5529, "step": 160890 }, { "epoch": 0.8035156932757372, "grad_norm": 0.08889591693878174, "learning_rate": 5.909284337530352e-06, "loss": 8.5591, "step": 160900 }, { "epoch": 0.8035656321006767, "grad_norm": 0.09031382948160172, "learning_rate": 5.907782422588801e-06, "loss": 8.5549, "step": 160910 }, { "epoch": 0.8036155709256161, "grad_norm": 0.08855296671390533, "learning_rate": 5.90628050764725e-06, "loss": 8.5692, "step": 160920 }, { "epoch": 0.8036655097505556, "grad_norm": 0.09416501969099045, "learning_rate": 5.9047785927057e-06, "loss": 8.562, "step": 160930 }, { "epoch": 0.803715448575495, "grad_norm": 0.09245479851961136, "learning_rate": 5.90327667776415e-06, "loss": 8.5468, "step": 160940 }, { "epoch": 0.8037653874004345, "grad_norm": 0.09061010181903839, "learning_rate": 5.901774762822599e-06, "loss": 8.5644, "step": 160950 }, { "epoch": 0.8038153262253739, "grad_norm": 0.08976545184850693, "learning_rate": 5.9002728478810485e-06, "loss": 8.5529, "step": 160960 }, { "epoch": 0.8038652650503134, "grad_norm": 0.09560992568731308, "learning_rate": 5.898770932939498e-06, "loss": 8.5582, "step": 160970 }, { "epoch": 0.8039152038752528, "grad_norm": 0.09577993303537369, "learning_rate": 5.897269017997947e-06, "loss": 8.558, "step": 160980 }, { "epoch": 0.8039651427001923, "grad_norm": 0.08982730656862259, "learning_rate": 5.895767103056397e-06, "loss": 8.5651, "step": 160990 }, { "epoch": 0.8040150815251317, "grad_norm": 0.08825762569904327, "learning_rate": 5.894265188114847e-06, "loss": 8.5539, "step": 161000 }, { "epoch": 0.8040650203500712, "grad_norm": 0.0992155522108078, "learning_rate": 5.892763273173296e-06, "loss": 8.572, "step": 161010 }, { "epoch": 0.8041149591750106, "grad_norm": 0.09141044318675995, "learning_rate": 5.891261358231746e-06, "loss": 8.5596, "step": 161020 }, { "epoch": 0.8041648979999501, "grad_norm": 0.09531890600919724, "learning_rate": 5.889759443290195e-06, "loss": 8.5501, "step": 161030 }, { "epoch": 0.8042148368248895, "grad_norm": 0.08679885417222977, "learning_rate": 5.888257528348645e-06, "loss": 8.5506, "step": 161040 }, { "epoch": 0.804264775649829, "grad_norm": 0.08994447439908981, "learning_rate": 5.886755613407094e-06, "loss": 8.5481, "step": 161050 }, { "epoch": 0.8043147144747684, "grad_norm": 0.09019139409065247, "learning_rate": 5.8852536984655435e-06, "loss": 8.5627, "step": 161060 }, { "epoch": 0.8043646532997079, "grad_norm": 0.08854031562805176, "learning_rate": 5.883751783523994e-06, "loss": 8.5427, "step": 161070 }, { "epoch": 0.8044145921246473, "grad_norm": 0.0935094803571701, "learning_rate": 5.882249868582442e-06, "loss": 8.5412, "step": 161080 }, { "epoch": 0.8044645309495868, "grad_norm": 0.08779904246330261, "learning_rate": 5.880747953640892e-06, "loss": 8.5639, "step": 161090 }, { "epoch": 0.8045144697745262, "grad_norm": 0.0949099212884903, "learning_rate": 5.8792460386993425e-06, "loss": 8.5561, "step": 161100 }, { "epoch": 0.8045644085994657, "grad_norm": 0.0882745087146759, "learning_rate": 5.877744123757791e-06, "loss": 8.5621, "step": 161110 }, { "epoch": 0.8046143474244051, "grad_norm": 0.09196063131093979, "learning_rate": 5.876242208816241e-06, "loss": 8.5656, "step": 161120 }, { "epoch": 0.8046642862493446, "grad_norm": 0.08640565723180771, "learning_rate": 5.87474029387469e-06, "loss": 8.5621, "step": 161130 }, { "epoch": 0.804714225074284, "grad_norm": 0.09424121677875519, "learning_rate": 5.87323837893314e-06, "loss": 8.5532, "step": 161140 }, { "epoch": 0.8047641638992235, "grad_norm": 0.08842575550079346, "learning_rate": 5.87173646399159e-06, "loss": 8.561, "step": 161150 }, { "epoch": 0.8048141027241629, "grad_norm": 0.09014535695314407, "learning_rate": 5.8702345490500385e-06, "loss": 8.5514, "step": 161160 }, { "epoch": 0.8048640415491024, "grad_norm": 0.09411894530057907, "learning_rate": 5.868732634108489e-06, "loss": 8.544, "step": 161170 }, { "epoch": 0.8049139803740418, "grad_norm": 0.09182292968034744, "learning_rate": 5.867230719166938e-06, "loss": 8.5573, "step": 161180 }, { "epoch": 0.8049639191989812, "grad_norm": 0.08946768194437027, "learning_rate": 5.865728804225387e-06, "loss": 8.5418, "step": 161190 }, { "epoch": 0.8050138580239207, "grad_norm": 0.08927719295024872, "learning_rate": 5.8642268892838375e-06, "loss": 8.5637, "step": 161200 }, { "epoch": 0.8050637968488601, "grad_norm": 0.09145810455083847, "learning_rate": 5.862724974342286e-06, "loss": 8.5511, "step": 161210 }, { "epoch": 0.8051137356737996, "grad_norm": 0.09392247349023819, "learning_rate": 5.861223059400736e-06, "loss": 8.5375, "step": 161220 }, { "epoch": 0.805163674498739, "grad_norm": 0.09256572276353836, "learning_rate": 5.8597211444591855e-06, "loss": 8.5547, "step": 161230 }, { "epoch": 0.8052136133236785, "grad_norm": 0.08906706422567368, "learning_rate": 5.858219229517635e-06, "loss": 8.5664, "step": 161240 }, { "epoch": 0.8052635521486179, "grad_norm": 0.08576681464910507, "learning_rate": 5.856717314576085e-06, "loss": 8.5525, "step": 161250 }, { "epoch": 0.8053134909735574, "grad_norm": 0.09053860604763031, "learning_rate": 5.855215399634534e-06, "loss": 8.5489, "step": 161260 }, { "epoch": 0.8053634297984968, "grad_norm": 0.08544787019491196, "learning_rate": 5.853713484692984e-06, "loss": 8.5576, "step": 161270 }, { "epoch": 0.8054133686234363, "grad_norm": 0.0925343781709671, "learning_rate": 5.852211569751433e-06, "loss": 8.5416, "step": 161280 }, { "epoch": 0.8054633074483757, "grad_norm": 0.09434544295072556, "learning_rate": 5.850709654809882e-06, "loss": 8.5593, "step": 161290 }, { "epoch": 0.8055132462733152, "grad_norm": 0.09324178099632263, "learning_rate": 5.8492077398683325e-06, "loss": 8.5498, "step": 161300 }, { "epoch": 0.8055631850982546, "grad_norm": 0.09053826332092285, "learning_rate": 5.847705824926782e-06, "loss": 8.5522, "step": 161310 }, { "epoch": 0.8056131239231941, "grad_norm": 0.09149667620658875, "learning_rate": 5.846203909985231e-06, "loss": 8.536, "step": 161320 }, { "epoch": 0.8056630627481335, "grad_norm": 0.08661210536956787, "learning_rate": 5.8447019950436805e-06, "loss": 8.5475, "step": 161330 }, { "epoch": 0.805713001573073, "grad_norm": 0.09454959630966187, "learning_rate": 5.843200080102131e-06, "loss": 8.5584, "step": 161340 }, { "epoch": 0.8057629403980124, "grad_norm": 0.09166700392961502, "learning_rate": 5.84169816516058e-06, "loss": 8.5538, "step": 161350 }, { "epoch": 0.8058128792229519, "grad_norm": 0.08817677199840546, "learning_rate": 5.840196250219029e-06, "loss": 8.5504, "step": 161360 }, { "epoch": 0.8058628180478913, "grad_norm": 0.0908307358622551, "learning_rate": 5.838694335277479e-06, "loss": 8.5528, "step": 161370 }, { "epoch": 0.8059127568728308, "grad_norm": 0.10027515888214111, "learning_rate": 5.837192420335928e-06, "loss": 8.5598, "step": 161380 }, { "epoch": 0.8059626956977702, "grad_norm": 0.09583675861358643, "learning_rate": 5.835690505394378e-06, "loss": 8.5463, "step": 161390 }, { "epoch": 0.8060126345227097, "grad_norm": 0.0983138233423233, "learning_rate": 5.8341885904528275e-06, "loss": 8.5588, "step": 161400 }, { "epoch": 0.8060625733476491, "grad_norm": 0.08900081366300583, "learning_rate": 5.832686675511277e-06, "loss": 8.5361, "step": 161410 }, { "epoch": 0.8061125121725886, "grad_norm": 0.093843013048172, "learning_rate": 5.831184760569727e-06, "loss": 8.5498, "step": 161420 }, { "epoch": 0.806162450997528, "grad_norm": 0.09397601336240768, "learning_rate": 5.8296828456281755e-06, "loss": 8.5497, "step": 161430 }, { "epoch": 0.8062123898224675, "grad_norm": 0.09373549371957779, "learning_rate": 5.828180930686626e-06, "loss": 8.563, "step": 161440 }, { "epoch": 0.8062623286474069, "grad_norm": 0.09314602613449097, "learning_rate": 5.826679015745075e-06, "loss": 8.5504, "step": 161450 }, { "epoch": 0.8063122674723464, "grad_norm": 0.09471584111452103, "learning_rate": 5.825177100803524e-06, "loss": 8.5783, "step": 161460 }, { "epoch": 0.8063622062972858, "grad_norm": 0.08868781477212906, "learning_rate": 5.8236751858619745e-06, "loss": 8.5653, "step": 161470 }, { "epoch": 0.8064121451222253, "grad_norm": 0.09533412009477615, "learning_rate": 5.822173270920423e-06, "loss": 8.5538, "step": 161480 }, { "epoch": 0.8064620839471647, "grad_norm": 0.09250637888908386, "learning_rate": 5.820671355978873e-06, "loss": 8.5697, "step": 161490 }, { "epoch": 0.8065120227721042, "grad_norm": 0.08923783898353577, "learning_rate": 5.819169441037323e-06, "loss": 8.5524, "step": 161500 }, { "epoch": 0.8065619615970436, "grad_norm": 0.09027642011642456, "learning_rate": 5.817667526095772e-06, "loss": 8.5462, "step": 161510 }, { "epoch": 0.806611900421983, "grad_norm": 0.08848720788955688, "learning_rate": 5.816165611154222e-06, "loss": 8.547, "step": 161520 }, { "epoch": 0.8066618392469225, "grad_norm": 0.09159193187952042, "learning_rate": 5.8146636962126705e-06, "loss": 8.5562, "step": 161530 }, { "epoch": 0.806711778071862, "grad_norm": 0.09201356023550034, "learning_rate": 5.813161781271121e-06, "loss": 8.5494, "step": 161540 }, { "epoch": 0.8067617168968014, "grad_norm": 0.090115986764431, "learning_rate": 5.811659866329571e-06, "loss": 8.543, "step": 161550 }, { "epoch": 0.8068116557217409, "grad_norm": 0.08883675187826157, "learning_rate": 5.810157951388019e-06, "loss": 8.566, "step": 161560 }, { "epoch": 0.8068615945466803, "grad_norm": 0.09100556373596191, "learning_rate": 5.8086560364464695e-06, "loss": 8.5675, "step": 161570 }, { "epoch": 0.8069115333716198, "grad_norm": 0.09314378350973129, "learning_rate": 5.807154121504919e-06, "loss": 8.5437, "step": 161580 }, { "epoch": 0.8069614721965592, "grad_norm": 0.09172666072845459, "learning_rate": 5.805652206563368e-06, "loss": 8.5589, "step": 161590 }, { "epoch": 0.8070114110214986, "grad_norm": 0.09398376941680908, "learning_rate": 5.804150291621818e-06, "loss": 8.5466, "step": 161600 }, { "epoch": 0.8070613498464381, "grad_norm": 0.08542805910110474, "learning_rate": 5.802648376680267e-06, "loss": 8.5673, "step": 161610 }, { "epoch": 0.8071112886713776, "grad_norm": 0.08974550664424896, "learning_rate": 5.801146461738717e-06, "loss": 8.5524, "step": 161620 }, { "epoch": 0.807161227496317, "grad_norm": 0.09220361709594727, "learning_rate": 5.799644546797166e-06, "loss": 8.5404, "step": 161630 }, { "epoch": 0.8072111663212564, "grad_norm": 0.08950573951005936, "learning_rate": 5.798142631855616e-06, "loss": 8.5477, "step": 161640 }, { "epoch": 0.8072611051461959, "grad_norm": 0.0872855931520462, "learning_rate": 5.796640716914066e-06, "loss": 8.5645, "step": 161650 }, { "epoch": 0.8073110439711354, "grad_norm": 0.09016703069210052, "learning_rate": 5.795138801972515e-06, "loss": 8.5501, "step": 161660 }, { "epoch": 0.8073609827960748, "grad_norm": 0.09285937994718552, "learning_rate": 5.7936368870309645e-06, "loss": 8.5527, "step": 161670 }, { "epoch": 0.8074109216210142, "grad_norm": 0.09346285462379456, "learning_rate": 5.792134972089414e-06, "loss": 8.5559, "step": 161680 }, { "epoch": 0.8074608604459537, "grad_norm": 0.08668027818202972, "learning_rate": 5.790633057147863e-06, "loss": 8.5425, "step": 161690 }, { "epoch": 0.8075107992708932, "grad_norm": 0.0898892879486084, "learning_rate": 5.789131142206313e-06, "loss": 8.5491, "step": 161700 }, { "epoch": 0.8075607380958326, "grad_norm": 0.09450709819793701, "learning_rate": 5.787629227264763e-06, "loss": 8.5568, "step": 161710 }, { "epoch": 0.807610676920772, "grad_norm": 0.08670226484537125, "learning_rate": 5.786127312323212e-06, "loss": 8.5596, "step": 161720 }, { "epoch": 0.8076606157457115, "grad_norm": 0.08837024122476578, "learning_rate": 5.784625397381662e-06, "loss": 8.5445, "step": 161730 }, { "epoch": 0.807710554570651, "grad_norm": 0.093661367893219, "learning_rate": 5.7831234824401115e-06, "loss": 8.5325, "step": 161740 }, { "epoch": 0.8077604933955904, "grad_norm": 0.0894351676106453, "learning_rate": 5.781621567498561e-06, "loss": 8.551, "step": 161750 }, { "epoch": 0.8078104322205298, "grad_norm": 0.08936372399330139, "learning_rate": 5.78011965255701e-06, "loss": 8.5591, "step": 161760 }, { "epoch": 0.8078603710454693, "grad_norm": 0.09095845371484756, "learning_rate": 5.7786177376154595e-06, "loss": 8.5491, "step": 161770 }, { "epoch": 0.8079103098704088, "grad_norm": 0.09978078305721283, "learning_rate": 5.77711582267391e-06, "loss": 8.5385, "step": 161780 }, { "epoch": 0.8079602486953482, "grad_norm": 0.0931134968996048, "learning_rate": 5.775613907732359e-06, "loss": 8.554, "step": 161790 }, { "epoch": 0.8080101875202876, "grad_norm": 0.09244008362293243, "learning_rate": 5.774111992790808e-06, "loss": 8.5561, "step": 161800 }, { "epoch": 0.8080601263452271, "grad_norm": 0.09390776604413986, "learning_rate": 5.772610077849258e-06, "loss": 8.5361, "step": 161810 }, { "epoch": 0.8081100651701666, "grad_norm": 0.09701921045780182, "learning_rate": 5.771108162907708e-06, "loss": 8.5554, "step": 161820 }, { "epoch": 0.808160003995106, "grad_norm": 0.09131251275539398, "learning_rate": 5.769606247966157e-06, "loss": 8.5438, "step": 161830 }, { "epoch": 0.8082099428200454, "grad_norm": 0.09114990383386612, "learning_rate": 5.7681043330246066e-06, "loss": 8.5591, "step": 161840 }, { "epoch": 0.8082598816449849, "grad_norm": 0.08846677094697952, "learning_rate": 5.766602418083056e-06, "loss": 8.5434, "step": 161850 }, { "epoch": 0.8083098204699244, "grad_norm": 0.08849672228097916, "learning_rate": 5.765100503141505e-06, "loss": 8.5666, "step": 161860 }, { "epoch": 0.8083597592948638, "grad_norm": 0.0881495401263237, "learning_rate": 5.763598588199955e-06, "loss": 8.5384, "step": 161870 }, { "epoch": 0.8084096981198032, "grad_norm": 0.09564726799726486, "learning_rate": 5.762096673258405e-06, "loss": 8.5536, "step": 161880 }, { "epoch": 0.8084596369447427, "grad_norm": 0.08935084193944931, "learning_rate": 5.760594758316854e-06, "loss": 8.5496, "step": 161890 }, { "epoch": 0.8085095757696822, "grad_norm": 0.09384889900684357, "learning_rate": 5.759092843375304e-06, "loss": 8.5448, "step": 161900 }, { "epoch": 0.8085595145946216, "grad_norm": 0.08726388961076736, "learning_rate": 5.757590928433753e-06, "loss": 8.5489, "step": 161910 }, { "epoch": 0.808609453419561, "grad_norm": 0.09072273224592209, "learning_rate": 5.756089013492203e-06, "loss": 8.5589, "step": 161920 }, { "epoch": 0.8086593922445005, "grad_norm": 0.09182450920343399, "learning_rate": 5.754587098550652e-06, "loss": 8.5484, "step": 161930 }, { "epoch": 0.80870933106944, "grad_norm": 0.08764579892158508, "learning_rate": 5.7530851836091016e-06, "loss": 8.5622, "step": 161940 }, { "epoch": 0.8087592698943794, "grad_norm": 0.0898037999868393, "learning_rate": 5.751583268667552e-06, "loss": 8.5555, "step": 161950 }, { "epoch": 0.8088092087193188, "grad_norm": 0.09432199597358704, "learning_rate": 5.750081353726e-06, "loss": 8.5663, "step": 161960 }, { "epoch": 0.8088591475442583, "grad_norm": 0.09024576097726822, "learning_rate": 5.74857943878445e-06, "loss": 8.5594, "step": 161970 }, { "epoch": 0.8089090863691978, "grad_norm": 0.08990621566772461, "learning_rate": 5.747077523842901e-06, "loss": 8.5471, "step": 161980 }, { "epoch": 0.8089590251941372, "grad_norm": 0.09168119728565216, "learning_rate": 5.745575608901349e-06, "loss": 8.5391, "step": 161990 }, { "epoch": 0.8090089640190766, "grad_norm": 0.09170830249786377, "learning_rate": 5.744073693959799e-06, "loss": 8.565, "step": 162000 }, { "epoch": 0.809058902844016, "grad_norm": 0.08952143043279648, "learning_rate": 5.742571779018248e-06, "loss": 8.5513, "step": 162010 }, { "epoch": 0.8091088416689556, "grad_norm": 0.08867137879133224, "learning_rate": 5.741069864076698e-06, "loss": 8.5306, "step": 162020 }, { "epoch": 0.809158780493895, "grad_norm": 0.09371023625135422, "learning_rate": 5.739567949135148e-06, "loss": 8.542, "step": 162030 }, { "epoch": 0.8092087193188344, "grad_norm": 0.09115874022245407, "learning_rate": 5.7380660341935966e-06, "loss": 8.5642, "step": 162040 }, { "epoch": 0.8092586581437738, "grad_norm": 0.09029915928840637, "learning_rate": 5.736564119252047e-06, "loss": 8.5507, "step": 162050 }, { "epoch": 0.8093085969687134, "grad_norm": 0.09160950779914856, "learning_rate": 5.735062204310496e-06, "loss": 8.5746, "step": 162060 }, { "epoch": 0.8093585357936528, "grad_norm": 0.09389065206050873, "learning_rate": 5.733560289368945e-06, "loss": 8.5682, "step": 162070 }, { "epoch": 0.8094084746185922, "grad_norm": 0.08822131901979446, "learning_rate": 5.732058374427396e-06, "loss": 8.5532, "step": 162080 }, { "epoch": 0.8094584134435316, "grad_norm": 0.09017851203680038, "learning_rate": 5.730556459485844e-06, "loss": 8.559, "step": 162090 }, { "epoch": 0.8095083522684712, "grad_norm": 0.09044285118579865, "learning_rate": 5.729054544544294e-06, "loss": 8.5549, "step": 162100 }, { "epoch": 0.8095582910934106, "grad_norm": 0.09080391377210617, "learning_rate": 5.727552629602744e-06, "loss": 8.5327, "step": 162110 }, { "epoch": 0.80960822991835, "grad_norm": 0.08978559076786041, "learning_rate": 5.726050714661193e-06, "loss": 8.5409, "step": 162120 }, { "epoch": 0.8096581687432894, "grad_norm": 0.09273146092891693, "learning_rate": 5.724548799719643e-06, "loss": 8.5516, "step": 162130 }, { "epoch": 0.809708107568229, "grad_norm": 0.09049143642187119, "learning_rate": 5.7230468847780924e-06, "loss": 8.5536, "step": 162140 }, { "epoch": 0.8097580463931684, "grad_norm": 0.09314949065446854, "learning_rate": 5.721544969836542e-06, "loss": 8.5574, "step": 162150 }, { "epoch": 0.8098079852181078, "grad_norm": 0.09063813835382462, "learning_rate": 5.720043054894991e-06, "loss": 8.5568, "step": 162160 }, { "epoch": 0.8098579240430472, "grad_norm": 0.09309311211109161, "learning_rate": 5.71854113995344e-06, "loss": 8.5452, "step": 162170 }, { "epoch": 0.8099078628679867, "grad_norm": 0.09010675549507141, "learning_rate": 5.717039225011891e-06, "loss": 8.5491, "step": 162180 }, { "epoch": 0.8099578016929262, "grad_norm": 0.09501739591360092, "learning_rate": 5.71553731007034e-06, "loss": 8.5697, "step": 162190 }, { "epoch": 0.8100077405178656, "grad_norm": 0.09098471701145172, "learning_rate": 5.714035395128789e-06, "loss": 8.5599, "step": 162200 }, { "epoch": 0.810057679342805, "grad_norm": 0.0888034850358963, "learning_rate": 5.712533480187239e-06, "loss": 8.5362, "step": 162210 }, { "epoch": 0.8101076181677445, "grad_norm": 0.09654735028743744, "learning_rate": 5.711031565245689e-06, "loss": 8.5474, "step": 162220 }, { "epoch": 0.810157556992684, "grad_norm": 0.09235084056854248, "learning_rate": 5.709529650304138e-06, "loss": 8.5447, "step": 162230 }, { "epoch": 0.8102074958176234, "grad_norm": 0.09002131223678589, "learning_rate": 5.7080277353625874e-06, "loss": 8.5462, "step": 162240 }, { "epoch": 0.8102574346425628, "grad_norm": 0.08661506325006485, "learning_rate": 5.706525820421037e-06, "loss": 8.5672, "step": 162250 }, { "epoch": 0.8103073734675023, "grad_norm": 0.08980439603328705, "learning_rate": 5.705023905479486e-06, "loss": 8.5427, "step": 162260 }, { "epoch": 0.8103573122924418, "grad_norm": 0.08393685519695282, "learning_rate": 5.703521990537936e-06, "loss": 8.5484, "step": 162270 }, { "epoch": 0.8104072511173812, "grad_norm": 0.09369827061891556, "learning_rate": 5.702020075596386e-06, "loss": 8.5434, "step": 162280 }, { "epoch": 0.8104571899423206, "grad_norm": 0.09700123220682144, "learning_rate": 5.700518160654835e-06, "loss": 8.5523, "step": 162290 }, { "epoch": 0.81050712876726, "grad_norm": 0.08638820052146912, "learning_rate": 5.699016245713285e-06, "loss": 8.5611, "step": 162300 }, { "epoch": 0.8105570675921996, "grad_norm": 0.09503623843193054, "learning_rate": 5.697514330771734e-06, "loss": 8.5216, "step": 162310 }, { "epoch": 0.810607006417139, "grad_norm": 0.09698504954576492, "learning_rate": 5.696012415830184e-06, "loss": 8.5362, "step": 162320 }, { "epoch": 0.8106569452420784, "grad_norm": 0.09360737353563309, "learning_rate": 5.694510500888633e-06, "loss": 8.5411, "step": 162330 }, { "epoch": 0.8107068840670179, "grad_norm": 0.08911336213350296, "learning_rate": 5.6930085859470824e-06, "loss": 8.5541, "step": 162340 }, { "epoch": 0.8107568228919574, "grad_norm": 0.09235373139381409, "learning_rate": 5.691506671005533e-06, "loss": 8.5529, "step": 162350 }, { "epoch": 0.8108067617168968, "grad_norm": 0.09205646812915802, "learning_rate": 5.690004756063981e-06, "loss": 8.5628, "step": 162360 }, { "epoch": 0.8108567005418362, "grad_norm": 0.08802807331085205, "learning_rate": 5.688502841122431e-06, "loss": 8.5639, "step": 162370 }, { "epoch": 0.8109066393667757, "grad_norm": 0.09021410346031189, "learning_rate": 5.6870009261808815e-06, "loss": 8.5609, "step": 162380 }, { "epoch": 0.8109565781917152, "grad_norm": 0.09240546077489853, "learning_rate": 5.68549901123933e-06, "loss": 8.5595, "step": 162390 }, { "epoch": 0.8110065170166546, "grad_norm": 0.08953437209129333, "learning_rate": 5.68399709629778e-06, "loss": 8.5495, "step": 162400 }, { "epoch": 0.811056455841594, "grad_norm": 0.09370911866426468, "learning_rate": 5.682495181356229e-06, "loss": 8.541, "step": 162410 }, { "epoch": 0.8111063946665334, "grad_norm": 0.09173066914081573, "learning_rate": 5.680993266414679e-06, "loss": 8.5352, "step": 162420 }, { "epoch": 0.811156333491473, "grad_norm": 0.08917384594678879, "learning_rate": 5.679491351473129e-06, "loss": 8.56, "step": 162430 }, { "epoch": 0.8112062723164124, "grad_norm": 0.09332337230443954, "learning_rate": 5.6779894365315774e-06, "loss": 8.5452, "step": 162440 }, { "epoch": 0.8112562111413518, "grad_norm": 0.08929426968097687, "learning_rate": 5.676487521590028e-06, "loss": 8.5625, "step": 162450 }, { "epoch": 0.8113061499662912, "grad_norm": 0.08893352746963501, "learning_rate": 5.674985606648477e-06, "loss": 8.5519, "step": 162460 }, { "epoch": 0.8113560887912308, "grad_norm": 0.0881025567650795, "learning_rate": 5.673483691706926e-06, "loss": 8.5514, "step": 162470 }, { "epoch": 0.8114060276161702, "grad_norm": 0.09232950210571289, "learning_rate": 5.6719817767653765e-06, "loss": 8.563, "step": 162480 }, { "epoch": 0.8114559664411096, "grad_norm": 0.09270848333835602, "learning_rate": 5.670479861823825e-06, "loss": 8.5434, "step": 162490 }, { "epoch": 0.811505905266049, "grad_norm": 0.0903157889842987, "learning_rate": 5.668977946882275e-06, "loss": 8.5461, "step": 162500 }, { "epoch": 0.8115558440909886, "grad_norm": 0.09540119767189026, "learning_rate": 5.6674760319407245e-06, "loss": 8.5403, "step": 162510 }, { "epoch": 0.811605782915928, "grad_norm": 0.09533382952213287, "learning_rate": 5.665974116999174e-06, "loss": 8.5465, "step": 162520 }, { "epoch": 0.8116557217408674, "grad_norm": 0.09012457728385925, "learning_rate": 5.664472202057624e-06, "loss": 8.5571, "step": 162530 }, { "epoch": 0.8117056605658068, "grad_norm": 0.09900517761707306, "learning_rate": 5.662970287116073e-06, "loss": 8.5448, "step": 162540 }, { "epoch": 0.8117555993907464, "grad_norm": 0.08922384679317474, "learning_rate": 5.661468372174523e-06, "loss": 8.5561, "step": 162550 }, { "epoch": 0.8118055382156858, "grad_norm": 0.09595987200737, "learning_rate": 5.659966457232972e-06, "loss": 8.5536, "step": 162560 }, { "epoch": 0.8118554770406252, "grad_norm": 0.08500176668167114, "learning_rate": 5.658464542291421e-06, "loss": 8.552, "step": 162570 }, { "epoch": 0.8119054158655646, "grad_norm": 0.0854329764842987, "learning_rate": 5.6569626273498715e-06, "loss": 8.5606, "step": 162580 }, { "epoch": 0.8119553546905042, "grad_norm": 0.08596555888652802, "learning_rate": 5.655460712408321e-06, "loss": 8.5476, "step": 162590 }, { "epoch": 0.8120052935154436, "grad_norm": 0.09005005657672882, "learning_rate": 5.65395879746677e-06, "loss": 8.5487, "step": 162600 }, { "epoch": 0.812055232340383, "grad_norm": 0.08684473484754562, "learning_rate": 5.6524568825252195e-06, "loss": 8.5494, "step": 162610 }, { "epoch": 0.8121051711653224, "grad_norm": 0.08870302140712738, "learning_rate": 5.65095496758367e-06, "loss": 8.5443, "step": 162620 }, { "epoch": 0.812155109990262, "grad_norm": 0.0927698165178299, "learning_rate": 5.649453052642119e-06, "loss": 8.5475, "step": 162630 }, { "epoch": 0.8122050488152014, "grad_norm": 0.09097401797771454, "learning_rate": 5.647951137700568e-06, "loss": 8.5476, "step": 162640 }, { "epoch": 0.8122549876401408, "grad_norm": 0.0920330137014389, "learning_rate": 5.646449222759018e-06, "loss": 8.5655, "step": 162650 }, { "epoch": 0.8123049264650802, "grad_norm": 0.09149347245693207, "learning_rate": 5.644947307817467e-06, "loss": 8.543, "step": 162660 }, { "epoch": 0.8123548652900198, "grad_norm": 0.09597709774971008, "learning_rate": 5.643445392875917e-06, "loss": 8.5521, "step": 162670 }, { "epoch": 0.8124048041149592, "grad_norm": 0.08816757798194885, "learning_rate": 5.6419434779343665e-06, "loss": 8.5451, "step": 162680 }, { "epoch": 0.8124547429398986, "grad_norm": 0.0923578068614006, "learning_rate": 5.640441562992816e-06, "loss": 8.5497, "step": 162690 }, { "epoch": 0.812504681764838, "grad_norm": 0.08633648604154587, "learning_rate": 5.638939648051266e-06, "loss": 8.5605, "step": 162700 }, { "epoch": 0.8125546205897776, "grad_norm": 0.09606485068798065, "learning_rate": 5.6374377331097145e-06, "loss": 8.5607, "step": 162710 }, { "epoch": 0.812604559414717, "grad_norm": 0.09084507077932358, "learning_rate": 5.635935818168165e-06, "loss": 8.559, "step": 162720 }, { "epoch": 0.8126544982396564, "grad_norm": 0.0889735296368599, "learning_rate": 5.634433903226614e-06, "loss": 8.5486, "step": 162730 }, { "epoch": 0.8127044370645958, "grad_norm": 0.09297562390565872, "learning_rate": 5.632931988285063e-06, "loss": 8.5603, "step": 162740 }, { "epoch": 0.8127543758895354, "grad_norm": 0.08924896270036697, "learning_rate": 5.6314300733435135e-06, "loss": 8.5499, "step": 162750 }, { "epoch": 0.8128043147144748, "grad_norm": 0.08938944339752197, "learning_rate": 5.629928158401962e-06, "loss": 8.5714, "step": 162760 }, { "epoch": 0.8128542535394142, "grad_norm": 0.09662458300590515, "learning_rate": 5.628426243460412e-06, "loss": 8.5555, "step": 162770 }, { "epoch": 0.8129041923643536, "grad_norm": 0.08405207097530365, "learning_rate": 5.626924328518862e-06, "loss": 8.5385, "step": 162780 }, { "epoch": 0.8129541311892932, "grad_norm": 0.09311365336179733, "learning_rate": 5.625422413577311e-06, "loss": 8.5482, "step": 162790 }, { "epoch": 0.8130040700142326, "grad_norm": 0.09118988364934921, "learning_rate": 5.623920498635761e-06, "loss": 8.5717, "step": 162800 }, { "epoch": 0.813054008839172, "grad_norm": 0.09193669259548187, "learning_rate": 5.62241858369421e-06, "loss": 8.5485, "step": 162810 }, { "epoch": 0.8131039476641114, "grad_norm": 0.09104682505130768, "learning_rate": 5.62091666875266e-06, "loss": 8.5567, "step": 162820 }, { "epoch": 0.813153886489051, "grad_norm": 0.0893746018409729, "learning_rate": 5.61941475381111e-06, "loss": 8.5666, "step": 162830 }, { "epoch": 0.8132038253139904, "grad_norm": 0.09030063450336456, "learning_rate": 5.617912838869558e-06, "loss": 8.5506, "step": 162840 }, { "epoch": 0.8132537641389298, "grad_norm": 0.0945194736123085, "learning_rate": 5.6164109239280085e-06, "loss": 8.5281, "step": 162850 }, { "epoch": 0.8133037029638692, "grad_norm": 0.09485594183206558, "learning_rate": 5.614909008986459e-06, "loss": 8.5553, "step": 162860 }, { "epoch": 0.8133536417888088, "grad_norm": 0.08975322544574738, "learning_rate": 5.613407094044907e-06, "loss": 8.5503, "step": 162870 }, { "epoch": 0.8134035806137482, "grad_norm": 0.09506627917289734, "learning_rate": 5.611905179103357e-06, "loss": 8.5368, "step": 162880 }, { "epoch": 0.8134535194386876, "grad_norm": 0.09811557084321976, "learning_rate": 5.610403264161806e-06, "loss": 8.5708, "step": 162890 }, { "epoch": 0.813503458263627, "grad_norm": 0.08716001361608505, "learning_rate": 5.608901349220256e-06, "loss": 8.5295, "step": 162900 }, { "epoch": 0.8135533970885666, "grad_norm": 0.08750799298286438, "learning_rate": 5.607399434278706e-06, "loss": 8.5557, "step": 162910 }, { "epoch": 0.813603335913506, "grad_norm": 0.09434546530246735, "learning_rate": 5.605897519337155e-06, "loss": 8.5626, "step": 162920 }, { "epoch": 0.8136532747384454, "grad_norm": 0.08905541896820068, "learning_rate": 5.604395604395605e-06, "loss": 8.543, "step": 162930 }, { "epoch": 0.8137032135633848, "grad_norm": 0.09109733253717422, "learning_rate": 5.602893689454054e-06, "loss": 8.5606, "step": 162940 }, { "epoch": 0.8137531523883244, "grad_norm": 0.09967444837093353, "learning_rate": 5.6013917745125035e-06, "loss": 8.534, "step": 162950 }, { "epoch": 0.8138030912132638, "grad_norm": 0.08702152967453003, "learning_rate": 5.599889859570954e-06, "loss": 8.5679, "step": 162960 }, { "epoch": 0.8138530300382032, "grad_norm": 0.09200234711170197, "learning_rate": 5.598387944629402e-06, "loss": 8.5477, "step": 162970 }, { "epoch": 0.8139029688631426, "grad_norm": 0.08344555646181107, "learning_rate": 5.596886029687852e-06, "loss": 8.5648, "step": 162980 }, { "epoch": 0.8139529076880822, "grad_norm": 0.09271971136331558, "learning_rate": 5.595384114746302e-06, "loss": 8.5407, "step": 162990 }, { "epoch": 0.8140028465130216, "grad_norm": 0.09208449721336365, "learning_rate": 5.593882199804751e-06, "loss": 8.5329, "step": 163000 }, { "epoch": 0.814052785337961, "grad_norm": 0.09522075206041336, "learning_rate": 5.592380284863201e-06, "loss": 8.5586, "step": 163010 }, { "epoch": 0.8141027241629004, "grad_norm": 0.09683191031217575, "learning_rate": 5.5908783699216505e-06, "loss": 8.5441, "step": 163020 }, { "epoch": 0.81415266298784, "grad_norm": 0.09313095360994339, "learning_rate": 5.5893764549801e-06, "loss": 8.5572, "step": 163030 }, { "epoch": 0.8142026018127794, "grad_norm": 0.09198789298534393, "learning_rate": 5.587874540038549e-06, "loss": 8.5576, "step": 163040 }, { "epoch": 0.8142525406377188, "grad_norm": 0.0924067571759224, "learning_rate": 5.5863726250969985e-06, "loss": 8.5483, "step": 163050 }, { "epoch": 0.8143024794626582, "grad_norm": 0.09334490448236465, "learning_rate": 5.584870710155449e-06, "loss": 8.5449, "step": 163060 }, { "epoch": 0.8143524182875977, "grad_norm": 0.08748748898506165, "learning_rate": 5.583368795213898e-06, "loss": 8.5381, "step": 163070 }, { "epoch": 0.8144023571125372, "grad_norm": 0.08966860920190811, "learning_rate": 5.581866880272347e-06, "loss": 8.5411, "step": 163080 }, { "epoch": 0.8144522959374766, "grad_norm": 0.09077271819114685, "learning_rate": 5.580364965330797e-06, "loss": 8.545, "step": 163090 }, { "epoch": 0.814502234762416, "grad_norm": 0.0929906815290451, "learning_rate": 5.578863050389247e-06, "loss": 8.5392, "step": 163100 }, { "epoch": 0.8145521735873555, "grad_norm": 0.09005864709615707, "learning_rate": 5.577361135447696e-06, "loss": 8.5423, "step": 163110 }, { "epoch": 0.814602112412295, "grad_norm": 0.09385271370410919, "learning_rate": 5.5758592205061455e-06, "loss": 8.5367, "step": 163120 }, { "epoch": 0.8146520512372344, "grad_norm": 0.09610309451818466, "learning_rate": 5.574357305564595e-06, "loss": 8.5475, "step": 163130 }, { "epoch": 0.8147019900621738, "grad_norm": 0.09091010689735413, "learning_rate": 5.572855390623044e-06, "loss": 8.5471, "step": 163140 }, { "epoch": 0.8147519288871133, "grad_norm": 0.09262845665216446, "learning_rate": 5.571353475681494e-06, "loss": 8.5337, "step": 163150 }, { "epoch": 0.8148018677120528, "grad_norm": 0.08759453892707825, "learning_rate": 5.569851560739944e-06, "loss": 8.5621, "step": 163160 }, { "epoch": 0.8148518065369922, "grad_norm": 0.08776848018169403, "learning_rate": 5.568349645798393e-06, "loss": 8.5529, "step": 163170 }, { "epoch": 0.8149017453619316, "grad_norm": 0.08764344453811646, "learning_rate": 5.566847730856843e-06, "loss": 8.5606, "step": 163180 }, { "epoch": 0.814951684186871, "grad_norm": 0.0934050902724266, "learning_rate": 5.565345815915292e-06, "loss": 8.5542, "step": 163190 }, { "epoch": 0.8150016230118106, "grad_norm": 0.08384846895933151, "learning_rate": 5.563843900973742e-06, "loss": 8.5444, "step": 163200 }, { "epoch": 0.81505156183675, "grad_norm": 0.0902908518910408, "learning_rate": 5.562341986032191e-06, "loss": 8.5412, "step": 163210 }, { "epoch": 0.8151015006616894, "grad_norm": 0.08977564424276352, "learning_rate": 5.5608400710906405e-06, "loss": 8.5599, "step": 163220 }, { "epoch": 0.8151514394866288, "grad_norm": 0.09766307473182678, "learning_rate": 5.559338156149091e-06, "loss": 8.5354, "step": 163230 }, { "epoch": 0.8152013783115684, "grad_norm": 0.08712057769298553, "learning_rate": 5.557836241207539e-06, "loss": 8.5372, "step": 163240 }, { "epoch": 0.8152513171365078, "grad_norm": 0.0951530709862709, "learning_rate": 5.556334326265989e-06, "loss": 8.5354, "step": 163250 }, { "epoch": 0.8153012559614472, "grad_norm": 0.09080353379249573, "learning_rate": 5.5548324113244396e-06, "loss": 8.5443, "step": 163260 }, { "epoch": 0.8153511947863866, "grad_norm": 0.09223667532205582, "learning_rate": 5.553330496382888e-06, "loss": 8.5453, "step": 163270 }, { "epoch": 0.8154011336113262, "grad_norm": 0.08771520853042603, "learning_rate": 5.551828581441338e-06, "loss": 8.5374, "step": 163280 }, { "epoch": 0.8154510724362656, "grad_norm": 0.08749589323997498, "learning_rate": 5.550326666499787e-06, "loss": 8.5422, "step": 163290 }, { "epoch": 0.815501011261205, "grad_norm": 0.08917036652565002, "learning_rate": 5.548824751558237e-06, "loss": 8.5481, "step": 163300 }, { "epoch": 0.8155509500861444, "grad_norm": 0.0868101641535759, "learning_rate": 5.547322836616687e-06, "loss": 8.5619, "step": 163310 }, { "epoch": 0.815600888911084, "grad_norm": 0.091803178191185, "learning_rate": 5.5458209216751355e-06, "loss": 8.5356, "step": 163320 }, { "epoch": 0.8156508277360234, "grad_norm": 0.09425832331180573, "learning_rate": 5.544319006733586e-06, "loss": 8.5293, "step": 163330 }, { "epoch": 0.8157007665609628, "grad_norm": 0.1101720854640007, "learning_rate": 5.542817091792035e-06, "loss": 8.544, "step": 163340 }, { "epoch": 0.8157507053859022, "grad_norm": 0.0971333235502243, "learning_rate": 5.541315176850484e-06, "loss": 8.5534, "step": 163350 }, { "epoch": 0.8158006442108418, "grad_norm": 0.09201129525899887, "learning_rate": 5.5398132619089346e-06, "loss": 8.5444, "step": 163360 }, { "epoch": 0.8158505830357812, "grad_norm": 0.08867859095335007, "learning_rate": 5.538311346967383e-06, "loss": 8.5282, "step": 163370 }, { "epoch": 0.8159005218607206, "grad_norm": 0.09262479841709137, "learning_rate": 5.536809432025833e-06, "loss": 8.5319, "step": 163380 }, { "epoch": 0.81595046068566, "grad_norm": 0.08705681562423706, "learning_rate": 5.5353075170842826e-06, "loss": 8.5469, "step": 163390 }, { "epoch": 0.8160003995105996, "grad_norm": 0.09213688969612122, "learning_rate": 5.533805602142732e-06, "loss": 8.5515, "step": 163400 }, { "epoch": 0.816050338335539, "grad_norm": 0.0938788652420044, "learning_rate": 5.532303687201182e-06, "loss": 8.538, "step": 163410 }, { "epoch": 0.8161002771604784, "grad_norm": 0.09791158139705658, "learning_rate": 5.530801772259631e-06, "loss": 8.5372, "step": 163420 }, { "epoch": 0.8161502159854178, "grad_norm": 0.09159670770168304, "learning_rate": 5.529299857318081e-06, "loss": 8.551, "step": 163430 }, { "epoch": 0.8162001548103573, "grad_norm": 0.08808984607458115, "learning_rate": 5.52779794237653e-06, "loss": 8.5498, "step": 163440 }, { "epoch": 0.8162500936352968, "grad_norm": 0.09138135612010956, "learning_rate": 5.526296027434979e-06, "loss": 8.5526, "step": 163450 }, { "epoch": 0.8163000324602362, "grad_norm": 0.08727425336837769, "learning_rate": 5.5247941124934296e-06, "loss": 8.5605, "step": 163460 }, { "epoch": 0.8163499712851756, "grad_norm": 0.0928439050912857, "learning_rate": 5.523292197551879e-06, "loss": 8.5574, "step": 163470 }, { "epoch": 0.8163999101101151, "grad_norm": 0.09845706075429916, "learning_rate": 5.521790282610328e-06, "loss": 8.5555, "step": 163480 }, { "epoch": 0.8164498489350546, "grad_norm": 0.09574224054813385, "learning_rate": 5.5202883676687776e-06, "loss": 8.5523, "step": 163490 }, { "epoch": 0.816499787759994, "grad_norm": 0.09411446005105972, "learning_rate": 5.518786452727228e-06, "loss": 8.5482, "step": 163500 }, { "epoch": 0.8165497265849334, "grad_norm": 0.09284447878599167, "learning_rate": 5.517284537785677e-06, "loss": 8.5463, "step": 163510 }, { "epoch": 0.8165996654098729, "grad_norm": 0.08727595210075378, "learning_rate": 5.515782622844126e-06, "loss": 8.5425, "step": 163520 }, { "epoch": 0.8166496042348124, "grad_norm": 0.09593432396650314, "learning_rate": 5.514280707902576e-06, "loss": 8.5428, "step": 163530 }, { "epoch": 0.8166995430597518, "grad_norm": 0.09068622440099716, "learning_rate": 5.512778792961025e-06, "loss": 8.5558, "step": 163540 }, { "epoch": 0.8167494818846912, "grad_norm": 0.09026969224214554, "learning_rate": 5.511276878019475e-06, "loss": 8.5397, "step": 163550 }, { "epoch": 0.8167994207096307, "grad_norm": 0.08629763126373291, "learning_rate": 5.509774963077925e-06, "loss": 8.5547, "step": 163560 }, { "epoch": 0.8168493595345702, "grad_norm": 0.09150740504264832, "learning_rate": 5.508273048136374e-06, "loss": 8.5438, "step": 163570 }, { "epoch": 0.8168992983595096, "grad_norm": 0.0950947180390358, "learning_rate": 5.506771133194824e-06, "loss": 8.556, "step": 163580 }, { "epoch": 0.816949237184449, "grad_norm": 0.0921635702252388, "learning_rate": 5.5052692182532726e-06, "loss": 8.5484, "step": 163590 }, { "epoch": 0.8169991760093885, "grad_norm": 0.08401766419410706, "learning_rate": 5.503767303311723e-06, "loss": 8.5567, "step": 163600 }, { "epoch": 0.817049114834328, "grad_norm": 0.09006239473819733, "learning_rate": 5.502265388370172e-06, "loss": 8.55, "step": 163610 }, { "epoch": 0.8170990536592674, "grad_norm": 0.08677154034376144, "learning_rate": 5.500763473428621e-06, "loss": 8.5348, "step": 163620 }, { "epoch": 0.8171489924842068, "grad_norm": 0.08495960384607315, "learning_rate": 5.499261558487072e-06, "loss": 8.5472, "step": 163630 }, { "epoch": 0.8171989313091463, "grad_norm": 0.08838516473770142, "learning_rate": 5.49775964354552e-06, "loss": 8.5546, "step": 163640 }, { "epoch": 0.8172488701340858, "grad_norm": 0.09089840948581696, "learning_rate": 5.49625772860397e-06, "loss": 8.5567, "step": 163650 }, { "epoch": 0.8172988089590252, "grad_norm": 0.08638277649879456, "learning_rate": 5.4947558136624204e-06, "loss": 8.5409, "step": 163660 }, { "epoch": 0.8173487477839646, "grad_norm": 0.09370175004005432, "learning_rate": 5.493253898720869e-06, "loss": 8.5309, "step": 163670 }, { "epoch": 0.8173986866089041, "grad_norm": 0.08680180460214615, "learning_rate": 5.491751983779319e-06, "loss": 8.5588, "step": 163680 }, { "epoch": 0.8174486254338436, "grad_norm": 0.09824364632368088, "learning_rate": 5.490250068837768e-06, "loss": 8.5466, "step": 163690 }, { "epoch": 0.817498564258783, "grad_norm": 0.09609740972518921, "learning_rate": 5.488748153896218e-06, "loss": 8.5486, "step": 163700 }, { "epoch": 0.8175485030837224, "grad_norm": 0.09737160056829453, "learning_rate": 5.487246238954668e-06, "loss": 8.5543, "step": 163710 }, { "epoch": 0.8175984419086619, "grad_norm": 0.09023190289735794, "learning_rate": 5.485744324013116e-06, "loss": 8.5539, "step": 163720 }, { "epoch": 0.8176483807336014, "grad_norm": 0.08571267127990723, "learning_rate": 5.484242409071567e-06, "loss": 8.5504, "step": 163730 }, { "epoch": 0.8176983195585408, "grad_norm": 0.08893782645463943, "learning_rate": 5.482740494130016e-06, "loss": 8.5526, "step": 163740 }, { "epoch": 0.8177482583834802, "grad_norm": 0.08946535736322403, "learning_rate": 5.481238579188465e-06, "loss": 8.5538, "step": 163750 }, { "epoch": 0.8177981972084197, "grad_norm": 0.09130077809095383, "learning_rate": 5.4797366642469154e-06, "loss": 8.5434, "step": 163760 }, { "epoch": 0.8178481360333592, "grad_norm": 0.08708508312702179, "learning_rate": 5.478234749305364e-06, "loss": 8.5406, "step": 163770 }, { "epoch": 0.8178980748582986, "grad_norm": 0.09231363236904144, "learning_rate": 5.476732834363814e-06, "loss": 8.5286, "step": 163780 }, { "epoch": 0.817948013683238, "grad_norm": 0.09548377245664597, "learning_rate": 5.4752309194222634e-06, "loss": 8.5447, "step": 163790 }, { "epoch": 0.8179979525081775, "grad_norm": 0.08776544034481049, "learning_rate": 5.473729004480713e-06, "loss": 8.5347, "step": 163800 }, { "epoch": 0.818047891333117, "grad_norm": 0.09309916198253632, "learning_rate": 5.472227089539163e-06, "loss": 8.5546, "step": 163810 }, { "epoch": 0.8180978301580564, "grad_norm": 0.09029585868120193, "learning_rate": 5.4707251745976114e-06, "loss": 8.531, "step": 163820 }, { "epoch": 0.8181477689829958, "grad_norm": 0.08917348831892014, "learning_rate": 5.469223259656062e-06, "loss": 8.5553, "step": 163830 }, { "epoch": 0.8181977078079353, "grad_norm": 0.09531484544277191, "learning_rate": 5.467721344714511e-06, "loss": 8.5415, "step": 163840 }, { "epoch": 0.8182476466328747, "grad_norm": 0.08932767063379288, "learning_rate": 5.46621942977296e-06, "loss": 8.5473, "step": 163850 }, { "epoch": 0.8182975854578142, "grad_norm": 0.08596362173557281, "learning_rate": 5.4647175148314104e-06, "loss": 8.552, "step": 163860 }, { "epoch": 0.8183475242827536, "grad_norm": 0.09185688197612762, "learning_rate": 5.46321559988986e-06, "loss": 8.5515, "step": 163870 }, { "epoch": 0.8183974631076931, "grad_norm": 0.09344456344842911, "learning_rate": 5.461713684948309e-06, "loss": 8.5385, "step": 163880 }, { "epoch": 0.8184474019326325, "grad_norm": 0.08724787086248398, "learning_rate": 5.4602117700067584e-06, "loss": 8.5502, "step": 163890 }, { "epoch": 0.818497340757572, "grad_norm": 0.09026943147182465, "learning_rate": 5.458709855065208e-06, "loss": 8.5269, "step": 163900 }, { "epoch": 0.8185472795825114, "grad_norm": 0.0960424542427063, "learning_rate": 5.457207940123658e-06, "loss": 8.5535, "step": 163910 }, { "epoch": 0.8185972184074509, "grad_norm": 0.09598157554864883, "learning_rate": 5.455706025182107e-06, "loss": 8.5696, "step": 163920 }, { "epoch": 0.8186471572323903, "grad_norm": 0.10426607728004456, "learning_rate": 5.454204110240557e-06, "loss": 8.5406, "step": 163930 }, { "epoch": 0.8186970960573298, "grad_norm": 0.09276427328586578, "learning_rate": 5.452702195299007e-06, "loss": 8.5369, "step": 163940 }, { "epoch": 0.8187470348822692, "grad_norm": 0.09880458563566208, "learning_rate": 5.451200280357456e-06, "loss": 8.5536, "step": 163950 }, { "epoch": 0.8187969737072087, "grad_norm": 0.09303481876850128, "learning_rate": 5.4496983654159055e-06, "loss": 8.5303, "step": 163960 }, { "epoch": 0.8188469125321481, "grad_norm": 0.09363751113414764, "learning_rate": 5.448196450474355e-06, "loss": 8.5507, "step": 163970 }, { "epoch": 0.8188968513570876, "grad_norm": 0.09529575705528259, "learning_rate": 5.446694535532804e-06, "loss": 8.5431, "step": 163980 }, { "epoch": 0.818946790182027, "grad_norm": 0.09039454907178879, "learning_rate": 5.445192620591254e-06, "loss": 8.5674, "step": 163990 }, { "epoch": 0.8189967290069665, "grad_norm": 0.08885947614908218, "learning_rate": 5.443690705649704e-06, "loss": 8.5497, "step": 164000 }, { "epoch": 0.8190466678319059, "grad_norm": 0.09177469462156296, "learning_rate": 5.442188790708153e-06, "loss": 8.5541, "step": 164010 }, { "epoch": 0.8190966066568454, "grad_norm": 0.08675147593021393, "learning_rate": 5.440686875766602e-06, "loss": 8.5576, "step": 164020 }, { "epoch": 0.8191465454817848, "grad_norm": 0.09036063402891159, "learning_rate": 5.4391849608250525e-06, "loss": 8.5723, "step": 164030 }, { "epoch": 0.8191964843067243, "grad_norm": 0.08989081531763077, "learning_rate": 5.437683045883502e-06, "loss": 8.5417, "step": 164040 }, { "epoch": 0.8192464231316637, "grad_norm": 0.0942939966917038, "learning_rate": 5.436181130941951e-06, "loss": 8.5311, "step": 164050 }, { "epoch": 0.8192963619566032, "grad_norm": 0.0847172811627388, "learning_rate": 5.4346792160004005e-06, "loss": 8.5449, "step": 164060 }, { "epoch": 0.8193463007815426, "grad_norm": 0.09628713876008987, "learning_rate": 5.43317730105885e-06, "loss": 8.5535, "step": 164070 }, { "epoch": 0.8193962396064821, "grad_norm": 0.08984513580799103, "learning_rate": 5.4316753861173e-06, "loss": 8.5574, "step": 164080 }, { "epoch": 0.8194461784314215, "grad_norm": 0.0882403776049614, "learning_rate": 5.430173471175749e-06, "loss": 8.5421, "step": 164090 }, { "epoch": 0.819496117256361, "grad_norm": 0.09667932987213135, "learning_rate": 5.428671556234199e-06, "loss": 8.5527, "step": 164100 }, { "epoch": 0.8195460560813004, "grad_norm": 0.08921948820352554, "learning_rate": 5.427169641292649e-06, "loss": 8.5558, "step": 164110 }, { "epoch": 0.8195959949062399, "grad_norm": 0.08947332203388214, "learning_rate": 5.425667726351097e-06, "loss": 8.5428, "step": 164120 }, { "epoch": 0.8196459337311793, "grad_norm": 0.09034401178359985, "learning_rate": 5.4241658114095475e-06, "loss": 8.5296, "step": 164130 }, { "epoch": 0.8196958725561188, "grad_norm": 0.0902322456240654, "learning_rate": 5.422663896467997e-06, "loss": 8.5575, "step": 164140 }, { "epoch": 0.8197458113810582, "grad_norm": 0.09341645240783691, "learning_rate": 5.421161981526446e-06, "loss": 8.5492, "step": 164150 }, { "epoch": 0.8197957502059976, "grad_norm": 0.09417828172445297, "learning_rate": 5.419660066584896e-06, "loss": 8.5322, "step": 164160 }, { "epoch": 0.8198456890309371, "grad_norm": 0.09243685007095337, "learning_rate": 5.418158151643345e-06, "loss": 8.5467, "step": 164170 }, { "epoch": 0.8198956278558766, "grad_norm": 0.09273440390825272, "learning_rate": 5.416656236701795e-06, "loss": 8.5316, "step": 164180 }, { "epoch": 0.819945566680816, "grad_norm": 0.09528848528862, "learning_rate": 5.415154321760245e-06, "loss": 8.5459, "step": 164190 }, { "epoch": 0.8199955055057554, "grad_norm": 0.08874344080686569, "learning_rate": 5.413652406818694e-06, "loss": 8.5414, "step": 164200 }, { "epoch": 0.8200454443306949, "grad_norm": 0.09056615829467773, "learning_rate": 5.412150491877144e-06, "loss": 8.5484, "step": 164210 }, { "epoch": 0.8200953831556343, "grad_norm": 0.09031280875205994, "learning_rate": 5.410648576935592e-06, "loss": 8.5455, "step": 164220 }, { "epoch": 0.8201453219805738, "grad_norm": 0.09306702762842178, "learning_rate": 5.4091466619940425e-06, "loss": 8.547, "step": 164230 }, { "epoch": 0.8201952608055132, "grad_norm": 0.09164511412382126, "learning_rate": 5.407644747052493e-06, "loss": 8.556, "step": 164240 }, { "epoch": 0.8202451996304527, "grad_norm": 0.10014204680919647, "learning_rate": 5.406142832110941e-06, "loss": 8.546, "step": 164250 }, { "epoch": 0.8202951384553921, "grad_norm": 0.09262999892234802, "learning_rate": 5.404640917169391e-06, "loss": 8.5525, "step": 164260 }, { "epoch": 0.8203450772803316, "grad_norm": 0.09438300132751465, "learning_rate": 5.403139002227841e-06, "loss": 8.5283, "step": 164270 }, { "epoch": 0.820395016105271, "grad_norm": 0.09390977770090103, "learning_rate": 5.40163708728629e-06, "loss": 8.5388, "step": 164280 }, { "epoch": 0.8204449549302105, "grad_norm": 0.09154833853244781, "learning_rate": 5.40013517234474e-06, "loss": 8.5558, "step": 164290 }, { "epoch": 0.82049489375515, "grad_norm": 0.09532206505537033, "learning_rate": 5.398633257403189e-06, "loss": 8.5325, "step": 164300 }, { "epoch": 0.8205448325800894, "grad_norm": 0.08532291650772095, "learning_rate": 5.397131342461639e-06, "loss": 8.5563, "step": 164310 }, { "epoch": 0.8205947714050288, "grad_norm": 0.10053151100873947, "learning_rate": 5.395629427520088e-06, "loss": 8.5493, "step": 164320 }, { "epoch": 0.8206447102299683, "grad_norm": 0.08957508206367493, "learning_rate": 5.3941275125785375e-06, "loss": 8.5511, "step": 164330 }, { "epoch": 0.8206946490549077, "grad_norm": 0.08716392517089844, "learning_rate": 5.392625597636988e-06, "loss": 8.5514, "step": 164340 }, { "epoch": 0.8207445878798472, "grad_norm": 0.09494642913341522, "learning_rate": 5.391123682695437e-06, "loss": 8.5466, "step": 164350 }, { "epoch": 0.8207945267047866, "grad_norm": 0.09567490220069885, "learning_rate": 5.389621767753886e-06, "loss": 8.5463, "step": 164360 }, { "epoch": 0.8208444655297261, "grad_norm": 0.09385741502046585, "learning_rate": 5.388119852812336e-06, "loss": 8.5292, "step": 164370 }, { "epoch": 0.8208944043546655, "grad_norm": 0.09710198640823364, "learning_rate": 5.386617937870785e-06, "loss": 8.5374, "step": 164380 }, { "epoch": 0.820944343179605, "grad_norm": 0.09070218354463577, "learning_rate": 5.385116022929235e-06, "loss": 8.5362, "step": 164390 }, { "epoch": 0.8209942820045444, "grad_norm": 0.09835637360811234, "learning_rate": 5.3836141079876845e-06, "loss": 8.5439, "step": 164400 }, { "epoch": 0.8210442208294839, "grad_norm": 0.09094212204217911, "learning_rate": 5.382112193046134e-06, "loss": 8.5619, "step": 164410 }, { "epoch": 0.8210941596544233, "grad_norm": 0.09400885552167892, "learning_rate": 5.380610278104583e-06, "loss": 8.5419, "step": 164420 }, { "epoch": 0.8211440984793628, "grad_norm": 0.09460953623056412, "learning_rate": 5.379108363163033e-06, "loss": 8.5401, "step": 164430 }, { "epoch": 0.8211940373043022, "grad_norm": 0.08946454524993896, "learning_rate": 5.377606448221483e-06, "loss": 8.5356, "step": 164440 }, { "epoch": 0.8212439761292417, "grad_norm": 0.08983763307332993, "learning_rate": 5.376104533279932e-06, "loss": 8.5455, "step": 164450 }, { "epoch": 0.8212939149541811, "grad_norm": 0.08810096234083176, "learning_rate": 5.374602618338381e-06, "loss": 8.5647, "step": 164460 }, { "epoch": 0.8213438537791206, "grad_norm": 0.08846630901098251, "learning_rate": 5.373100703396831e-06, "loss": 8.5521, "step": 164470 }, { "epoch": 0.82139379260406, "grad_norm": 0.08851570636034012, "learning_rate": 5.371598788455281e-06, "loss": 8.5412, "step": 164480 }, { "epoch": 0.8214437314289995, "grad_norm": 0.09497125446796417, "learning_rate": 5.37009687351373e-06, "loss": 8.5321, "step": 164490 }, { "epoch": 0.8214936702539389, "grad_norm": 0.09288336336612701, "learning_rate": 5.3685949585721795e-06, "loss": 8.5472, "step": 164500 }, { "epoch": 0.8215436090788784, "grad_norm": 0.0919954776763916, "learning_rate": 5.36709304363063e-06, "loss": 8.5613, "step": 164510 }, { "epoch": 0.8215935479038178, "grad_norm": 0.09269890189170837, "learning_rate": 5.365591128689078e-06, "loss": 8.546, "step": 164520 }, { "epoch": 0.8216434867287573, "grad_norm": 0.08948935568332672, "learning_rate": 5.364089213747528e-06, "loss": 8.5627, "step": 164530 }, { "epoch": 0.8216934255536967, "grad_norm": 0.09390202164649963, "learning_rate": 5.362587298805978e-06, "loss": 8.5363, "step": 164540 }, { "epoch": 0.8217433643786362, "grad_norm": 0.09372469782829285, "learning_rate": 5.361085383864427e-06, "loss": 8.5518, "step": 164550 }, { "epoch": 0.8217933032035756, "grad_norm": 0.0866970643401146, "learning_rate": 5.359583468922877e-06, "loss": 8.5598, "step": 164560 }, { "epoch": 0.8218432420285151, "grad_norm": 0.08886361867189407, "learning_rate": 5.358081553981326e-06, "loss": 8.5504, "step": 164570 }, { "epoch": 0.8218931808534545, "grad_norm": 0.09430024772882462, "learning_rate": 5.356579639039776e-06, "loss": 8.5434, "step": 164580 }, { "epoch": 0.821943119678394, "grad_norm": 0.09142236411571503, "learning_rate": 5.355077724098226e-06, "loss": 8.5417, "step": 164590 }, { "epoch": 0.8219930585033334, "grad_norm": 0.09642058610916138, "learning_rate": 5.3535758091566745e-06, "loss": 8.5808, "step": 164600 }, { "epoch": 0.8220429973282729, "grad_norm": 0.08818865567445755, "learning_rate": 5.352073894215125e-06, "loss": 8.5353, "step": 164610 }, { "epoch": 0.8220929361532123, "grad_norm": 0.09273871034383774, "learning_rate": 5.350571979273573e-06, "loss": 8.5444, "step": 164620 }, { "epoch": 0.8221428749781517, "grad_norm": 0.09132785350084305, "learning_rate": 5.349070064332023e-06, "loss": 8.5611, "step": 164630 }, { "epoch": 0.8221928138030912, "grad_norm": 0.09053760766983032, "learning_rate": 5.3475681493904735e-06, "loss": 8.5465, "step": 164640 }, { "epoch": 0.8222427526280307, "grad_norm": 0.1000586673617363, "learning_rate": 5.346066234448922e-06, "loss": 8.5449, "step": 164650 }, { "epoch": 0.8222926914529701, "grad_norm": 0.09241247922182083, "learning_rate": 5.344564319507372e-06, "loss": 8.5303, "step": 164660 }, { "epoch": 0.8223426302779095, "grad_norm": 0.09306442737579346, "learning_rate": 5.3430624045658215e-06, "loss": 8.5248, "step": 164670 }, { "epoch": 0.822392569102849, "grad_norm": 0.09631567448377609, "learning_rate": 5.341560489624271e-06, "loss": 8.5448, "step": 164680 }, { "epoch": 0.8224425079277885, "grad_norm": 0.09375005215406418, "learning_rate": 5.340058574682721e-06, "loss": 8.5468, "step": 164690 }, { "epoch": 0.8224924467527279, "grad_norm": 0.08916940540075302, "learning_rate": 5.3385566597411695e-06, "loss": 8.5525, "step": 164700 }, { "epoch": 0.8225423855776673, "grad_norm": 0.09325413405895233, "learning_rate": 5.33705474479962e-06, "loss": 8.5474, "step": 164710 }, { "epoch": 0.8225923244026068, "grad_norm": 0.08860702812671661, "learning_rate": 5.335552829858069e-06, "loss": 8.5582, "step": 164720 }, { "epoch": 0.8226422632275463, "grad_norm": 0.09240319579839706, "learning_rate": 5.334050914916518e-06, "loss": 8.5351, "step": 164730 }, { "epoch": 0.8226922020524857, "grad_norm": 0.09109298884868622, "learning_rate": 5.3325489999749685e-06, "loss": 8.5478, "step": 164740 }, { "epoch": 0.8227421408774251, "grad_norm": 0.09379500150680542, "learning_rate": 5.331047085033418e-06, "loss": 8.5532, "step": 164750 }, { "epoch": 0.8227920797023646, "grad_norm": 0.08987203985452652, "learning_rate": 5.329545170091867e-06, "loss": 8.554, "step": 164760 }, { "epoch": 0.8228420185273041, "grad_norm": 0.08933199197053909, "learning_rate": 5.3280432551503165e-06, "loss": 8.5402, "step": 164770 }, { "epoch": 0.8228919573522435, "grad_norm": 0.08755306154489517, "learning_rate": 5.326541340208766e-06, "loss": 8.5478, "step": 164780 }, { "epoch": 0.8229418961771829, "grad_norm": 0.0940420925617218, "learning_rate": 5.325039425267216e-06, "loss": 8.5383, "step": 164790 }, { "epoch": 0.8229918350021224, "grad_norm": 0.08608761429786682, "learning_rate": 5.323537510325665e-06, "loss": 8.5539, "step": 164800 }, { "epoch": 0.8230417738270619, "grad_norm": 0.0908665880560875, "learning_rate": 5.322035595384115e-06, "loss": 8.5249, "step": 164810 }, { "epoch": 0.8230917126520013, "grad_norm": 0.08952327817678452, "learning_rate": 5.320533680442564e-06, "loss": 8.5568, "step": 164820 }, { "epoch": 0.8231416514769407, "grad_norm": 0.09053060412406921, "learning_rate": 5.319031765501014e-06, "loss": 8.5554, "step": 164830 }, { "epoch": 0.8231915903018802, "grad_norm": 0.08716212213039398, "learning_rate": 5.3175298505594636e-06, "loss": 8.5512, "step": 164840 }, { "epoch": 0.8232415291268197, "grad_norm": 0.08893406391143799, "learning_rate": 5.316027935617913e-06, "loss": 8.557, "step": 164850 }, { "epoch": 0.8232914679517591, "grad_norm": 0.09567388892173767, "learning_rate": 5.314526020676362e-06, "loss": 8.5658, "step": 164860 }, { "epoch": 0.8233414067766985, "grad_norm": 0.09032758325338364, "learning_rate": 5.3130241057348115e-06, "loss": 8.5635, "step": 164870 }, { "epoch": 0.823391345601638, "grad_norm": 0.0944572165608406, "learning_rate": 5.311522190793262e-06, "loss": 8.5377, "step": 164880 }, { "epoch": 0.8234412844265775, "grad_norm": 0.08695536106824875, "learning_rate": 5.310020275851711e-06, "loss": 8.5491, "step": 164890 }, { "epoch": 0.8234912232515169, "grad_norm": 0.0900052860379219, "learning_rate": 5.30851836091016e-06, "loss": 8.5341, "step": 164900 }, { "epoch": 0.8235411620764563, "grad_norm": 0.09355796128511429, "learning_rate": 5.3070164459686106e-06, "loss": 8.5427, "step": 164910 }, { "epoch": 0.8235911009013958, "grad_norm": 0.09041264653205872, "learning_rate": 5.305514531027059e-06, "loss": 8.5535, "step": 164920 }, { "epoch": 0.8236410397263353, "grad_norm": 0.09315498918294907, "learning_rate": 5.304012616085509e-06, "loss": 8.5472, "step": 164930 }, { "epoch": 0.8236909785512747, "grad_norm": 0.08934438228607178, "learning_rate": 5.3025107011439586e-06, "loss": 8.5446, "step": 164940 }, { "epoch": 0.8237409173762141, "grad_norm": 0.09067961573600769, "learning_rate": 5.301008786202408e-06, "loss": 8.5461, "step": 164950 }, { "epoch": 0.8237908562011536, "grad_norm": 0.08916930854320526, "learning_rate": 5.299506871260858e-06, "loss": 8.5513, "step": 164960 }, { "epoch": 0.8238407950260931, "grad_norm": 0.09387677907943726, "learning_rate": 5.298004956319307e-06, "loss": 8.5447, "step": 164970 }, { "epoch": 0.8238907338510325, "grad_norm": 0.08811572939157486, "learning_rate": 5.296503041377757e-06, "loss": 8.5657, "step": 164980 }, { "epoch": 0.8239406726759719, "grad_norm": 0.09204726666212082, "learning_rate": 5.295001126436207e-06, "loss": 8.5476, "step": 164990 }, { "epoch": 0.8239906115009114, "grad_norm": 0.0907144770026207, "learning_rate": 5.293499211494655e-06, "loss": 8.5335, "step": 165000 }, { "epoch": 0.8240405503258509, "grad_norm": 0.09380396455526352, "learning_rate": 5.2919972965531056e-06, "loss": 8.5313, "step": 165010 }, { "epoch": 0.8240904891507903, "grad_norm": 0.09004945307970047, "learning_rate": 5.290495381611555e-06, "loss": 8.5442, "step": 165020 }, { "epoch": 0.8241404279757297, "grad_norm": 0.08934416621923447, "learning_rate": 5.288993466670004e-06, "loss": 8.5422, "step": 165030 }, { "epoch": 0.8241903668006691, "grad_norm": 0.08731923997402191, "learning_rate": 5.287491551728454e-06, "loss": 8.5354, "step": 165040 }, { "epoch": 0.8242403056256087, "grad_norm": 0.09137827903032303, "learning_rate": 5.285989636786903e-06, "loss": 8.5595, "step": 165050 }, { "epoch": 0.8242902444505481, "grad_norm": 0.09268289804458618, "learning_rate": 5.284487721845353e-06, "loss": 8.559, "step": 165060 }, { "epoch": 0.8243401832754875, "grad_norm": 0.0933336690068245, "learning_rate": 5.282985806903803e-06, "loss": 8.535, "step": 165070 }, { "epoch": 0.824390122100427, "grad_norm": 0.09035741537809372, "learning_rate": 5.281483891962252e-06, "loss": 8.546, "step": 165080 }, { "epoch": 0.8244400609253665, "grad_norm": 0.08535844087600708, "learning_rate": 5.279981977020702e-06, "loss": 8.5388, "step": 165090 }, { "epoch": 0.8244899997503059, "grad_norm": 0.09241851419210434, "learning_rate": 5.27848006207915e-06, "loss": 8.5666, "step": 165100 }, { "epoch": 0.8245399385752453, "grad_norm": 0.09012720733880997, "learning_rate": 5.276978147137601e-06, "loss": 8.5522, "step": 165110 }, { "epoch": 0.8245898774001847, "grad_norm": 0.09050553292036057, "learning_rate": 5.275476232196051e-06, "loss": 8.5381, "step": 165120 }, { "epoch": 0.8246398162251243, "grad_norm": 0.09144661575555801, "learning_rate": 5.273974317254499e-06, "loss": 8.5375, "step": 165130 }, { "epoch": 0.8246897550500637, "grad_norm": 0.08838358521461487, "learning_rate": 5.272472402312949e-06, "loss": 8.5498, "step": 165140 }, { "epoch": 0.8247396938750031, "grad_norm": 0.09238158166408539, "learning_rate": 5.270970487371399e-06, "loss": 8.538, "step": 165150 }, { "epoch": 0.8247896326999425, "grad_norm": 0.09907042235136032, "learning_rate": 5.269468572429848e-06, "loss": 8.5508, "step": 165160 }, { "epoch": 0.824839571524882, "grad_norm": 0.08940906822681427, "learning_rate": 5.267966657488298e-06, "loss": 8.5438, "step": 165170 }, { "epoch": 0.8248895103498215, "grad_norm": 0.09530402719974518, "learning_rate": 5.266464742546747e-06, "loss": 8.5375, "step": 165180 }, { "epoch": 0.8249394491747609, "grad_norm": 0.0869201272726059, "learning_rate": 5.264962827605197e-06, "loss": 8.5371, "step": 165190 }, { "epoch": 0.8249893879997003, "grad_norm": 0.08808686584234238, "learning_rate": 5.263460912663646e-06, "loss": 8.5476, "step": 165200 }, { "epoch": 0.8250393268246398, "grad_norm": 0.09098785370588303, "learning_rate": 5.261958997722096e-06, "loss": 8.5324, "step": 165210 }, { "epoch": 0.8250892656495793, "grad_norm": 0.09204449504613876, "learning_rate": 5.260457082780546e-06, "loss": 8.5425, "step": 165220 }, { "epoch": 0.8251392044745187, "grad_norm": 0.09334173798561096, "learning_rate": 5.258955167838995e-06, "loss": 8.5315, "step": 165230 }, { "epoch": 0.8251891432994581, "grad_norm": 0.08749334514141083, "learning_rate": 5.2574532528974444e-06, "loss": 8.5364, "step": 165240 }, { "epoch": 0.8252390821243976, "grad_norm": 0.09419584274291992, "learning_rate": 5.255951337955894e-06, "loss": 8.5426, "step": 165250 }, { "epoch": 0.8252890209493371, "grad_norm": 0.09130842983722687, "learning_rate": 5.254449423014343e-06, "loss": 8.5287, "step": 165260 }, { "epoch": 0.8253389597742765, "grad_norm": 0.0886424109339714, "learning_rate": 5.252947508072793e-06, "loss": 8.5396, "step": 165270 }, { "epoch": 0.8253888985992159, "grad_norm": 0.09385290741920471, "learning_rate": 5.251445593131243e-06, "loss": 8.5361, "step": 165280 }, { "epoch": 0.8254388374241554, "grad_norm": 0.0890863835811615, "learning_rate": 5.249943678189692e-06, "loss": 8.5467, "step": 165290 }, { "epoch": 0.8254887762490949, "grad_norm": 0.08851329982280731, "learning_rate": 5.248441763248141e-06, "loss": 8.5532, "step": 165300 }, { "epoch": 0.8255387150740343, "grad_norm": 0.09374735504388809, "learning_rate": 5.2469398483065914e-06, "loss": 8.5461, "step": 165310 }, { "epoch": 0.8255886538989737, "grad_norm": 0.08806613087654114, "learning_rate": 5.245437933365041e-06, "loss": 8.5575, "step": 165320 }, { "epoch": 0.8256385927239132, "grad_norm": 0.09200981259346008, "learning_rate": 5.24393601842349e-06, "loss": 8.5345, "step": 165330 }, { "epoch": 0.8256885315488527, "grad_norm": 0.08689451217651367, "learning_rate": 5.2424341034819394e-06, "loss": 8.5552, "step": 165340 }, { "epoch": 0.8257384703737921, "grad_norm": 0.08550180494785309, "learning_rate": 5.240932188540389e-06, "loss": 8.5494, "step": 165350 }, { "epoch": 0.8257884091987315, "grad_norm": 0.09254343062639236, "learning_rate": 5.239430273598839e-06, "loss": 8.5475, "step": 165360 }, { "epoch": 0.825838348023671, "grad_norm": 0.08904233574867249, "learning_rate": 5.237928358657288e-06, "loss": 8.5499, "step": 165370 }, { "epoch": 0.8258882868486105, "grad_norm": 0.09471232444047928, "learning_rate": 5.236426443715738e-06, "loss": 8.5489, "step": 165380 }, { "epoch": 0.8259382256735499, "grad_norm": 0.09248729050159454, "learning_rate": 5.234924528774188e-06, "loss": 8.5425, "step": 165390 }, { "epoch": 0.8259881644984893, "grad_norm": 0.09157528728246689, "learning_rate": 5.233422613832636e-06, "loss": 8.5438, "step": 165400 }, { "epoch": 0.8260381033234288, "grad_norm": 0.09400694817304611, "learning_rate": 5.2319206988910864e-06, "loss": 8.5369, "step": 165410 }, { "epoch": 0.8260880421483683, "grad_norm": 0.10043618083000183, "learning_rate": 5.230418783949536e-06, "loss": 8.5332, "step": 165420 }, { "epoch": 0.8261379809733077, "grad_norm": 0.09534844756126404, "learning_rate": 5.228916869007985e-06, "loss": 8.5501, "step": 165430 }, { "epoch": 0.8261879197982471, "grad_norm": 0.0883263498544693, "learning_rate": 5.227414954066435e-06, "loss": 8.5382, "step": 165440 }, { "epoch": 0.8262378586231865, "grad_norm": 0.08956603705883026, "learning_rate": 5.225913039124884e-06, "loss": 8.5308, "step": 165450 }, { "epoch": 0.8262877974481261, "grad_norm": 0.09134762734174728, "learning_rate": 5.224411124183334e-06, "loss": 8.5469, "step": 165460 }, { "epoch": 0.8263377362730655, "grad_norm": 0.09247227758169174, "learning_rate": 5.222909209241784e-06, "loss": 8.5423, "step": 165470 }, { "epoch": 0.8263876750980049, "grad_norm": 0.09392131119966507, "learning_rate": 5.221407294300233e-06, "loss": 8.5436, "step": 165480 }, { "epoch": 0.8264376139229443, "grad_norm": 0.0913129672408104, "learning_rate": 5.219905379358683e-06, "loss": 8.5342, "step": 165490 }, { "epoch": 0.8264875527478839, "grad_norm": 0.09089593589305878, "learning_rate": 5.218403464417131e-06, "loss": 8.5391, "step": 165500 }, { "epoch": 0.8265374915728233, "grad_norm": 0.08943469077348709, "learning_rate": 5.2169015494755815e-06, "loss": 8.537, "step": 165510 }, { "epoch": 0.8265874303977627, "grad_norm": 0.09096402674913406, "learning_rate": 5.215399634534032e-06, "loss": 8.5331, "step": 165520 }, { "epoch": 0.8266373692227021, "grad_norm": 0.09284608066082001, "learning_rate": 5.21389771959248e-06, "loss": 8.5359, "step": 165530 }, { "epoch": 0.8266873080476417, "grad_norm": 0.08914735913276672, "learning_rate": 5.21239580465093e-06, "loss": 8.5522, "step": 165540 }, { "epoch": 0.8267372468725811, "grad_norm": 0.10136851668357849, "learning_rate": 5.21089388970938e-06, "loss": 8.5232, "step": 165550 }, { "epoch": 0.8267871856975205, "grad_norm": 0.09322341531515121, "learning_rate": 5.209391974767829e-06, "loss": 8.5354, "step": 165560 }, { "epoch": 0.8268371245224599, "grad_norm": 0.09235122054815292, "learning_rate": 5.207890059826279e-06, "loss": 8.5405, "step": 165570 }, { "epoch": 0.8268870633473995, "grad_norm": 0.09125807136297226, "learning_rate": 5.206388144884728e-06, "loss": 8.5406, "step": 165580 }, { "epoch": 0.8269370021723389, "grad_norm": 0.09730856120586395, "learning_rate": 5.204886229943178e-06, "loss": 8.5739, "step": 165590 }, { "epoch": 0.8269869409972783, "grad_norm": 0.09331224858760834, "learning_rate": 5.203384315001627e-06, "loss": 8.5544, "step": 165600 }, { "epoch": 0.8270368798222177, "grad_norm": 0.09145566821098328, "learning_rate": 5.2018824000600765e-06, "loss": 8.5305, "step": 165610 }, { "epoch": 0.8270868186471573, "grad_norm": 0.09408487379550934, "learning_rate": 5.200380485118527e-06, "loss": 8.536, "step": 165620 }, { "epoch": 0.8271367574720967, "grad_norm": 0.09319987148046494, "learning_rate": 5.198878570176976e-06, "loss": 8.5615, "step": 165630 }, { "epoch": 0.8271866962970361, "grad_norm": 0.08938295394182205, "learning_rate": 5.197376655235425e-06, "loss": 8.5486, "step": 165640 }, { "epoch": 0.8272366351219755, "grad_norm": 0.09183337539434433, "learning_rate": 5.195874740293875e-06, "loss": 8.5406, "step": 165650 }, { "epoch": 0.8272865739469151, "grad_norm": 0.09237658977508545, "learning_rate": 5.194372825352324e-06, "loss": 8.5519, "step": 165660 }, { "epoch": 0.8273365127718545, "grad_norm": 0.08938465267419815, "learning_rate": 5.192870910410774e-06, "loss": 8.5421, "step": 165670 }, { "epoch": 0.8273864515967939, "grad_norm": 0.093377985060215, "learning_rate": 5.1913689954692235e-06, "loss": 8.5533, "step": 165680 }, { "epoch": 0.8274363904217333, "grad_norm": 0.09096071869134903, "learning_rate": 5.189867080527673e-06, "loss": 8.5406, "step": 165690 }, { "epoch": 0.8274863292466729, "grad_norm": 0.08967030048370361, "learning_rate": 5.188365165586122e-06, "loss": 8.5387, "step": 165700 }, { "epoch": 0.8275362680716123, "grad_norm": 0.09120657294988632, "learning_rate": 5.186863250644572e-06, "loss": 8.5298, "step": 165710 }, { "epoch": 0.8275862068965517, "grad_norm": 0.08661961555480957, "learning_rate": 5.185361335703022e-06, "loss": 8.538, "step": 165720 }, { "epoch": 0.8276361457214911, "grad_norm": 0.0883423313498497, "learning_rate": 5.183859420761471e-06, "loss": 8.5444, "step": 165730 }, { "epoch": 0.8276860845464307, "grad_norm": 0.09612096101045609, "learning_rate": 5.18235750581992e-06, "loss": 8.5432, "step": 165740 }, { "epoch": 0.8277360233713701, "grad_norm": 0.08954691141843796, "learning_rate": 5.18085559087837e-06, "loss": 8.5485, "step": 165750 }, { "epoch": 0.8277859621963095, "grad_norm": 0.09344494342803955, "learning_rate": 5.17935367593682e-06, "loss": 8.5501, "step": 165760 }, { "epoch": 0.8278359010212489, "grad_norm": 0.09104925394058228, "learning_rate": 5.177851760995269e-06, "loss": 8.5515, "step": 165770 }, { "epoch": 0.8278858398461885, "grad_norm": 0.09306161850690842, "learning_rate": 5.1763498460537185e-06, "loss": 8.5442, "step": 165780 }, { "epoch": 0.8279357786711279, "grad_norm": 0.08997157216072083, "learning_rate": 5.174847931112169e-06, "loss": 8.5439, "step": 165790 }, { "epoch": 0.8279857174960673, "grad_norm": 0.08354370296001434, "learning_rate": 5.173346016170617e-06, "loss": 8.5438, "step": 165800 }, { "epoch": 0.8280356563210067, "grad_norm": 0.09156732261180878, "learning_rate": 5.171844101229067e-06, "loss": 8.5204, "step": 165810 }, { "epoch": 0.8280855951459463, "grad_norm": 0.09536905586719513, "learning_rate": 5.170342186287517e-06, "loss": 8.5429, "step": 165820 }, { "epoch": 0.8281355339708857, "grad_norm": 0.09520316123962402, "learning_rate": 5.168840271345966e-06, "loss": 8.5403, "step": 165830 }, { "epoch": 0.8281854727958251, "grad_norm": 0.08938983827829361, "learning_rate": 5.167338356404416e-06, "loss": 8.5504, "step": 165840 }, { "epoch": 0.8282354116207645, "grad_norm": 0.09314066916704178, "learning_rate": 5.165836441462865e-06, "loss": 8.5305, "step": 165850 }, { "epoch": 0.8282853504457041, "grad_norm": 0.09131036698818207, "learning_rate": 5.164334526521315e-06, "loss": 8.5455, "step": 165860 }, { "epoch": 0.8283352892706435, "grad_norm": 0.09065386652946472, "learning_rate": 5.162832611579765e-06, "loss": 8.5364, "step": 165870 }, { "epoch": 0.8283852280955829, "grad_norm": 0.08912426233291626, "learning_rate": 5.1613306966382135e-06, "loss": 8.5208, "step": 165880 }, { "epoch": 0.8284351669205223, "grad_norm": 0.0999988466501236, "learning_rate": 5.159828781696664e-06, "loss": 8.5334, "step": 165890 }, { "epoch": 0.8284851057454619, "grad_norm": 0.09201761335134506, "learning_rate": 5.158326866755112e-06, "loss": 8.5401, "step": 165900 }, { "epoch": 0.8285350445704013, "grad_norm": 0.08930778503417969, "learning_rate": 5.156824951813562e-06, "loss": 8.5424, "step": 165910 }, { "epoch": 0.8285849833953407, "grad_norm": 0.08692941069602966, "learning_rate": 5.1553230368720125e-06, "loss": 8.554, "step": 165920 }, { "epoch": 0.8286349222202801, "grad_norm": 0.09199637919664383, "learning_rate": 5.153821121930461e-06, "loss": 8.5407, "step": 165930 }, { "epoch": 0.8286848610452197, "grad_norm": 0.09677516669034958, "learning_rate": 5.152319206988911e-06, "loss": 8.5306, "step": 165940 }, { "epoch": 0.8287347998701591, "grad_norm": 0.0840715691447258, "learning_rate": 5.1508172920473605e-06, "loss": 8.5462, "step": 165950 }, { "epoch": 0.8287847386950985, "grad_norm": 0.0945180281996727, "learning_rate": 5.14931537710581e-06, "loss": 8.5233, "step": 165960 }, { "epoch": 0.8288346775200379, "grad_norm": 0.09411770850419998, "learning_rate": 5.14781346216426e-06, "loss": 8.5413, "step": 165970 }, { "epoch": 0.8288846163449775, "grad_norm": 0.09039691835641861, "learning_rate": 5.1463115472227085e-06, "loss": 8.5273, "step": 165980 }, { "epoch": 0.8289345551699169, "grad_norm": 0.09304576367139816, "learning_rate": 5.144809632281159e-06, "loss": 8.5407, "step": 165990 }, { "epoch": 0.8289844939948563, "grad_norm": 0.09121018648147583, "learning_rate": 5.143307717339608e-06, "loss": 8.5493, "step": 166000 }, { "epoch": 0.8290344328197957, "grad_norm": 0.0907362774014473, "learning_rate": 5.141805802398057e-06, "loss": 8.5375, "step": 166010 }, { "epoch": 0.8290843716447353, "grad_norm": 0.0894848182797432, "learning_rate": 5.1403038874565075e-06, "loss": 8.5168, "step": 166020 }, { "epoch": 0.8291343104696747, "grad_norm": 0.09260275959968567, "learning_rate": 5.138801972514957e-06, "loss": 8.5411, "step": 166030 }, { "epoch": 0.8291842492946141, "grad_norm": 0.08942524343729019, "learning_rate": 5.137300057573406e-06, "loss": 8.5425, "step": 166040 }, { "epoch": 0.8292341881195535, "grad_norm": 0.08970021456480026, "learning_rate": 5.1357981426318555e-06, "loss": 8.5597, "step": 166050 }, { "epoch": 0.829284126944493, "grad_norm": 0.09671708196401596, "learning_rate": 5.134296227690305e-06, "loss": 8.5416, "step": 166060 }, { "epoch": 0.8293340657694325, "grad_norm": 0.09718456864356995, "learning_rate": 5.132794312748755e-06, "loss": 8.5404, "step": 166070 }, { "epoch": 0.8293840045943719, "grad_norm": 0.09095334261655807, "learning_rate": 5.131292397807204e-06, "loss": 8.5516, "step": 166080 }, { "epoch": 0.8294339434193113, "grad_norm": 0.09243844449520111, "learning_rate": 5.129790482865654e-06, "loss": 8.5487, "step": 166090 }, { "epoch": 0.8294838822442508, "grad_norm": 0.09933464974164963, "learning_rate": 5.128288567924104e-06, "loss": 8.5089, "step": 166100 }, { "epoch": 0.8295338210691903, "grad_norm": 0.09080641716718674, "learning_rate": 5.126786652982553e-06, "loss": 8.5346, "step": 166110 }, { "epoch": 0.8295837598941297, "grad_norm": 0.09354326874017715, "learning_rate": 5.1252847380410025e-06, "loss": 8.5647, "step": 166120 }, { "epoch": 0.8296336987190691, "grad_norm": 0.08734377473592758, "learning_rate": 5.123782823099452e-06, "loss": 8.544, "step": 166130 }, { "epoch": 0.8296836375440085, "grad_norm": 0.09000393003225327, "learning_rate": 5.122280908157901e-06, "loss": 8.532, "step": 166140 }, { "epoch": 0.8297335763689481, "grad_norm": 0.08724624663591385, "learning_rate": 5.120778993216351e-06, "loss": 8.5537, "step": 166150 }, { "epoch": 0.8297835151938875, "grad_norm": 0.09009451419115067, "learning_rate": 5.119277078274801e-06, "loss": 8.5666, "step": 166160 }, { "epoch": 0.8298334540188269, "grad_norm": 0.08647624403238297, "learning_rate": 5.11777516333325e-06, "loss": 8.5261, "step": 166170 }, { "epoch": 0.8298833928437663, "grad_norm": 0.09104521572589874, "learning_rate": 5.116273248391699e-06, "loss": 8.5332, "step": 166180 }, { "epoch": 0.8299333316687059, "grad_norm": 0.0890510231256485, "learning_rate": 5.1147713334501495e-06, "loss": 8.5533, "step": 166190 }, { "epoch": 0.8299832704936453, "grad_norm": 0.10203186422586441, "learning_rate": 5.113269418508599e-06, "loss": 8.5449, "step": 166200 }, { "epoch": 0.8300332093185847, "grad_norm": 0.08849181234836578, "learning_rate": 5.111767503567048e-06, "loss": 8.5472, "step": 166210 }, { "epoch": 0.8300831481435241, "grad_norm": 0.08866935223340988, "learning_rate": 5.1102655886254975e-06, "loss": 8.5432, "step": 166220 }, { "epoch": 0.8301330869684637, "grad_norm": 0.08821461349725723, "learning_rate": 5.108763673683947e-06, "loss": 8.5514, "step": 166230 }, { "epoch": 0.8301830257934031, "grad_norm": 0.09653317928314209, "learning_rate": 5.107261758742397e-06, "loss": 8.5564, "step": 166240 }, { "epoch": 0.8302329646183425, "grad_norm": 0.09071271866559982, "learning_rate": 5.105759843800846e-06, "loss": 8.5254, "step": 166250 }, { "epoch": 0.8302829034432819, "grad_norm": 0.09026788175106049, "learning_rate": 5.104257928859296e-06, "loss": 8.5428, "step": 166260 }, { "epoch": 0.8303328422682215, "grad_norm": 0.08606173098087311, "learning_rate": 5.102756013917746e-06, "loss": 8.5425, "step": 166270 }, { "epoch": 0.8303827810931609, "grad_norm": 0.09267769753932953, "learning_rate": 5.101254098976194e-06, "loss": 8.5487, "step": 166280 }, { "epoch": 0.8304327199181003, "grad_norm": 0.08565813302993774, "learning_rate": 5.0997521840346445e-06, "loss": 8.5572, "step": 166290 }, { "epoch": 0.8304826587430397, "grad_norm": 0.09123985469341278, "learning_rate": 5.098250269093094e-06, "loss": 8.5305, "step": 166300 }, { "epoch": 0.8305325975679793, "grad_norm": 0.08855421096086502, "learning_rate": 5.096748354151543e-06, "loss": 8.5374, "step": 166310 }, { "epoch": 0.8305825363929187, "grad_norm": 0.09398633986711502, "learning_rate": 5.095246439209993e-06, "loss": 8.5517, "step": 166320 }, { "epoch": 0.8306324752178581, "grad_norm": 0.08958692103624344, "learning_rate": 5.093744524268442e-06, "loss": 8.5346, "step": 166330 }, { "epoch": 0.8306824140427975, "grad_norm": 0.09231898933649063, "learning_rate": 5.092242609326892e-06, "loss": 8.5458, "step": 166340 }, { "epoch": 0.8307323528677371, "grad_norm": 0.09124819934368134, "learning_rate": 5.090740694385342e-06, "loss": 8.5355, "step": 166350 }, { "epoch": 0.8307822916926765, "grad_norm": 0.09533385187387466, "learning_rate": 5.089238779443791e-06, "loss": 8.5478, "step": 166360 }, { "epoch": 0.8308322305176159, "grad_norm": 0.0879926010966301, "learning_rate": 5.087736864502241e-06, "loss": 8.5447, "step": 166370 }, { "epoch": 0.8308821693425553, "grad_norm": 0.09048743546009064, "learning_rate": 5.086234949560689e-06, "loss": 8.5385, "step": 166380 }, { "epoch": 0.8309321081674949, "grad_norm": 0.09626738727092743, "learning_rate": 5.0847330346191396e-06, "loss": 8.5422, "step": 166390 }, { "epoch": 0.8309820469924343, "grad_norm": 0.09046705067157745, "learning_rate": 5.08323111967759e-06, "loss": 8.5361, "step": 166400 }, { "epoch": 0.8310319858173737, "grad_norm": 0.0912972241640091, "learning_rate": 5.081729204736038e-06, "loss": 8.5421, "step": 166410 }, { "epoch": 0.8310819246423131, "grad_norm": 0.08912695944309235, "learning_rate": 5.080227289794488e-06, "loss": 8.5457, "step": 166420 }, { "epoch": 0.8311318634672527, "grad_norm": 0.09452259540557861, "learning_rate": 5.078725374852938e-06, "loss": 8.5539, "step": 166430 }, { "epoch": 0.8311818022921921, "grad_norm": 0.0892590880393982, "learning_rate": 5.077223459911387e-06, "loss": 8.5343, "step": 166440 }, { "epoch": 0.8312317411171315, "grad_norm": 0.09252618998289108, "learning_rate": 5.075721544969837e-06, "loss": 8.5364, "step": 166450 }, { "epoch": 0.8312816799420709, "grad_norm": 0.0892346054315567, "learning_rate": 5.074219630028286e-06, "loss": 8.5372, "step": 166460 }, { "epoch": 0.8313316187670104, "grad_norm": 0.08983513712882996, "learning_rate": 5.072717715086736e-06, "loss": 8.5547, "step": 166470 }, { "epoch": 0.8313815575919499, "grad_norm": 0.09154525399208069, "learning_rate": 5.071215800145185e-06, "loss": 8.5316, "step": 166480 }, { "epoch": 0.8314314964168893, "grad_norm": 0.09328502416610718, "learning_rate": 5.0697138852036346e-06, "loss": 8.5355, "step": 166490 }, { "epoch": 0.8314814352418287, "grad_norm": 0.09083747118711472, "learning_rate": 5.068211970262085e-06, "loss": 8.5346, "step": 166500 }, { "epoch": 0.8315313740667682, "grad_norm": 0.09179224073886871, "learning_rate": 5.066710055320534e-06, "loss": 8.5448, "step": 166510 }, { "epoch": 0.8315813128917077, "grad_norm": 0.08915836364030838, "learning_rate": 5.065208140378983e-06, "loss": 8.5404, "step": 166520 }, { "epoch": 0.8316312517166471, "grad_norm": 0.08934465050697327, "learning_rate": 5.063706225437433e-06, "loss": 8.5365, "step": 166530 }, { "epoch": 0.8316811905415865, "grad_norm": 0.09658721089363098, "learning_rate": 5.062204310495882e-06, "loss": 8.5354, "step": 166540 }, { "epoch": 0.831731129366526, "grad_norm": 0.09819358587265015, "learning_rate": 5.060702395554332e-06, "loss": 8.5305, "step": 166550 }, { "epoch": 0.8317810681914655, "grad_norm": 0.0890253409743309, "learning_rate": 5.0592004806127816e-06, "loss": 8.5532, "step": 166560 }, { "epoch": 0.8318310070164049, "grad_norm": 0.09140022099018097, "learning_rate": 5.057698565671231e-06, "loss": 8.5178, "step": 166570 }, { "epoch": 0.8318809458413443, "grad_norm": 0.0952766090631485, "learning_rate": 5.05619665072968e-06, "loss": 8.5309, "step": 166580 }, { "epoch": 0.8319308846662838, "grad_norm": 0.08961894363164902, "learning_rate": 5.05469473578813e-06, "loss": 8.5458, "step": 166590 }, { "epoch": 0.8319808234912233, "grad_norm": 0.08949257433414459, "learning_rate": 5.05319282084658e-06, "loss": 8.5408, "step": 166600 }, { "epoch": 0.8320307623161627, "grad_norm": 0.09538450837135315, "learning_rate": 5.051690905905029e-06, "loss": 8.5492, "step": 166610 }, { "epoch": 0.8320807011411021, "grad_norm": 0.0917927548289299, "learning_rate": 5.050188990963478e-06, "loss": 8.5572, "step": 166620 }, { "epoch": 0.8321306399660416, "grad_norm": 0.09348586946725845, "learning_rate": 5.048687076021928e-06, "loss": 8.5529, "step": 166630 }, { "epoch": 0.8321805787909811, "grad_norm": 0.09373587369918823, "learning_rate": 5.047185161080378e-06, "loss": 8.527, "step": 166640 }, { "epoch": 0.8322305176159205, "grad_norm": 0.08720920234918594, "learning_rate": 5.045683246138827e-06, "loss": 8.5178, "step": 166650 }, { "epoch": 0.8322804564408599, "grad_norm": 0.08963396400213242, "learning_rate": 5.044181331197277e-06, "loss": 8.533, "step": 166660 }, { "epoch": 0.8323303952657994, "grad_norm": 0.0868087187409401, "learning_rate": 5.042679416255727e-06, "loss": 8.5402, "step": 166670 }, { "epoch": 0.8323803340907389, "grad_norm": 0.09393750131130219, "learning_rate": 5.041177501314175e-06, "loss": 8.5407, "step": 166680 }, { "epoch": 0.8324302729156783, "grad_norm": 0.09192768484354019, "learning_rate": 5.039675586372625e-06, "loss": 8.5346, "step": 166690 }, { "epoch": 0.8324802117406177, "grad_norm": 0.10022985190153122, "learning_rate": 5.038173671431075e-06, "loss": 8.5368, "step": 166700 }, { "epoch": 0.8325301505655572, "grad_norm": 0.0937315970659256, "learning_rate": 5.036671756489524e-06, "loss": 8.5186, "step": 166710 }, { "epoch": 0.8325800893904967, "grad_norm": 0.09312406182289124, "learning_rate": 5.035169841547974e-06, "loss": 8.5229, "step": 166720 }, { "epoch": 0.8326300282154361, "grad_norm": 0.08995606750249863, "learning_rate": 5.033667926606423e-06, "loss": 8.5457, "step": 166730 }, { "epoch": 0.8326799670403755, "grad_norm": 0.09093338251113892, "learning_rate": 5.032166011664873e-06, "loss": 8.5067, "step": 166740 }, { "epoch": 0.832729905865315, "grad_norm": 0.0915585607290268, "learning_rate": 5.030664096723322e-06, "loss": 8.5639, "step": 166750 }, { "epoch": 0.8327798446902545, "grad_norm": 0.09234713017940521, "learning_rate": 5.029162181781772e-06, "loss": 8.5346, "step": 166760 }, { "epoch": 0.8328297835151939, "grad_norm": 0.08565306663513184, "learning_rate": 5.027660266840222e-06, "loss": 8.541, "step": 166770 }, { "epoch": 0.8328797223401333, "grad_norm": 0.08641324937343597, "learning_rate": 5.02615835189867e-06, "loss": 8.5491, "step": 166780 }, { "epoch": 0.8329296611650728, "grad_norm": 0.09579795598983765, "learning_rate": 5.0246564369571204e-06, "loss": 8.549, "step": 166790 }, { "epoch": 0.8329795999900123, "grad_norm": 0.09320466220378876, "learning_rate": 5.023154522015571e-06, "loss": 8.5598, "step": 166800 }, { "epoch": 0.8330295388149517, "grad_norm": 0.09680447727441788, "learning_rate": 5.021652607074019e-06, "loss": 8.5379, "step": 166810 }, { "epoch": 0.8330794776398911, "grad_norm": 0.08433427661657333, "learning_rate": 5.020150692132469e-06, "loss": 8.5581, "step": 166820 }, { "epoch": 0.8331294164648306, "grad_norm": 0.09262140095233917, "learning_rate": 5.018648777190918e-06, "loss": 8.524, "step": 166830 }, { "epoch": 0.83317935528977, "grad_norm": 0.09593355655670166, "learning_rate": 5.017146862249368e-06, "loss": 8.5378, "step": 166840 }, { "epoch": 0.8332292941147095, "grad_norm": 0.09165123105049133, "learning_rate": 5.015644947307818e-06, "loss": 8.54, "step": 166850 }, { "epoch": 0.8332792329396489, "grad_norm": 0.09254296123981476, "learning_rate": 5.014143032366267e-06, "loss": 8.5392, "step": 166860 }, { "epoch": 0.8333291717645884, "grad_norm": 0.0935739278793335, "learning_rate": 5.012641117424717e-06, "loss": 8.5476, "step": 166870 }, { "epoch": 0.8333791105895278, "grad_norm": 0.09110825508832932, "learning_rate": 5.011139202483166e-06, "loss": 8.5356, "step": 166880 }, { "epoch": 0.8334290494144673, "grad_norm": 0.09004233777523041, "learning_rate": 5.0096372875416154e-06, "loss": 8.5363, "step": 166890 }, { "epoch": 0.8334789882394067, "grad_norm": 0.08766531944274902, "learning_rate": 5.008135372600066e-06, "loss": 8.5418, "step": 166900 }, { "epoch": 0.8335289270643462, "grad_norm": 0.09655581414699554, "learning_rate": 5.006633457658514e-06, "loss": 8.5334, "step": 166910 }, { "epoch": 0.8335788658892856, "grad_norm": 0.08657383173704147, "learning_rate": 5.005131542716964e-06, "loss": 8.5564, "step": 166920 }, { "epoch": 0.8336288047142251, "grad_norm": 0.09356348216533661, "learning_rate": 5.003629627775414e-06, "loss": 8.5438, "step": 166930 }, { "epoch": 0.8336787435391645, "grad_norm": 0.0967157855629921, "learning_rate": 5.002127712833863e-06, "loss": 8.5556, "step": 166940 }, { "epoch": 0.833728682364104, "grad_norm": 0.09090452641248703, "learning_rate": 5.000625797892313e-06, "loss": 8.5707, "step": 166950 }, { "epoch": 0.8337786211890434, "grad_norm": 0.09209217876195908, "learning_rate": 4.9991238829507624e-06, "loss": 8.5592, "step": 166960 }, { "epoch": 0.8338285600139829, "grad_norm": 0.09082140028476715, "learning_rate": 4.997621968009212e-06, "loss": 8.5343, "step": 166970 }, { "epoch": 0.8338784988389223, "grad_norm": 0.09138956665992737, "learning_rate": 4.996120053067661e-06, "loss": 8.5338, "step": 166980 }, { "epoch": 0.8339284376638618, "grad_norm": 0.09424415230751038, "learning_rate": 4.9946181381261104e-06, "loss": 8.5322, "step": 166990 }, { "epoch": 0.8339783764888012, "grad_norm": 0.09288877248764038, "learning_rate": 4.993116223184561e-06, "loss": 8.5364, "step": 167000 }, { "epoch": 0.8340283153137407, "grad_norm": 0.08676401525735855, "learning_rate": 4.99161430824301e-06, "loss": 8.5354, "step": 167010 }, { "epoch": 0.8340782541386801, "grad_norm": 0.08432342857122421, "learning_rate": 4.990112393301459e-06, "loss": 8.5373, "step": 167020 }, { "epoch": 0.8341281929636196, "grad_norm": 0.09226579964160919, "learning_rate": 4.988610478359909e-06, "loss": 8.5151, "step": 167030 }, { "epoch": 0.834178131788559, "grad_norm": 0.0887642651796341, "learning_rate": 4.987108563418359e-06, "loss": 8.5471, "step": 167040 }, { "epoch": 0.8342280706134985, "grad_norm": 0.0907331258058548, "learning_rate": 4.985606648476808e-06, "loss": 8.5403, "step": 167050 }, { "epoch": 0.8342780094384379, "grad_norm": 0.08834332972764969, "learning_rate": 4.9841047335352575e-06, "loss": 8.5228, "step": 167060 }, { "epoch": 0.8343279482633774, "grad_norm": 0.08750323951244354, "learning_rate": 4.982602818593707e-06, "loss": 8.5424, "step": 167070 }, { "epoch": 0.8343778870883168, "grad_norm": 0.08858982473611832, "learning_rate": 4.981100903652156e-06, "loss": 8.5551, "step": 167080 }, { "epoch": 0.8344278259132563, "grad_norm": 0.09022153168916702, "learning_rate": 4.979598988710606e-06, "loss": 8.5344, "step": 167090 }, { "epoch": 0.8344777647381957, "grad_norm": 0.0896066427230835, "learning_rate": 4.978097073769056e-06, "loss": 8.5426, "step": 167100 }, { "epoch": 0.8345277035631352, "grad_norm": 0.09046115726232529, "learning_rate": 4.976595158827505e-06, "loss": 8.545, "step": 167110 }, { "epoch": 0.8345776423880746, "grad_norm": 0.08858959376811981, "learning_rate": 4.975093243885955e-06, "loss": 8.5482, "step": 167120 }, { "epoch": 0.8346275812130141, "grad_norm": 0.08675581961870193, "learning_rate": 4.9735913289444045e-06, "loss": 8.5382, "step": 167130 }, { "epoch": 0.8346775200379535, "grad_norm": 0.08715889602899551, "learning_rate": 4.972089414002854e-06, "loss": 8.5394, "step": 167140 }, { "epoch": 0.8347274588628929, "grad_norm": 0.0885719358921051, "learning_rate": 4.970587499061303e-06, "loss": 8.5276, "step": 167150 }, { "epoch": 0.8347773976878324, "grad_norm": 0.09617254137992859, "learning_rate": 4.9690855841197525e-06, "loss": 8.5369, "step": 167160 }, { "epoch": 0.8348273365127719, "grad_norm": 0.09246990084648132, "learning_rate": 4.967583669178203e-06, "loss": 8.5449, "step": 167170 }, { "epoch": 0.8348772753377113, "grad_norm": 0.08657874912023544, "learning_rate": 4.966081754236652e-06, "loss": 8.5495, "step": 167180 }, { "epoch": 0.8349272141626507, "grad_norm": 0.08776073902845383, "learning_rate": 4.964579839295101e-06, "loss": 8.5435, "step": 167190 }, { "epoch": 0.8349771529875902, "grad_norm": 0.08979029953479767, "learning_rate": 4.9630779243535515e-06, "loss": 8.5452, "step": 167200 }, { "epoch": 0.8350270918125297, "grad_norm": 0.09311855584383011, "learning_rate": 4.961576009412e-06, "loss": 8.5497, "step": 167210 }, { "epoch": 0.8350770306374691, "grad_norm": 0.09252220392227173, "learning_rate": 4.96007409447045e-06, "loss": 8.5304, "step": 167220 }, { "epoch": 0.8351269694624085, "grad_norm": 0.08825825899839401, "learning_rate": 4.9585721795288995e-06, "loss": 8.5425, "step": 167230 }, { "epoch": 0.835176908287348, "grad_norm": 0.09545952081680298, "learning_rate": 4.957070264587349e-06, "loss": 8.5281, "step": 167240 }, { "epoch": 0.8352268471122875, "grad_norm": 0.09352894127368927, "learning_rate": 4.955568349645799e-06, "loss": 8.5386, "step": 167250 }, { "epoch": 0.8352767859372269, "grad_norm": 0.09909328073263168, "learning_rate": 4.9540664347042475e-06, "loss": 8.537, "step": 167260 }, { "epoch": 0.8353267247621663, "grad_norm": 0.09386103600263596, "learning_rate": 4.952564519762698e-06, "loss": 8.545, "step": 167270 }, { "epoch": 0.8353766635871058, "grad_norm": 0.08503425866365433, "learning_rate": 4.951062604821148e-06, "loss": 8.5802, "step": 167280 }, { "epoch": 0.8354266024120452, "grad_norm": 0.09073678404092789, "learning_rate": 4.949560689879596e-06, "loss": 8.5313, "step": 167290 }, { "epoch": 0.8354765412369847, "grad_norm": 0.09098771214485168, "learning_rate": 4.9480587749380465e-06, "loss": 8.5403, "step": 167300 }, { "epoch": 0.8355264800619241, "grad_norm": 0.09336119145154953, "learning_rate": 4.946556859996495e-06, "loss": 8.5428, "step": 167310 }, { "epoch": 0.8355764188868636, "grad_norm": 0.08903469145298004, "learning_rate": 4.945054945054945e-06, "loss": 8.5508, "step": 167320 }, { "epoch": 0.835626357711803, "grad_norm": 0.08720388263463974, "learning_rate": 4.943553030113395e-06, "loss": 8.54, "step": 167330 }, { "epoch": 0.8356762965367425, "grad_norm": 0.09315963834524155, "learning_rate": 4.942051115171844e-06, "loss": 8.549, "step": 167340 }, { "epoch": 0.8357262353616819, "grad_norm": 0.08953353762626648, "learning_rate": 4.940549200230294e-06, "loss": 8.5557, "step": 167350 }, { "epoch": 0.8357761741866214, "grad_norm": 0.0915287509560585, "learning_rate": 4.939047285288743e-06, "loss": 8.5594, "step": 167360 }, { "epoch": 0.8358261130115608, "grad_norm": 0.09571286290884018, "learning_rate": 4.937545370347193e-06, "loss": 8.541, "step": 167370 }, { "epoch": 0.8358760518365003, "grad_norm": 0.08816110342741013, "learning_rate": 4.936043455405643e-06, "loss": 8.5397, "step": 167380 }, { "epoch": 0.8359259906614397, "grad_norm": 0.08931413292884827, "learning_rate": 4.934541540464091e-06, "loss": 8.5379, "step": 167390 }, { "epoch": 0.8359759294863792, "grad_norm": 0.09298036247491837, "learning_rate": 4.9330396255225415e-06, "loss": 8.532, "step": 167400 }, { "epoch": 0.8360258683113186, "grad_norm": 0.08843599259853363, "learning_rate": 4.931537710580991e-06, "loss": 8.539, "step": 167410 }, { "epoch": 0.8360758071362581, "grad_norm": 0.0943143293261528, "learning_rate": 4.93003579563944e-06, "loss": 8.5463, "step": 167420 }, { "epoch": 0.8361257459611975, "grad_norm": 0.08989337831735611, "learning_rate": 4.92853388069789e-06, "loss": 8.5562, "step": 167430 }, { "epoch": 0.836175684786137, "grad_norm": 0.09261681884527206, "learning_rate": 4.92703196575634e-06, "loss": 8.5383, "step": 167440 }, { "epoch": 0.8362256236110764, "grad_norm": 0.088409423828125, "learning_rate": 4.925530050814789e-06, "loss": 8.5393, "step": 167450 }, { "epoch": 0.8362755624360159, "grad_norm": 0.0936957448720932, "learning_rate": 4.924028135873238e-06, "loss": 8.5375, "step": 167460 }, { "epoch": 0.8363255012609553, "grad_norm": 0.08874178677797318, "learning_rate": 4.922526220931688e-06, "loss": 8.5396, "step": 167470 }, { "epoch": 0.8363754400858948, "grad_norm": 0.09026958048343658, "learning_rate": 4.921024305990138e-06, "loss": 8.5348, "step": 167480 }, { "epoch": 0.8364253789108342, "grad_norm": 0.09545276314020157, "learning_rate": 4.919522391048587e-06, "loss": 8.5391, "step": 167490 }, { "epoch": 0.8364753177357737, "grad_norm": 0.08839011937379837, "learning_rate": 4.9180204761070365e-06, "loss": 8.5282, "step": 167500 }, { "epoch": 0.8365252565607131, "grad_norm": 0.08772112429141998, "learning_rate": 4.916518561165486e-06, "loss": 8.5369, "step": 167510 }, { "epoch": 0.8365751953856526, "grad_norm": 0.09278785437345505, "learning_rate": 4.915016646223936e-06, "loss": 8.5288, "step": 167520 }, { "epoch": 0.836625134210592, "grad_norm": 0.09600167721509933, "learning_rate": 4.913514731282385e-06, "loss": 8.5258, "step": 167530 }, { "epoch": 0.8366750730355315, "grad_norm": 0.08788032084703445, "learning_rate": 4.912012816340835e-06, "loss": 8.5342, "step": 167540 }, { "epoch": 0.8367250118604709, "grad_norm": 0.08811583369970322, "learning_rate": 4.910510901399284e-06, "loss": 8.5487, "step": 167550 }, { "epoch": 0.8367749506854104, "grad_norm": 0.08983556926250458, "learning_rate": 4.909008986457733e-06, "loss": 8.5451, "step": 167560 }, { "epoch": 0.8368248895103498, "grad_norm": 0.08975329995155334, "learning_rate": 4.9075070715161835e-06, "loss": 8.5408, "step": 167570 }, { "epoch": 0.8368748283352893, "grad_norm": 0.09413887560367584, "learning_rate": 4.906005156574633e-06, "loss": 8.5396, "step": 167580 }, { "epoch": 0.8369247671602287, "grad_norm": 0.09020008891820908, "learning_rate": 4.904503241633082e-06, "loss": 8.5279, "step": 167590 }, { "epoch": 0.8369747059851682, "grad_norm": 0.09177325665950775, "learning_rate": 4.903001326691532e-06, "loss": 8.5333, "step": 167600 }, { "epoch": 0.8370246448101076, "grad_norm": 0.09214575588703156, "learning_rate": 4.901499411749981e-06, "loss": 8.5326, "step": 167610 }, { "epoch": 0.837074583635047, "grad_norm": 0.09108281135559082, "learning_rate": 4.899997496808431e-06, "loss": 8.5556, "step": 167620 }, { "epoch": 0.8371245224599865, "grad_norm": 0.09346097707748413, "learning_rate": 4.89849558186688e-06, "loss": 8.5301, "step": 167630 }, { "epoch": 0.837174461284926, "grad_norm": 0.09483732283115387, "learning_rate": 4.89699366692533e-06, "loss": 8.5402, "step": 167640 }, { "epoch": 0.8372244001098654, "grad_norm": 0.09763989597558975, "learning_rate": 4.89549175198378e-06, "loss": 8.5482, "step": 167650 }, { "epoch": 0.8372743389348049, "grad_norm": 0.09391061961650848, "learning_rate": 4.893989837042228e-06, "loss": 8.5302, "step": 167660 }, { "epoch": 0.8373242777597443, "grad_norm": 0.09133297950029373, "learning_rate": 4.8924879221006785e-06, "loss": 8.5292, "step": 167670 }, { "epoch": 0.8373742165846838, "grad_norm": 0.09636905044317245, "learning_rate": 4.890986007159129e-06, "loss": 8.5271, "step": 167680 }, { "epoch": 0.8374241554096232, "grad_norm": 0.09621647000312805, "learning_rate": 4.889484092217577e-06, "loss": 8.5309, "step": 167690 }, { "epoch": 0.8374740942345626, "grad_norm": 0.09955976903438568, "learning_rate": 4.887982177276027e-06, "loss": 8.5377, "step": 167700 }, { "epoch": 0.8375240330595021, "grad_norm": 0.08797106146812439, "learning_rate": 4.886480262334476e-06, "loss": 8.5314, "step": 167710 }, { "epoch": 0.8375739718844416, "grad_norm": 0.08975420147180557, "learning_rate": 4.884978347392926e-06, "loss": 8.5436, "step": 167720 }, { "epoch": 0.837623910709381, "grad_norm": 0.09256165474653244, "learning_rate": 4.883476432451376e-06, "loss": 8.5286, "step": 167730 }, { "epoch": 0.8376738495343204, "grad_norm": 0.09067859500646591, "learning_rate": 4.881974517509825e-06, "loss": 8.5291, "step": 167740 }, { "epoch": 0.8377237883592599, "grad_norm": 0.08854568004608154, "learning_rate": 4.880472602568275e-06, "loss": 8.5603, "step": 167750 }, { "epoch": 0.8377737271841994, "grad_norm": 0.09424934536218643, "learning_rate": 4.878970687626724e-06, "loss": 8.544, "step": 167760 }, { "epoch": 0.8378236660091388, "grad_norm": 0.08619468659162521, "learning_rate": 4.8774687726851735e-06, "loss": 8.5323, "step": 167770 }, { "epoch": 0.8378736048340782, "grad_norm": 0.09160079061985016, "learning_rate": 4.875966857743624e-06, "loss": 8.5439, "step": 167780 }, { "epoch": 0.8379235436590177, "grad_norm": 0.09085732698440552, "learning_rate": 4.874464942802072e-06, "loss": 8.54, "step": 167790 }, { "epoch": 0.8379734824839572, "grad_norm": 0.08762364834547043, "learning_rate": 4.872963027860522e-06, "loss": 8.531, "step": 167800 }, { "epoch": 0.8380234213088966, "grad_norm": 0.09410136938095093, "learning_rate": 4.871461112918972e-06, "loss": 8.5436, "step": 167810 }, { "epoch": 0.838073360133836, "grad_norm": 0.0921444445848465, "learning_rate": 4.869959197977421e-06, "loss": 8.5332, "step": 167820 }, { "epoch": 0.8381232989587755, "grad_norm": 0.09351623058319092, "learning_rate": 4.868457283035871e-06, "loss": 8.5444, "step": 167830 }, { "epoch": 0.838173237783715, "grad_norm": 0.08999743312597275, "learning_rate": 4.8669553680943205e-06, "loss": 8.5551, "step": 167840 }, { "epoch": 0.8382231766086544, "grad_norm": 0.09446761757135391, "learning_rate": 4.86545345315277e-06, "loss": 8.5314, "step": 167850 }, { "epoch": 0.8382731154335938, "grad_norm": 0.09473822265863419, "learning_rate": 4.863951538211219e-06, "loss": 8.5381, "step": 167860 }, { "epoch": 0.8383230542585333, "grad_norm": 0.09794218093156815, "learning_rate": 4.8624496232696685e-06, "loss": 8.5321, "step": 167870 }, { "epoch": 0.8383729930834728, "grad_norm": 0.09353634715080261, "learning_rate": 4.860947708328119e-06, "loss": 8.5426, "step": 167880 }, { "epoch": 0.8384229319084122, "grad_norm": 0.0915239006280899, "learning_rate": 4.859445793386568e-06, "loss": 8.5361, "step": 167890 }, { "epoch": 0.8384728707333516, "grad_norm": 0.09556429833173752, "learning_rate": 4.857943878445017e-06, "loss": 8.5238, "step": 167900 }, { "epoch": 0.8385228095582911, "grad_norm": 0.09241461008787155, "learning_rate": 4.856441963503467e-06, "loss": 8.5465, "step": 167910 }, { "epoch": 0.8385727483832306, "grad_norm": 0.09638688713312149, "learning_rate": 4.854940048561917e-06, "loss": 8.5311, "step": 167920 }, { "epoch": 0.83862268720817, "grad_norm": 0.09017990529537201, "learning_rate": 4.853438133620366e-06, "loss": 8.5524, "step": 167930 }, { "epoch": 0.8386726260331094, "grad_norm": 0.0920003205537796, "learning_rate": 4.8519362186788156e-06, "loss": 8.5257, "step": 167940 }, { "epoch": 0.8387225648580489, "grad_norm": 0.09186447411775589, "learning_rate": 4.850434303737265e-06, "loss": 8.5343, "step": 167950 }, { "epoch": 0.8387725036829884, "grad_norm": 0.09250812232494354, "learning_rate": 4.848932388795714e-06, "loss": 8.5416, "step": 167960 }, { "epoch": 0.8388224425079278, "grad_norm": 0.096126988530159, "learning_rate": 4.847430473854164e-06, "loss": 8.5307, "step": 167970 }, { "epoch": 0.8388723813328672, "grad_norm": 0.09085419774055481, "learning_rate": 4.845928558912614e-06, "loss": 8.5487, "step": 167980 }, { "epoch": 0.8389223201578067, "grad_norm": 0.09151645004749298, "learning_rate": 4.844426643971063e-06, "loss": 8.5358, "step": 167990 }, { "epoch": 0.8389722589827462, "grad_norm": 0.09600605070590973, "learning_rate": 4.842924729029513e-06, "loss": 8.5415, "step": 168000 }, { "epoch": 0.8390221978076856, "grad_norm": 0.08343446999788284, "learning_rate": 4.841422814087962e-06, "loss": 8.5486, "step": 168010 }, { "epoch": 0.839072136632625, "grad_norm": 0.0970931351184845, "learning_rate": 4.839920899146412e-06, "loss": 8.5497, "step": 168020 }, { "epoch": 0.8391220754575645, "grad_norm": 0.09128961712121964, "learning_rate": 4.838418984204861e-06, "loss": 8.5345, "step": 168030 }, { "epoch": 0.839172014282504, "grad_norm": 0.08671637624502182, "learning_rate": 4.8369170692633106e-06, "loss": 8.5515, "step": 168040 }, { "epoch": 0.8392219531074434, "grad_norm": 0.09237731248140335, "learning_rate": 4.835415154321761e-06, "loss": 8.4984, "step": 168050 }, { "epoch": 0.8392718919323828, "grad_norm": 0.08762998133897781, "learning_rate": 4.833913239380209e-06, "loss": 8.5373, "step": 168060 }, { "epoch": 0.8393218307573223, "grad_norm": 0.08894840627908707, "learning_rate": 4.832411324438659e-06, "loss": 8.5283, "step": 168070 }, { "epoch": 0.8393717695822618, "grad_norm": 0.0915420651435852, "learning_rate": 4.83090940949711e-06, "loss": 8.5282, "step": 168080 }, { "epoch": 0.8394217084072012, "grad_norm": 0.09219279140233994, "learning_rate": 4.829407494555558e-06, "loss": 8.5298, "step": 168090 }, { "epoch": 0.8394716472321406, "grad_norm": 0.09041428565979004, "learning_rate": 4.827905579614008e-06, "loss": 8.5258, "step": 168100 }, { "epoch": 0.83952158605708, "grad_norm": 0.08865369856357574, "learning_rate": 4.826403664672457e-06, "loss": 8.5421, "step": 168110 }, { "epoch": 0.8395715248820195, "grad_norm": 0.09181597828865051, "learning_rate": 4.824901749730907e-06, "loss": 8.5309, "step": 168120 }, { "epoch": 0.839621463706959, "grad_norm": 0.0943143367767334, "learning_rate": 4.823399834789357e-06, "loss": 8.5376, "step": 168130 }, { "epoch": 0.8396714025318984, "grad_norm": 0.0917147845029831, "learning_rate": 4.8218979198478056e-06, "loss": 8.5423, "step": 168140 }, { "epoch": 0.8397213413568378, "grad_norm": 0.09135729819536209, "learning_rate": 4.820396004906256e-06, "loss": 8.5337, "step": 168150 }, { "epoch": 0.8397712801817773, "grad_norm": 0.09105221182107925, "learning_rate": 4.818894089964705e-06, "loss": 8.5277, "step": 168160 }, { "epoch": 0.8398212190067168, "grad_norm": 0.09912960231304169, "learning_rate": 4.817392175023154e-06, "loss": 8.5492, "step": 168170 }, { "epoch": 0.8398711578316562, "grad_norm": 0.08730529993772507, "learning_rate": 4.815890260081605e-06, "loss": 8.5422, "step": 168180 }, { "epoch": 0.8399210966565956, "grad_norm": 0.09402299672365189, "learning_rate": 4.814388345140053e-06, "loss": 8.5343, "step": 168190 }, { "epoch": 0.8399710354815351, "grad_norm": 0.09148437529802322, "learning_rate": 4.812886430198503e-06, "loss": 8.5231, "step": 168200 }, { "epoch": 0.8400209743064746, "grad_norm": 0.08780474960803986, "learning_rate": 4.811384515256953e-06, "loss": 8.5322, "step": 168210 }, { "epoch": 0.840070913131414, "grad_norm": 0.08754914999008179, "learning_rate": 4.809882600315402e-06, "loss": 8.5507, "step": 168220 }, { "epoch": 0.8401208519563534, "grad_norm": 0.08717711269855499, "learning_rate": 4.808380685373852e-06, "loss": 8.5388, "step": 168230 }, { "epoch": 0.8401707907812929, "grad_norm": 0.09865214675664902, "learning_rate": 4.806878770432301e-06, "loss": 8.5198, "step": 168240 }, { "epoch": 0.8402207296062324, "grad_norm": 0.09098966419696808, "learning_rate": 4.805376855490751e-06, "loss": 8.5319, "step": 168250 }, { "epoch": 0.8402706684311718, "grad_norm": 0.08804722875356674, "learning_rate": 4.803874940549201e-06, "loss": 8.5533, "step": 168260 }, { "epoch": 0.8403206072561112, "grad_norm": 0.09023669362068176, "learning_rate": 4.802373025607649e-06, "loss": 8.5487, "step": 168270 }, { "epoch": 0.8403705460810507, "grad_norm": 0.09287792444229126, "learning_rate": 4.8008711106661e-06, "loss": 8.5345, "step": 168280 }, { "epoch": 0.8404204849059902, "grad_norm": 0.09334767609834671, "learning_rate": 4.799369195724549e-06, "loss": 8.5383, "step": 168290 }, { "epoch": 0.8404704237309296, "grad_norm": 0.09085261076688766, "learning_rate": 4.797867280782998e-06, "loss": 8.5452, "step": 168300 }, { "epoch": 0.840520362555869, "grad_norm": 0.08985559642314911, "learning_rate": 4.7963653658414484e-06, "loss": 8.5287, "step": 168310 }, { "epoch": 0.8405703013808085, "grad_norm": 0.09192880243062973, "learning_rate": 4.794863450899898e-06, "loss": 8.5477, "step": 168320 }, { "epoch": 0.840620240205748, "grad_norm": 0.08942985534667969, "learning_rate": 4.793361535958347e-06, "loss": 8.5286, "step": 168330 }, { "epoch": 0.8406701790306874, "grad_norm": 0.09409672021865845, "learning_rate": 4.7918596210167964e-06, "loss": 8.5213, "step": 168340 }, { "epoch": 0.8407201178556268, "grad_norm": 0.0878412127494812, "learning_rate": 4.790357706075246e-06, "loss": 8.5445, "step": 168350 }, { "epoch": 0.8407700566805663, "grad_norm": 0.09042507410049438, "learning_rate": 4.788855791133696e-06, "loss": 8.5327, "step": 168360 }, { "epoch": 0.8408199955055058, "grad_norm": 0.09061606973409653, "learning_rate": 4.787353876192145e-06, "loss": 8.5255, "step": 168370 }, { "epoch": 0.8408699343304452, "grad_norm": 0.08747304230928421, "learning_rate": 4.785851961250595e-06, "loss": 8.5308, "step": 168380 }, { "epoch": 0.8409198731553846, "grad_norm": 0.08752010762691498, "learning_rate": 4.784350046309044e-06, "loss": 8.5585, "step": 168390 }, { "epoch": 0.8409698119803241, "grad_norm": 0.09220509976148605, "learning_rate": 4.782848131367494e-06, "loss": 8.5299, "step": 168400 }, { "epoch": 0.8410197508052636, "grad_norm": 0.09601190686225891, "learning_rate": 4.7813462164259434e-06, "loss": 8.5213, "step": 168410 }, { "epoch": 0.841069689630203, "grad_norm": 0.09144755452871323, "learning_rate": 4.779844301484393e-06, "loss": 8.5377, "step": 168420 }, { "epoch": 0.8411196284551424, "grad_norm": 0.09117061644792557, "learning_rate": 4.778342386542842e-06, "loss": 8.5383, "step": 168430 }, { "epoch": 0.8411695672800819, "grad_norm": 0.0880817323923111, "learning_rate": 4.7768404716012914e-06, "loss": 8.548, "step": 168440 }, { "epoch": 0.8412195061050214, "grad_norm": 0.09438186883926392, "learning_rate": 4.775338556659742e-06, "loss": 8.5538, "step": 168450 }, { "epoch": 0.8412694449299608, "grad_norm": 0.09241968393325806, "learning_rate": 4.773836641718191e-06, "loss": 8.5313, "step": 168460 }, { "epoch": 0.8413193837549002, "grad_norm": 0.09603623300790787, "learning_rate": 4.77233472677664e-06, "loss": 8.5278, "step": 168470 }, { "epoch": 0.8413693225798397, "grad_norm": 0.094639852643013, "learning_rate": 4.7708328118350905e-06, "loss": 8.5353, "step": 168480 }, { "epoch": 0.8414192614047792, "grad_norm": 0.09024223685264587, "learning_rate": 4.769330896893539e-06, "loss": 8.5444, "step": 168490 }, { "epoch": 0.8414692002297186, "grad_norm": 0.08873730897903442, "learning_rate": 4.767828981951989e-06, "loss": 8.523, "step": 168500 }, { "epoch": 0.841519139054658, "grad_norm": 0.0888361856341362, "learning_rate": 4.7663270670104384e-06, "loss": 8.5363, "step": 168510 }, { "epoch": 0.8415690778795974, "grad_norm": 0.0935666486620903, "learning_rate": 4.764825152068888e-06, "loss": 8.5435, "step": 168520 }, { "epoch": 0.841619016704537, "grad_norm": 0.0903899073600769, "learning_rate": 4.763323237127338e-06, "loss": 8.5419, "step": 168530 }, { "epoch": 0.8416689555294764, "grad_norm": 0.09109959751367569, "learning_rate": 4.7618213221857864e-06, "loss": 8.5273, "step": 168540 }, { "epoch": 0.8417188943544158, "grad_norm": 0.08860781043767929, "learning_rate": 4.760319407244237e-06, "loss": 8.5301, "step": 168550 }, { "epoch": 0.8417688331793552, "grad_norm": 0.09561283141374588, "learning_rate": 4.758817492302687e-06, "loss": 8.5381, "step": 168560 }, { "epoch": 0.8418187720042948, "grad_norm": 0.09598873555660248, "learning_rate": 4.757315577361135e-06, "loss": 8.5349, "step": 168570 }, { "epoch": 0.8418687108292342, "grad_norm": 0.09074491262435913, "learning_rate": 4.7558136624195855e-06, "loss": 8.5473, "step": 168580 }, { "epoch": 0.8419186496541736, "grad_norm": 0.08866456151008606, "learning_rate": 4.754311747478034e-06, "loss": 8.5383, "step": 168590 }, { "epoch": 0.841968588479113, "grad_norm": 0.09333822876214981, "learning_rate": 4.752809832536484e-06, "loss": 8.5318, "step": 168600 }, { "epoch": 0.8420185273040526, "grad_norm": 0.09061551094055176, "learning_rate": 4.751307917594934e-06, "loss": 8.5349, "step": 168610 }, { "epoch": 0.842068466128992, "grad_norm": 0.09228689223527908, "learning_rate": 4.749806002653383e-06, "loss": 8.5363, "step": 168620 }, { "epoch": 0.8421184049539314, "grad_norm": 0.09009858220815659, "learning_rate": 4.748304087711833e-06, "loss": 8.5303, "step": 168630 }, { "epoch": 0.8421683437788708, "grad_norm": 0.09529682248830795, "learning_rate": 4.746802172770282e-06, "loss": 8.5392, "step": 168640 }, { "epoch": 0.8422182826038104, "grad_norm": 0.09646670520305634, "learning_rate": 4.745300257828732e-06, "loss": 8.5259, "step": 168650 }, { "epoch": 0.8422682214287498, "grad_norm": 0.09157206863164902, "learning_rate": 4.743798342887182e-06, "loss": 8.5305, "step": 168660 }, { "epoch": 0.8423181602536892, "grad_norm": 0.09323584288358688, "learning_rate": 4.74229642794563e-06, "loss": 8.5238, "step": 168670 }, { "epoch": 0.8423680990786286, "grad_norm": 0.08792025595903397, "learning_rate": 4.7407945130040805e-06, "loss": 8.5433, "step": 168680 }, { "epoch": 0.8424180379035682, "grad_norm": 0.08940023183822632, "learning_rate": 4.73929259806253e-06, "loss": 8.5252, "step": 168690 }, { "epoch": 0.8424679767285076, "grad_norm": 0.09656330198049545, "learning_rate": 4.737790683120979e-06, "loss": 8.5101, "step": 168700 }, { "epoch": 0.842517915553447, "grad_norm": 0.09180876612663269, "learning_rate": 4.736288768179429e-06, "loss": 8.5169, "step": 168710 }, { "epoch": 0.8425678543783864, "grad_norm": 0.08640285581350327, "learning_rate": 4.734786853237879e-06, "loss": 8.5429, "step": 168720 }, { "epoch": 0.842617793203326, "grad_norm": 0.0890103355050087, "learning_rate": 4.733284938296328e-06, "loss": 8.5434, "step": 168730 }, { "epoch": 0.8426677320282654, "grad_norm": 0.09099707752466202, "learning_rate": 4.731783023354777e-06, "loss": 8.5422, "step": 168740 }, { "epoch": 0.8427176708532048, "grad_norm": 0.09098941087722778, "learning_rate": 4.730281108413227e-06, "loss": 8.5546, "step": 168750 }, { "epoch": 0.8427676096781442, "grad_norm": 0.09195592254400253, "learning_rate": 4.728779193471677e-06, "loss": 8.5293, "step": 168760 }, { "epoch": 0.8428175485030838, "grad_norm": 0.09096460044384003, "learning_rate": 4.727277278530126e-06, "loss": 8.5446, "step": 168770 }, { "epoch": 0.8428674873280232, "grad_norm": 0.08720100671052933, "learning_rate": 4.7257753635885755e-06, "loss": 8.5341, "step": 168780 }, { "epoch": 0.8429174261529626, "grad_norm": 0.09274513274431229, "learning_rate": 4.724273448647025e-06, "loss": 8.5603, "step": 168790 }, { "epoch": 0.842967364977902, "grad_norm": 0.09329745918512344, "learning_rate": 4.722771533705475e-06, "loss": 8.561, "step": 168800 }, { "epoch": 0.8430173038028416, "grad_norm": 0.08319691568613052, "learning_rate": 4.721269618763924e-06, "loss": 8.531, "step": 168810 }, { "epoch": 0.843067242627781, "grad_norm": 0.0921143889427185, "learning_rate": 4.719767703822374e-06, "loss": 8.539, "step": 168820 }, { "epoch": 0.8431171814527204, "grad_norm": 0.09409576654434204, "learning_rate": 4.718265788880823e-06, "loss": 8.5339, "step": 168830 }, { "epoch": 0.8431671202776598, "grad_norm": 0.08891736716032028, "learning_rate": 4.716763873939272e-06, "loss": 8.541, "step": 168840 }, { "epoch": 0.8432170591025994, "grad_norm": 0.08662791550159454, "learning_rate": 4.7152619589977225e-06, "loss": 8.5403, "step": 168850 }, { "epoch": 0.8432669979275388, "grad_norm": 0.09259507060050964, "learning_rate": 4.713760044056172e-06, "loss": 8.5186, "step": 168860 }, { "epoch": 0.8433169367524782, "grad_norm": 0.09273169934749603, "learning_rate": 4.712258129114621e-06, "loss": 8.533, "step": 168870 }, { "epoch": 0.8433668755774176, "grad_norm": 0.0853755995631218, "learning_rate": 4.710756214173071e-06, "loss": 8.5447, "step": 168880 }, { "epoch": 0.8434168144023572, "grad_norm": 0.09415735304355621, "learning_rate": 4.70925429923152e-06, "loss": 8.5433, "step": 168890 }, { "epoch": 0.8434667532272966, "grad_norm": 0.0893118679523468, "learning_rate": 4.70775238428997e-06, "loss": 8.5281, "step": 168900 }, { "epoch": 0.843516692052236, "grad_norm": 0.09335058182477951, "learning_rate": 4.706250469348419e-06, "loss": 8.5326, "step": 168910 }, { "epoch": 0.8435666308771754, "grad_norm": 0.09207750111818314, "learning_rate": 4.704748554406869e-06, "loss": 8.5416, "step": 168920 }, { "epoch": 0.843616569702115, "grad_norm": 0.08903926610946655, "learning_rate": 4.703246639465319e-06, "loss": 8.5295, "step": 168930 }, { "epoch": 0.8436665085270544, "grad_norm": 0.09720440953969955, "learning_rate": 4.701744724523767e-06, "loss": 8.5293, "step": 168940 }, { "epoch": 0.8437164473519938, "grad_norm": 0.09019520878791809, "learning_rate": 4.7002428095822175e-06, "loss": 8.5264, "step": 168950 }, { "epoch": 0.8437663861769332, "grad_norm": 0.08853203058242798, "learning_rate": 4.698740894640668e-06, "loss": 8.5345, "step": 168960 }, { "epoch": 0.8438163250018728, "grad_norm": 0.09496580064296722, "learning_rate": 4.697238979699116e-06, "loss": 8.5143, "step": 168970 }, { "epoch": 0.8438662638268122, "grad_norm": 0.09561099112033844, "learning_rate": 4.695737064757566e-06, "loss": 8.5355, "step": 168980 }, { "epoch": 0.8439162026517516, "grad_norm": 0.08805878460407257, "learning_rate": 4.694235149816015e-06, "loss": 8.5346, "step": 168990 }, { "epoch": 0.843966141476691, "grad_norm": 0.08811572194099426, "learning_rate": 4.692733234874465e-06, "loss": 8.5266, "step": 169000 }, { "epoch": 0.8440160803016306, "grad_norm": 0.08896370977163315, "learning_rate": 4.691231319932915e-06, "loss": 8.5476, "step": 169010 }, { "epoch": 0.84406601912657, "grad_norm": 0.09269924461841583, "learning_rate": 4.689729404991364e-06, "loss": 8.5217, "step": 169020 }, { "epoch": 0.8441159579515094, "grad_norm": 0.0937482938170433, "learning_rate": 4.688227490049814e-06, "loss": 8.5131, "step": 169030 }, { "epoch": 0.8441658967764488, "grad_norm": 0.09169386327266693, "learning_rate": 4.686725575108263e-06, "loss": 8.5409, "step": 169040 }, { "epoch": 0.8442158356013884, "grad_norm": 0.09201053529977798, "learning_rate": 4.6852236601667125e-06, "loss": 8.5279, "step": 169050 }, { "epoch": 0.8442657744263278, "grad_norm": 0.08865007758140564, "learning_rate": 4.683721745225163e-06, "loss": 8.5374, "step": 169060 }, { "epoch": 0.8443157132512672, "grad_norm": 0.09043735265731812, "learning_rate": 4.682219830283611e-06, "loss": 8.5423, "step": 169070 }, { "epoch": 0.8443656520762066, "grad_norm": 0.08876598626375198, "learning_rate": 4.680717915342061e-06, "loss": 8.5303, "step": 169080 }, { "epoch": 0.8444155909011462, "grad_norm": 0.09009053558111191, "learning_rate": 4.679216000400511e-06, "loss": 8.5288, "step": 169090 }, { "epoch": 0.8444655297260856, "grad_norm": 0.09224365651607513, "learning_rate": 4.67771408545896e-06, "loss": 8.5421, "step": 169100 }, { "epoch": 0.844515468551025, "grad_norm": 0.09646381437778473, "learning_rate": 4.67621217051741e-06, "loss": 8.5252, "step": 169110 }, { "epoch": 0.8445654073759644, "grad_norm": 0.09529738873243332, "learning_rate": 4.6747102555758595e-06, "loss": 8.5247, "step": 169120 }, { "epoch": 0.8446153462009038, "grad_norm": 0.09440014511346817, "learning_rate": 4.673208340634309e-06, "loss": 8.5509, "step": 169130 }, { "epoch": 0.8446652850258434, "grad_norm": 0.09085705131292343, "learning_rate": 4.671706425692758e-06, "loss": 8.5411, "step": 169140 }, { "epoch": 0.8447152238507828, "grad_norm": 0.08810048550367355, "learning_rate": 4.6702045107512075e-06, "loss": 8.5213, "step": 169150 }, { "epoch": 0.8447651626757222, "grad_norm": 0.0883641242980957, "learning_rate": 4.668702595809658e-06, "loss": 8.5451, "step": 169160 }, { "epoch": 0.8448151015006616, "grad_norm": 0.09298266470432281, "learning_rate": 4.667200680868107e-06, "loss": 8.5362, "step": 169170 }, { "epoch": 0.8448650403256012, "grad_norm": 0.08597780764102936, "learning_rate": 4.665698765926556e-06, "loss": 8.5315, "step": 169180 }, { "epoch": 0.8449149791505406, "grad_norm": 0.09305087476968765, "learning_rate": 4.664196850985006e-06, "loss": 8.5321, "step": 169190 }, { "epoch": 0.84496491797548, "grad_norm": 0.09523943066596985, "learning_rate": 4.662694936043456e-06, "loss": 8.5278, "step": 169200 }, { "epoch": 0.8450148568004194, "grad_norm": 0.08965837210416794, "learning_rate": 4.661193021101905e-06, "loss": 8.5233, "step": 169210 }, { "epoch": 0.845064795625359, "grad_norm": 0.08698379248380661, "learning_rate": 4.6596911061603545e-06, "loss": 8.5409, "step": 169220 }, { "epoch": 0.8451147344502984, "grad_norm": 0.0960690900683403, "learning_rate": 4.658189191218804e-06, "loss": 8.5348, "step": 169230 }, { "epoch": 0.8451646732752378, "grad_norm": 0.09241048991680145, "learning_rate": 4.656687276277253e-06, "loss": 8.5386, "step": 169240 }, { "epoch": 0.8452146121001772, "grad_norm": 0.09009848535060883, "learning_rate": 4.655185361335703e-06, "loss": 8.5341, "step": 169250 }, { "epoch": 0.8452645509251168, "grad_norm": 0.0927545353770256, "learning_rate": 4.653683446394153e-06, "loss": 8.5328, "step": 169260 }, { "epoch": 0.8453144897500562, "grad_norm": 0.09297183901071548, "learning_rate": 4.652181531452602e-06, "loss": 8.5347, "step": 169270 }, { "epoch": 0.8453644285749956, "grad_norm": 0.0915551409125328, "learning_rate": 4.650679616511052e-06, "loss": 8.5317, "step": 169280 }, { "epoch": 0.845414367399935, "grad_norm": 0.0902283638715744, "learning_rate": 4.649177701569501e-06, "loss": 8.5346, "step": 169290 }, { "epoch": 0.8454643062248746, "grad_norm": 0.0934210941195488, "learning_rate": 4.647675786627951e-06, "loss": 8.5406, "step": 169300 }, { "epoch": 0.845514245049814, "grad_norm": 0.09512994438409805, "learning_rate": 4.6461738716864e-06, "loss": 8.5249, "step": 169310 }, { "epoch": 0.8455641838747534, "grad_norm": 0.08952774852514267, "learning_rate": 4.6446719567448495e-06, "loss": 8.5413, "step": 169320 }, { "epoch": 0.8456141226996928, "grad_norm": 0.09026441723108292, "learning_rate": 4.6431700418033e-06, "loss": 8.5283, "step": 169330 }, { "epoch": 0.8456640615246324, "grad_norm": 0.09348812699317932, "learning_rate": 4.641668126861749e-06, "loss": 8.5296, "step": 169340 }, { "epoch": 0.8457140003495718, "grad_norm": 0.09530653804540634, "learning_rate": 4.640166211920198e-06, "loss": 8.5413, "step": 169350 }, { "epoch": 0.8457639391745112, "grad_norm": 0.0886545479297638, "learning_rate": 4.6386642969786486e-06, "loss": 8.5314, "step": 169360 }, { "epoch": 0.8458138779994506, "grad_norm": 0.08922586590051651, "learning_rate": 4.637162382037097e-06, "loss": 8.5231, "step": 169370 }, { "epoch": 0.8458638168243902, "grad_norm": 0.09260469675064087, "learning_rate": 4.635660467095547e-06, "loss": 8.5274, "step": 169380 }, { "epoch": 0.8459137556493296, "grad_norm": 0.0915549173951149, "learning_rate": 4.6341585521539965e-06, "loss": 8.5405, "step": 169390 }, { "epoch": 0.845963694474269, "grad_norm": 0.09589400887489319, "learning_rate": 4.632656637212446e-06, "loss": 8.5489, "step": 169400 }, { "epoch": 0.8460136332992084, "grad_norm": 0.09545934200286865, "learning_rate": 4.631154722270896e-06, "loss": 8.5183, "step": 169410 }, { "epoch": 0.846063572124148, "grad_norm": 0.09627781063318253, "learning_rate": 4.6296528073293445e-06, "loss": 8.5336, "step": 169420 }, { "epoch": 0.8461135109490874, "grad_norm": 0.08923333883285522, "learning_rate": 4.628150892387795e-06, "loss": 8.5482, "step": 169430 }, { "epoch": 0.8461634497740268, "grad_norm": 0.0943409726023674, "learning_rate": 4.626648977446245e-06, "loss": 8.5379, "step": 169440 }, { "epoch": 0.8462133885989662, "grad_norm": 0.09771402925252914, "learning_rate": 4.625147062504693e-06, "loss": 8.5228, "step": 169450 }, { "epoch": 0.8462633274239058, "grad_norm": 0.09028983116149902, "learning_rate": 4.6236451475631436e-06, "loss": 8.5318, "step": 169460 }, { "epoch": 0.8463132662488452, "grad_norm": 0.09272675216197968, "learning_rate": 4.622143232621592e-06, "loss": 8.5224, "step": 169470 }, { "epoch": 0.8463632050737846, "grad_norm": 0.09558822959661484, "learning_rate": 4.620641317680042e-06, "loss": 8.5264, "step": 169480 }, { "epoch": 0.846413143898724, "grad_norm": 0.09761396795511246, "learning_rate": 4.619139402738492e-06, "loss": 8.5319, "step": 169490 }, { "epoch": 0.8464630827236636, "grad_norm": 0.08748037368059158, "learning_rate": 4.617637487796941e-06, "loss": 8.543, "step": 169500 }, { "epoch": 0.846513021548603, "grad_norm": 0.0940655842423439, "learning_rate": 4.616135572855391e-06, "loss": 8.5201, "step": 169510 }, { "epoch": 0.8465629603735424, "grad_norm": 0.09007947146892548, "learning_rate": 4.61463365791384e-06, "loss": 8.5319, "step": 169520 }, { "epoch": 0.8466128991984818, "grad_norm": 0.09856663644313812, "learning_rate": 4.61313174297229e-06, "loss": 8.5355, "step": 169530 }, { "epoch": 0.8466628380234213, "grad_norm": 0.09389778226613998, "learning_rate": 4.61162982803074e-06, "loss": 8.5349, "step": 169540 }, { "epoch": 0.8467127768483608, "grad_norm": 0.08669053763151169, "learning_rate": 4.610127913089188e-06, "loss": 8.5311, "step": 169550 }, { "epoch": 0.8467627156733002, "grad_norm": 0.08856794238090515, "learning_rate": 4.6086259981476386e-06, "loss": 8.5275, "step": 169560 }, { "epoch": 0.8468126544982396, "grad_norm": 0.09139453619718552, "learning_rate": 4.607124083206088e-06, "loss": 8.526, "step": 169570 }, { "epoch": 0.8468625933231791, "grad_norm": 0.09540039300918579, "learning_rate": 4.605622168264537e-06, "loss": 8.5266, "step": 169580 }, { "epoch": 0.8469125321481186, "grad_norm": 0.09145918488502502, "learning_rate": 4.604120253322987e-06, "loss": 8.5374, "step": 169590 }, { "epoch": 0.846962470973058, "grad_norm": 0.08852418512105942, "learning_rate": 4.602618338381437e-06, "loss": 8.5433, "step": 169600 }, { "epoch": 0.8470124097979974, "grad_norm": 0.09328535944223404, "learning_rate": 4.601116423439886e-06, "loss": 8.5504, "step": 169610 }, { "epoch": 0.847062348622937, "grad_norm": 0.09695547074079514, "learning_rate": 4.599614508498335e-06, "loss": 8.5343, "step": 169620 }, { "epoch": 0.8471122874478764, "grad_norm": 0.09098362177610397, "learning_rate": 4.598112593556785e-06, "loss": 8.5452, "step": 169630 }, { "epoch": 0.8471622262728158, "grad_norm": 0.0890357717871666, "learning_rate": 4.596610678615235e-06, "loss": 8.5353, "step": 169640 }, { "epoch": 0.8472121650977552, "grad_norm": 0.09094347059726715, "learning_rate": 4.595108763673684e-06, "loss": 8.5324, "step": 169650 }, { "epoch": 0.8472621039226947, "grad_norm": 0.09218890964984894, "learning_rate": 4.5936068487321336e-06, "loss": 8.5394, "step": 169660 }, { "epoch": 0.8473120427476342, "grad_norm": 0.09381541609764099, "learning_rate": 4.592104933790583e-06, "loss": 8.5285, "step": 169670 }, { "epoch": 0.8473619815725736, "grad_norm": 0.09734323620796204, "learning_rate": 4.590603018849032e-06, "loss": 8.5248, "step": 169680 }, { "epoch": 0.847411920397513, "grad_norm": 0.09610073268413544, "learning_rate": 4.589101103907482e-06, "loss": 8.526, "step": 169690 }, { "epoch": 0.8474618592224525, "grad_norm": 0.08428271114826202, "learning_rate": 4.587599188965932e-06, "loss": 8.5368, "step": 169700 }, { "epoch": 0.847511798047392, "grad_norm": 0.09294697642326355, "learning_rate": 4.586097274024381e-06, "loss": 8.537, "step": 169710 }, { "epoch": 0.8475617368723314, "grad_norm": 0.08724416792392731, "learning_rate": 4.58459535908283e-06, "loss": 8.5352, "step": 169720 }, { "epoch": 0.8476116756972708, "grad_norm": 0.08911699056625366, "learning_rate": 4.583093444141281e-06, "loss": 8.5249, "step": 169730 }, { "epoch": 0.8476616145222103, "grad_norm": 0.08514885604381561, "learning_rate": 4.58159152919973e-06, "loss": 8.5576, "step": 169740 }, { "epoch": 0.8477115533471498, "grad_norm": 0.08839089423418045, "learning_rate": 4.580089614258179e-06, "loss": 8.5511, "step": 169750 }, { "epoch": 0.8477614921720892, "grad_norm": 0.0928574949502945, "learning_rate": 4.578587699316629e-06, "loss": 8.5221, "step": 169760 }, { "epoch": 0.8478114309970286, "grad_norm": 0.09458362311124802, "learning_rate": 4.577085784375078e-06, "loss": 8.5231, "step": 169770 }, { "epoch": 0.8478613698219681, "grad_norm": 0.09411700069904327, "learning_rate": 4.575583869433528e-06, "loss": 8.5104, "step": 169780 }, { "epoch": 0.8479113086469076, "grad_norm": 0.09059763699769974, "learning_rate": 4.574081954491977e-06, "loss": 8.5246, "step": 169790 }, { "epoch": 0.847961247471847, "grad_norm": 0.09194177389144897, "learning_rate": 4.572580039550427e-06, "loss": 8.5261, "step": 169800 }, { "epoch": 0.8480111862967864, "grad_norm": 0.08624966442584991, "learning_rate": 4.571078124608877e-06, "loss": 8.517, "step": 169810 }, { "epoch": 0.8480611251217259, "grad_norm": 0.09074903279542923, "learning_rate": 4.569576209667325e-06, "loss": 8.5229, "step": 169820 }, { "epoch": 0.8481110639466654, "grad_norm": 0.09367188811302185, "learning_rate": 4.568074294725776e-06, "loss": 8.5213, "step": 169830 }, { "epoch": 0.8481610027716048, "grad_norm": 0.09176365286111832, "learning_rate": 4.566572379784225e-06, "loss": 8.5176, "step": 169840 }, { "epoch": 0.8482109415965442, "grad_norm": 0.09426657855510712, "learning_rate": 4.565070464842674e-06, "loss": 8.5222, "step": 169850 }, { "epoch": 0.8482608804214837, "grad_norm": 0.09585726261138916, "learning_rate": 4.5635685499011244e-06, "loss": 8.5333, "step": 169860 }, { "epoch": 0.8483108192464232, "grad_norm": 0.08798620849847794, "learning_rate": 4.562066634959573e-06, "loss": 8.5385, "step": 169870 }, { "epoch": 0.8483607580713626, "grad_norm": 0.09025800973176956, "learning_rate": 4.560564720018023e-06, "loss": 8.5288, "step": 169880 }, { "epoch": 0.848410696896302, "grad_norm": 0.09170225262641907, "learning_rate": 4.559062805076473e-06, "loss": 8.5304, "step": 169890 }, { "epoch": 0.8484606357212415, "grad_norm": 0.08960150182247162, "learning_rate": 4.557560890134922e-06, "loss": 8.5373, "step": 169900 }, { "epoch": 0.848510574546181, "grad_norm": 0.08712244778871536, "learning_rate": 4.556058975193372e-06, "loss": 8.5318, "step": 169910 }, { "epoch": 0.8485605133711204, "grad_norm": 0.09316559135913849, "learning_rate": 4.5545570602518204e-06, "loss": 8.5409, "step": 169920 }, { "epoch": 0.8486104521960598, "grad_norm": 0.0908828005194664, "learning_rate": 4.553055145310271e-06, "loss": 8.5518, "step": 169930 }, { "epoch": 0.8486603910209993, "grad_norm": 0.09130831062793732, "learning_rate": 4.551553230368721e-06, "loss": 8.5231, "step": 169940 }, { "epoch": 0.8487103298459387, "grad_norm": 0.0959784984588623, "learning_rate": 4.550051315427169e-06, "loss": 8.542, "step": 169950 }, { "epoch": 0.8487602686708782, "grad_norm": 0.09128502756357193, "learning_rate": 4.5485494004856194e-06, "loss": 8.5307, "step": 169960 }, { "epoch": 0.8488102074958176, "grad_norm": 0.10153272747993469, "learning_rate": 4.547047485544069e-06, "loss": 8.5301, "step": 169970 }, { "epoch": 0.8488601463207571, "grad_norm": 0.09342774748802185, "learning_rate": 4.545545570602518e-06, "loss": 8.5356, "step": 169980 }, { "epoch": 0.8489100851456965, "grad_norm": 0.0933808907866478, "learning_rate": 4.544043655660968e-06, "loss": 8.5405, "step": 169990 }, { "epoch": 0.848960023970636, "grad_norm": 0.09019854664802551, "learning_rate": 4.542541740719417e-06, "loss": 8.5424, "step": 170000 }, { "epoch": 0.8490099627955754, "grad_norm": 0.08802162855863571, "learning_rate": 4.541039825777867e-06, "loss": 8.532, "step": 170010 }, { "epoch": 0.8490599016205149, "grad_norm": 0.09536787867546082, "learning_rate": 4.539537910836316e-06, "loss": 8.5354, "step": 170020 }, { "epoch": 0.8491098404454543, "grad_norm": 0.0905720591545105, "learning_rate": 4.538035995894766e-06, "loss": 8.5336, "step": 170030 }, { "epoch": 0.8491597792703938, "grad_norm": 0.09646139293909073, "learning_rate": 4.536534080953216e-06, "loss": 8.5237, "step": 170040 }, { "epoch": 0.8492097180953332, "grad_norm": 0.09050284326076508, "learning_rate": 4.535032166011665e-06, "loss": 8.5426, "step": 170050 }, { "epoch": 0.8492596569202727, "grad_norm": 0.09438873827457428, "learning_rate": 4.5335302510701145e-06, "loss": 8.5094, "step": 170060 }, { "epoch": 0.8493095957452121, "grad_norm": 0.09040695428848267, "learning_rate": 4.532028336128564e-06, "loss": 8.5364, "step": 170070 }, { "epoch": 0.8493595345701516, "grad_norm": 0.09196127206087112, "learning_rate": 4.530526421187013e-06, "loss": 8.5276, "step": 170080 }, { "epoch": 0.849409473395091, "grad_norm": 0.09099120646715164, "learning_rate": 4.529024506245463e-06, "loss": 8.5473, "step": 170090 }, { "epoch": 0.8494594122200304, "grad_norm": 0.08868347853422165, "learning_rate": 4.527522591303913e-06, "loss": 8.5644, "step": 170100 }, { "epoch": 0.8495093510449699, "grad_norm": 0.09560364484786987, "learning_rate": 4.526020676362362e-06, "loss": 8.5208, "step": 170110 }, { "epoch": 0.8495592898699094, "grad_norm": 0.09278056770563126, "learning_rate": 4.524518761420811e-06, "loss": 8.5264, "step": 170120 }, { "epoch": 0.8496092286948488, "grad_norm": 0.09304804354906082, "learning_rate": 4.5230168464792615e-06, "loss": 8.5363, "step": 170130 }, { "epoch": 0.8496591675197882, "grad_norm": 0.09038883447647095, "learning_rate": 4.521514931537711e-06, "loss": 8.521, "step": 170140 }, { "epoch": 0.8497091063447277, "grad_norm": 0.09431783854961395, "learning_rate": 4.52001301659616e-06, "loss": 8.5134, "step": 170150 }, { "epoch": 0.8497590451696672, "grad_norm": 0.09250378608703613, "learning_rate": 4.5185111016546095e-06, "loss": 8.5085, "step": 170160 }, { "epoch": 0.8498089839946066, "grad_norm": 0.09243625402450562, "learning_rate": 4.517009186713059e-06, "loss": 8.5148, "step": 170170 }, { "epoch": 0.849858922819546, "grad_norm": 0.09091030061244965, "learning_rate": 4.515507271771509e-06, "loss": 8.5367, "step": 170180 }, { "epoch": 0.8499088616444855, "grad_norm": 0.09004100412130356, "learning_rate": 4.514005356829958e-06, "loss": 8.5418, "step": 170190 }, { "epoch": 0.849958800469425, "grad_norm": 0.08924288302659988, "learning_rate": 4.512503441888408e-06, "loss": 8.5241, "step": 170200 }, { "epoch": 0.8500087392943644, "grad_norm": 0.0939495712518692, "learning_rate": 4.511001526946858e-06, "loss": 8.5362, "step": 170210 }, { "epoch": 0.8500586781193038, "grad_norm": 0.09469542652368546, "learning_rate": 4.509499612005306e-06, "loss": 8.5327, "step": 170220 }, { "epoch": 0.8501086169442433, "grad_norm": 0.08831597119569778, "learning_rate": 4.5079976970637565e-06, "loss": 8.5328, "step": 170230 }, { "epoch": 0.8501585557691828, "grad_norm": 0.0914112776517868, "learning_rate": 4.506495782122206e-06, "loss": 8.5414, "step": 170240 }, { "epoch": 0.8502084945941222, "grad_norm": 0.0885150134563446, "learning_rate": 4.504993867180655e-06, "loss": 8.5305, "step": 170250 }, { "epoch": 0.8502584334190616, "grad_norm": 0.08355226367712021, "learning_rate": 4.503491952239105e-06, "loss": 8.5322, "step": 170260 }, { "epoch": 0.8503083722440011, "grad_norm": 0.08838168531656265, "learning_rate": 4.501990037297554e-06, "loss": 8.5281, "step": 170270 }, { "epoch": 0.8503583110689406, "grad_norm": 0.0902080163359642, "learning_rate": 4.500488122356004e-06, "loss": 8.5394, "step": 170280 }, { "epoch": 0.85040824989388, "grad_norm": 0.09007387608289719, "learning_rate": 4.498986207414454e-06, "loss": 8.5112, "step": 170290 }, { "epoch": 0.8504581887188194, "grad_norm": 0.08401839435100555, "learning_rate": 4.497484292472903e-06, "loss": 8.533, "step": 170300 }, { "epoch": 0.8505081275437589, "grad_norm": 0.0899522602558136, "learning_rate": 4.495982377531353e-06, "loss": 8.5388, "step": 170310 }, { "epoch": 0.8505580663686984, "grad_norm": 0.09382461756467819, "learning_rate": 4.494480462589801e-06, "loss": 8.5133, "step": 170320 }, { "epoch": 0.8506080051936378, "grad_norm": 0.09093036502599716, "learning_rate": 4.4929785476482515e-06, "loss": 8.5497, "step": 170330 }, { "epoch": 0.8506579440185772, "grad_norm": 0.08482268452644348, "learning_rate": 4.491476632706702e-06, "loss": 8.5193, "step": 170340 }, { "epoch": 0.8507078828435167, "grad_norm": 0.09074404090642929, "learning_rate": 4.48997471776515e-06, "loss": 8.5359, "step": 170350 }, { "epoch": 0.8507578216684561, "grad_norm": 0.09321633726358414, "learning_rate": 4.4884728028236e-06, "loss": 8.5268, "step": 170360 }, { "epoch": 0.8508077604933956, "grad_norm": 0.09202466160058975, "learning_rate": 4.48697088788205e-06, "loss": 8.5414, "step": 170370 }, { "epoch": 0.850857699318335, "grad_norm": 0.08913063257932663, "learning_rate": 4.485468972940499e-06, "loss": 8.5411, "step": 170380 }, { "epoch": 0.8509076381432745, "grad_norm": 0.09542038291692734, "learning_rate": 4.483967057998949e-06, "loss": 8.5335, "step": 170390 }, { "epoch": 0.850957576968214, "grad_norm": 0.09215617924928665, "learning_rate": 4.482465143057398e-06, "loss": 8.5483, "step": 170400 }, { "epoch": 0.8510075157931534, "grad_norm": 0.09391964226961136, "learning_rate": 4.480963228115848e-06, "loss": 8.5355, "step": 170410 }, { "epoch": 0.8510574546180928, "grad_norm": 0.09085561335086823, "learning_rate": 4.479461313174298e-06, "loss": 8.5412, "step": 170420 }, { "epoch": 0.8511073934430323, "grad_norm": 0.08789962530136108, "learning_rate": 4.4779593982327465e-06, "loss": 8.5273, "step": 170430 }, { "epoch": 0.8511573322679717, "grad_norm": 0.08881513774394989, "learning_rate": 4.476457483291197e-06, "loss": 8.5312, "step": 170440 }, { "epoch": 0.8512072710929112, "grad_norm": 0.0915045440196991, "learning_rate": 4.474955568349646e-06, "loss": 8.5231, "step": 170450 }, { "epoch": 0.8512572099178506, "grad_norm": 0.09604188799858093, "learning_rate": 4.473453653408095e-06, "loss": 8.5366, "step": 170460 }, { "epoch": 0.8513071487427901, "grad_norm": 0.09160809963941574, "learning_rate": 4.4719517384665455e-06, "loss": 8.5187, "step": 170470 }, { "epoch": 0.8513570875677295, "grad_norm": 0.08846515417098999, "learning_rate": 4.470449823524994e-06, "loss": 8.5163, "step": 170480 }, { "epoch": 0.851407026392669, "grad_norm": 0.09089051187038422, "learning_rate": 4.468947908583444e-06, "loss": 8.5191, "step": 170490 }, { "epoch": 0.8514569652176084, "grad_norm": 0.09371612966060638, "learning_rate": 4.4674459936418935e-06, "loss": 8.5244, "step": 170500 }, { "epoch": 0.8515069040425479, "grad_norm": 0.09188021719455719, "learning_rate": 4.465944078700343e-06, "loss": 8.5292, "step": 170510 }, { "epoch": 0.8515568428674873, "grad_norm": 0.08881582319736481, "learning_rate": 4.464442163758793e-06, "loss": 8.5183, "step": 170520 }, { "epoch": 0.8516067816924268, "grad_norm": 0.09312296658754349, "learning_rate": 4.462940248817242e-06, "loss": 8.5437, "step": 170530 }, { "epoch": 0.8516567205173662, "grad_norm": 0.08998645842075348, "learning_rate": 4.461438333875692e-06, "loss": 8.5336, "step": 170540 }, { "epoch": 0.8517066593423057, "grad_norm": 0.09388935565948486, "learning_rate": 4.459936418934141e-06, "loss": 8.5427, "step": 170550 }, { "epoch": 0.8517565981672451, "grad_norm": 0.08806626498699188, "learning_rate": 4.45843450399259e-06, "loss": 8.5317, "step": 170560 }, { "epoch": 0.8518065369921846, "grad_norm": 0.09021688997745514, "learning_rate": 4.4569325890510405e-06, "loss": 8.5494, "step": 170570 }, { "epoch": 0.851856475817124, "grad_norm": 0.08856786042451859, "learning_rate": 4.45543067410949e-06, "loss": 8.5258, "step": 170580 }, { "epoch": 0.8519064146420635, "grad_norm": 0.09338122606277466, "learning_rate": 4.453928759167939e-06, "loss": 8.5315, "step": 170590 }, { "epoch": 0.8519563534670029, "grad_norm": 0.09389621019363403, "learning_rate": 4.4524268442263885e-06, "loss": 8.5314, "step": 170600 }, { "epoch": 0.8520062922919424, "grad_norm": 0.08809863030910492, "learning_rate": 4.450924929284839e-06, "loss": 8.5296, "step": 170610 }, { "epoch": 0.8520562311168818, "grad_norm": 0.08649566769599915, "learning_rate": 4.449423014343288e-06, "loss": 8.5521, "step": 170620 }, { "epoch": 0.8521061699418213, "grad_norm": 0.09101077169179916, "learning_rate": 4.447921099401737e-06, "loss": 8.5458, "step": 170630 }, { "epoch": 0.8521561087667607, "grad_norm": 0.0848645567893982, "learning_rate": 4.446419184460187e-06, "loss": 8.5266, "step": 170640 }, { "epoch": 0.8522060475917002, "grad_norm": 0.0918324664235115, "learning_rate": 4.444917269518636e-06, "loss": 8.517, "step": 170650 }, { "epoch": 0.8522559864166396, "grad_norm": 0.08957000821828842, "learning_rate": 4.443415354577086e-06, "loss": 8.5481, "step": 170660 }, { "epoch": 0.8523059252415791, "grad_norm": 0.09019914269447327, "learning_rate": 4.4419134396355355e-06, "loss": 8.5189, "step": 170670 }, { "epoch": 0.8523558640665185, "grad_norm": 0.09360361099243164, "learning_rate": 4.440411524693985e-06, "loss": 8.5331, "step": 170680 }, { "epoch": 0.852405802891458, "grad_norm": 0.09016086161136627, "learning_rate": 4.438909609752435e-06, "loss": 8.5151, "step": 170690 }, { "epoch": 0.8524557417163974, "grad_norm": 0.09195458889007568, "learning_rate": 4.4374076948108835e-06, "loss": 8.5457, "step": 170700 }, { "epoch": 0.8525056805413369, "grad_norm": 0.0889841690659523, "learning_rate": 4.435905779869334e-06, "loss": 8.5435, "step": 170710 }, { "epoch": 0.8525556193662763, "grad_norm": 0.09094245731830597, "learning_rate": 4.434403864927783e-06, "loss": 8.5134, "step": 170720 }, { "epoch": 0.8526055581912158, "grad_norm": 0.09037609398365021, "learning_rate": 4.432901949986232e-06, "loss": 8.5313, "step": 170730 }, { "epoch": 0.8526554970161552, "grad_norm": 0.0911756157875061, "learning_rate": 4.4314000350446825e-06, "loss": 8.5236, "step": 170740 }, { "epoch": 0.8527054358410947, "grad_norm": 0.09256346523761749, "learning_rate": 4.429898120103131e-06, "loss": 8.5366, "step": 170750 }, { "epoch": 0.8527553746660341, "grad_norm": 0.09345842152833939, "learning_rate": 4.428396205161581e-06, "loss": 8.5308, "step": 170760 }, { "epoch": 0.8528053134909735, "grad_norm": 0.10060130059719086, "learning_rate": 4.426894290220031e-06, "loss": 8.5234, "step": 170770 }, { "epoch": 0.852855252315913, "grad_norm": 0.09197411686182022, "learning_rate": 4.42539237527848e-06, "loss": 8.558, "step": 170780 }, { "epoch": 0.8529051911408525, "grad_norm": 0.09203599393367767, "learning_rate": 4.42389046033693e-06, "loss": 8.5315, "step": 170790 }, { "epoch": 0.8529551299657919, "grad_norm": 0.09319550544023514, "learning_rate": 4.4223885453953785e-06, "loss": 8.5225, "step": 170800 }, { "epoch": 0.8530050687907313, "grad_norm": 0.08547905087471008, "learning_rate": 4.420886630453829e-06, "loss": 8.5404, "step": 170810 }, { "epoch": 0.8530550076156708, "grad_norm": 0.08869150280952454, "learning_rate": 4.419384715512279e-06, "loss": 8.5281, "step": 170820 }, { "epoch": 0.8531049464406103, "grad_norm": 0.08847400546073914, "learning_rate": 4.417882800570727e-06, "loss": 8.544, "step": 170830 }, { "epoch": 0.8531548852655497, "grad_norm": 0.09268223494291306, "learning_rate": 4.4163808856291775e-06, "loss": 8.5184, "step": 170840 }, { "epoch": 0.8532048240904891, "grad_norm": 0.09480337798595428, "learning_rate": 4.414878970687627e-06, "loss": 8.5208, "step": 170850 }, { "epoch": 0.8532547629154286, "grad_norm": 0.09097526222467422, "learning_rate": 4.413377055746076e-06, "loss": 8.5258, "step": 170860 }, { "epoch": 0.8533047017403681, "grad_norm": 0.0875224694609642, "learning_rate": 4.411875140804526e-06, "loss": 8.5318, "step": 170870 }, { "epoch": 0.8533546405653075, "grad_norm": 0.08786869794130325, "learning_rate": 4.410373225862975e-06, "loss": 8.5334, "step": 170880 }, { "epoch": 0.8534045793902469, "grad_norm": 0.08923707902431488, "learning_rate": 4.408871310921425e-06, "loss": 8.535, "step": 170890 }, { "epoch": 0.8534545182151864, "grad_norm": 0.0923088863492012, "learning_rate": 4.407369395979874e-06, "loss": 8.5256, "step": 170900 }, { "epoch": 0.8535044570401259, "grad_norm": 0.0889919325709343, "learning_rate": 4.405867481038324e-06, "loss": 8.5382, "step": 170910 }, { "epoch": 0.8535543958650653, "grad_norm": 0.09233865886926651, "learning_rate": 4.404365566096774e-06, "loss": 8.5243, "step": 170920 }, { "epoch": 0.8536043346900047, "grad_norm": 0.09140230715274811, "learning_rate": 4.402863651155223e-06, "loss": 8.5295, "step": 170930 }, { "epoch": 0.8536542735149442, "grad_norm": 0.08698879182338715, "learning_rate": 4.4013617362136725e-06, "loss": 8.5412, "step": 170940 }, { "epoch": 0.8537042123398837, "grad_norm": 0.09249941259622574, "learning_rate": 4.399859821272122e-06, "loss": 8.533, "step": 170950 }, { "epoch": 0.8537541511648231, "grad_norm": 0.0933549553155899, "learning_rate": 4.398357906330571e-06, "loss": 8.5375, "step": 170960 }, { "epoch": 0.8538040899897625, "grad_norm": 0.09042268991470337, "learning_rate": 4.396855991389021e-06, "loss": 8.5378, "step": 170970 }, { "epoch": 0.853854028814702, "grad_norm": 0.09002401679754257, "learning_rate": 4.395354076447471e-06, "loss": 8.5339, "step": 170980 }, { "epoch": 0.8539039676396415, "grad_norm": 0.09069207310676575, "learning_rate": 4.39385216150592e-06, "loss": 8.5289, "step": 170990 }, { "epoch": 0.8539539064645809, "grad_norm": 0.09139145910739899, "learning_rate": 4.392350246564369e-06, "loss": 8.5352, "step": 171000 }, { "epoch": 0.8540038452895203, "grad_norm": 0.0929713174700737, "learning_rate": 4.3908483316228196e-06, "loss": 8.5344, "step": 171010 }, { "epoch": 0.8540537841144598, "grad_norm": 0.09418080747127533, "learning_rate": 4.389346416681269e-06, "loss": 8.5188, "step": 171020 }, { "epoch": 0.8541037229393993, "grad_norm": 0.08637245744466782, "learning_rate": 4.387844501739718e-06, "loss": 8.5291, "step": 171030 }, { "epoch": 0.8541536617643387, "grad_norm": 0.08483462035655975, "learning_rate": 4.3863425867981676e-06, "loss": 8.5162, "step": 171040 }, { "epoch": 0.8542036005892781, "grad_norm": 0.08939448744058609, "learning_rate": 4.384840671856617e-06, "loss": 8.5235, "step": 171050 }, { "epoch": 0.8542535394142176, "grad_norm": 0.09517613798379898, "learning_rate": 4.383338756915067e-06, "loss": 8.5371, "step": 171060 }, { "epoch": 0.8543034782391571, "grad_norm": 0.09513095766305923, "learning_rate": 4.381836841973516e-06, "loss": 8.5406, "step": 171070 }, { "epoch": 0.8543534170640965, "grad_norm": 0.08950556814670563, "learning_rate": 4.380334927031966e-06, "loss": 8.5353, "step": 171080 }, { "epoch": 0.8544033558890359, "grad_norm": 0.09560289233922958, "learning_rate": 4.378833012090416e-06, "loss": 8.5242, "step": 171090 }, { "epoch": 0.8544532947139754, "grad_norm": 0.0961078554391861, "learning_rate": 4.377331097148864e-06, "loss": 8.5337, "step": 171100 }, { "epoch": 0.8545032335389148, "grad_norm": 0.0894903764128685, "learning_rate": 4.3758291822073146e-06, "loss": 8.5246, "step": 171110 }, { "epoch": 0.8545531723638543, "grad_norm": 0.08703772723674774, "learning_rate": 4.374327267265764e-06, "loss": 8.5198, "step": 171120 }, { "epoch": 0.8546031111887937, "grad_norm": 0.08781271427869797, "learning_rate": 4.372825352324213e-06, "loss": 8.5344, "step": 171130 }, { "epoch": 0.8546530500137332, "grad_norm": 0.09302118420600891, "learning_rate": 4.371323437382663e-06, "loss": 8.5205, "step": 171140 }, { "epoch": 0.8547029888386726, "grad_norm": 0.09112757444381714, "learning_rate": 4.369821522441112e-06, "loss": 8.5174, "step": 171150 }, { "epoch": 0.8547529276636121, "grad_norm": 0.0904446691274643, "learning_rate": 4.368319607499562e-06, "loss": 8.5156, "step": 171160 }, { "epoch": 0.8548028664885515, "grad_norm": 0.09799622744321823, "learning_rate": 4.366817692558012e-06, "loss": 8.538, "step": 171170 }, { "epoch": 0.854852805313491, "grad_norm": 0.08375813066959381, "learning_rate": 4.365315777616461e-06, "loss": 8.5342, "step": 171180 }, { "epoch": 0.8549027441384304, "grad_norm": 0.08957131952047348, "learning_rate": 4.363813862674911e-06, "loss": 8.5287, "step": 171190 }, { "epoch": 0.8549526829633699, "grad_norm": 0.0968262180685997, "learning_rate": 4.362311947733359e-06, "loss": 8.5356, "step": 171200 }, { "epoch": 0.8550026217883093, "grad_norm": 0.09194337576627731, "learning_rate": 4.3608100327918096e-06, "loss": 8.5205, "step": 171210 }, { "epoch": 0.8550525606132487, "grad_norm": 0.09763994812965393, "learning_rate": 4.35930811785026e-06, "loss": 8.5341, "step": 171220 }, { "epoch": 0.8551024994381882, "grad_norm": 0.09552284330129623, "learning_rate": 4.357806202908708e-06, "loss": 8.5164, "step": 171230 }, { "epoch": 0.8551524382631277, "grad_norm": 0.09413187205791473, "learning_rate": 4.356304287967158e-06, "loss": 8.5397, "step": 171240 }, { "epoch": 0.8552023770880671, "grad_norm": 0.09369870275259018, "learning_rate": 4.354802373025608e-06, "loss": 8.5478, "step": 171250 }, { "epoch": 0.8552523159130065, "grad_norm": 0.08899905532598495, "learning_rate": 4.353300458084057e-06, "loss": 8.5269, "step": 171260 }, { "epoch": 0.855302254737946, "grad_norm": 0.09390175342559814, "learning_rate": 4.351798543142507e-06, "loss": 8.5124, "step": 171270 }, { "epoch": 0.8553521935628855, "grad_norm": 0.09079564362764359, "learning_rate": 4.350296628200956e-06, "loss": 8.5169, "step": 171280 }, { "epoch": 0.8554021323878249, "grad_norm": 0.09164270758628845, "learning_rate": 4.348794713259406e-06, "loss": 8.5276, "step": 171290 }, { "epoch": 0.8554520712127643, "grad_norm": 0.0873217061161995, "learning_rate": 4.347292798317855e-06, "loss": 8.5337, "step": 171300 }, { "epoch": 0.8555020100377038, "grad_norm": 0.09027426689863205, "learning_rate": 4.345790883376305e-06, "loss": 8.5357, "step": 171310 }, { "epoch": 0.8555519488626433, "grad_norm": 0.09546752274036407, "learning_rate": 4.344288968434755e-06, "loss": 8.5264, "step": 171320 }, { "epoch": 0.8556018876875827, "grad_norm": 0.0898887887597084, "learning_rate": 4.342787053493204e-06, "loss": 8.5286, "step": 171330 }, { "epoch": 0.8556518265125221, "grad_norm": 0.0918826311826706, "learning_rate": 4.3412851385516534e-06, "loss": 8.5244, "step": 171340 }, { "epoch": 0.8557017653374616, "grad_norm": 0.09295768290758133, "learning_rate": 4.339783223610103e-06, "loss": 8.5359, "step": 171350 }, { "epoch": 0.8557517041624011, "grad_norm": 0.08988602459430695, "learning_rate": 4.338281308668552e-06, "loss": 8.5364, "step": 171360 }, { "epoch": 0.8558016429873405, "grad_norm": 0.0889463871717453, "learning_rate": 4.336779393727002e-06, "loss": 8.5293, "step": 171370 }, { "epoch": 0.8558515818122799, "grad_norm": 0.09765569865703583, "learning_rate": 4.335277478785452e-06, "loss": 8.5084, "step": 171380 }, { "epoch": 0.8559015206372194, "grad_norm": 0.08945247530937195, "learning_rate": 4.333775563843901e-06, "loss": 8.5243, "step": 171390 }, { "epoch": 0.8559514594621589, "grad_norm": 0.08958765864372253, "learning_rate": 4.33227364890235e-06, "loss": 8.5287, "step": 171400 }, { "epoch": 0.8560013982870983, "grad_norm": 0.09407828748226166, "learning_rate": 4.3307717339608004e-06, "loss": 8.521, "step": 171410 }, { "epoch": 0.8560513371120377, "grad_norm": 0.09075424075126648, "learning_rate": 4.32926981901925e-06, "loss": 8.5308, "step": 171420 }, { "epoch": 0.8561012759369772, "grad_norm": 0.09202433377504349, "learning_rate": 4.327767904077699e-06, "loss": 8.5345, "step": 171430 }, { "epoch": 0.8561512147619167, "grad_norm": 0.09231635928153992, "learning_rate": 4.3262659891361484e-06, "loss": 8.5238, "step": 171440 }, { "epoch": 0.8562011535868561, "grad_norm": 0.09123031049966812, "learning_rate": 4.324764074194598e-06, "loss": 8.5197, "step": 171450 }, { "epoch": 0.8562510924117955, "grad_norm": 0.08414209634065628, "learning_rate": 4.323262159253048e-06, "loss": 8.5187, "step": 171460 }, { "epoch": 0.856301031236735, "grad_norm": 0.09748626500368118, "learning_rate": 4.321760244311497e-06, "loss": 8.5257, "step": 171470 }, { "epoch": 0.8563509700616745, "grad_norm": 0.08869002014398575, "learning_rate": 4.320258329369947e-06, "loss": 8.5425, "step": 171480 }, { "epoch": 0.8564009088866139, "grad_norm": 0.09146328270435333, "learning_rate": 4.318756414428397e-06, "loss": 8.5481, "step": 171490 }, { "epoch": 0.8564508477115533, "grad_norm": 0.0903269499540329, "learning_rate": 4.317254499486846e-06, "loss": 8.5161, "step": 171500 }, { "epoch": 0.8565007865364928, "grad_norm": 0.09317917376756668, "learning_rate": 4.3157525845452954e-06, "loss": 8.5282, "step": 171510 }, { "epoch": 0.8565507253614323, "grad_norm": 0.09211008250713348, "learning_rate": 4.314250669603745e-06, "loss": 8.5379, "step": 171520 }, { "epoch": 0.8566006641863717, "grad_norm": 0.08755582571029663, "learning_rate": 4.312748754662194e-06, "loss": 8.5338, "step": 171530 }, { "epoch": 0.8566506030113111, "grad_norm": 0.09266114234924316, "learning_rate": 4.311246839720644e-06, "loss": 8.5368, "step": 171540 }, { "epoch": 0.8567005418362506, "grad_norm": 0.09207313507795334, "learning_rate": 4.309744924779094e-06, "loss": 8.5345, "step": 171550 }, { "epoch": 0.8567504806611901, "grad_norm": 0.09798050671815872, "learning_rate": 4.308243009837543e-06, "loss": 8.5375, "step": 171560 }, { "epoch": 0.8568004194861295, "grad_norm": 0.08942960947751999, "learning_rate": 4.306741094895993e-06, "loss": 8.542, "step": 171570 }, { "epoch": 0.8568503583110689, "grad_norm": 0.0846295952796936, "learning_rate": 4.305239179954442e-06, "loss": 8.5393, "step": 171580 }, { "epoch": 0.8569002971360083, "grad_norm": 0.09053222835063934, "learning_rate": 4.303737265012892e-06, "loss": 8.5335, "step": 171590 }, { "epoch": 0.8569502359609479, "grad_norm": 0.09028905630111694, "learning_rate": 4.302235350071341e-06, "loss": 8.5206, "step": 171600 }, { "epoch": 0.8570001747858873, "grad_norm": 0.09010299295186996, "learning_rate": 4.3007334351297905e-06, "loss": 8.5239, "step": 171610 }, { "epoch": 0.8570501136108267, "grad_norm": 0.08910666406154633, "learning_rate": 4.299231520188241e-06, "loss": 8.5292, "step": 171620 }, { "epoch": 0.8571000524357661, "grad_norm": 0.0874292179942131, "learning_rate": 4.297729605246689e-06, "loss": 8.5219, "step": 171630 }, { "epoch": 0.8571499912607057, "grad_norm": 0.08846402913331985, "learning_rate": 4.296227690305139e-06, "loss": 8.5341, "step": 171640 }, { "epoch": 0.8571999300856451, "grad_norm": 0.09597437083721161, "learning_rate": 4.2947257753635895e-06, "loss": 8.52, "step": 171650 }, { "epoch": 0.8572498689105845, "grad_norm": 0.08995895087718964, "learning_rate": 4.293223860422038e-06, "loss": 8.5257, "step": 171660 }, { "epoch": 0.8572998077355239, "grad_norm": 0.08784112334251404, "learning_rate": 4.291721945480488e-06, "loss": 8.5186, "step": 171670 }, { "epoch": 0.8573497465604635, "grad_norm": 0.09828951209783554, "learning_rate": 4.290220030538937e-06, "loss": 8.5305, "step": 171680 }, { "epoch": 0.8573996853854029, "grad_norm": 0.09064572304487228, "learning_rate": 4.288718115597387e-06, "loss": 8.5354, "step": 171690 }, { "epoch": 0.8574496242103423, "grad_norm": 0.09304898232221603, "learning_rate": 4.287216200655837e-06, "loss": 8.5223, "step": 171700 }, { "epoch": 0.8574995630352817, "grad_norm": 0.09134497493505478, "learning_rate": 4.2857142857142855e-06, "loss": 8.5243, "step": 171710 }, { "epoch": 0.8575495018602213, "grad_norm": 0.0942414253950119, "learning_rate": 4.284212370772736e-06, "loss": 8.5371, "step": 171720 }, { "epoch": 0.8575994406851607, "grad_norm": 0.10213427990674973, "learning_rate": 4.282710455831185e-06, "loss": 8.5147, "step": 171730 }, { "epoch": 0.8576493795101001, "grad_norm": 0.09323253482580185, "learning_rate": 4.281208540889634e-06, "loss": 8.5178, "step": 171740 }, { "epoch": 0.8576993183350395, "grad_norm": 0.09151145815849304, "learning_rate": 4.2797066259480845e-06, "loss": 8.5257, "step": 171750 }, { "epoch": 0.8577492571599791, "grad_norm": 0.0880388393998146, "learning_rate": 4.278204711006533e-06, "loss": 8.5441, "step": 171760 }, { "epoch": 0.8577991959849185, "grad_norm": 0.0961805209517479, "learning_rate": 4.276702796064983e-06, "loss": 8.5351, "step": 171770 }, { "epoch": 0.8578491348098579, "grad_norm": 0.09380482137203217, "learning_rate": 4.2752008811234325e-06, "loss": 8.5219, "step": 171780 }, { "epoch": 0.8578990736347973, "grad_norm": 0.08797604590654373, "learning_rate": 4.273698966181882e-06, "loss": 8.524, "step": 171790 }, { "epoch": 0.8579490124597369, "grad_norm": 0.08902735263109207, "learning_rate": 4.272197051240332e-06, "loss": 8.5404, "step": 171800 }, { "epoch": 0.8579989512846763, "grad_norm": 0.09067311882972717, "learning_rate": 4.270695136298781e-06, "loss": 8.5317, "step": 171810 }, { "epoch": 0.8580488901096157, "grad_norm": 0.09175264835357666, "learning_rate": 4.269193221357231e-06, "loss": 8.5181, "step": 171820 }, { "epoch": 0.8580988289345551, "grad_norm": 0.0938398614525795, "learning_rate": 4.26769130641568e-06, "loss": 8.5388, "step": 171830 }, { "epoch": 0.8581487677594947, "grad_norm": 0.09269008040428162, "learning_rate": 4.266189391474129e-06, "loss": 8.5437, "step": 171840 }, { "epoch": 0.8581987065844341, "grad_norm": 0.09139380604028702, "learning_rate": 4.2646874765325795e-06, "loss": 8.5397, "step": 171850 }, { "epoch": 0.8582486454093735, "grad_norm": 0.09328168630599976, "learning_rate": 4.263185561591029e-06, "loss": 8.5047, "step": 171860 }, { "epoch": 0.8582985842343129, "grad_norm": 0.0878739282488823, "learning_rate": 4.261683646649478e-06, "loss": 8.5314, "step": 171870 }, { "epoch": 0.8583485230592525, "grad_norm": 0.0920386090874672, "learning_rate": 4.2601817317079275e-06, "loss": 8.5369, "step": 171880 }, { "epoch": 0.8583984618841919, "grad_norm": 0.09493181109428406, "learning_rate": 4.258679816766378e-06, "loss": 8.5218, "step": 171890 }, { "epoch": 0.8584484007091313, "grad_norm": 0.09276656806468964, "learning_rate": 4.257177901824827e-06, "loss": 8.5195, "step": 171900 }, { "epoch": 0.8584983395340707, "grad_norm": 0.09002747386693954, "learning_rate": 4.255675986883276e-06, "loss": 8.5195, "step": 171910 }, { "epoch": 0.8585482783590103, "grad_norm": 0.09539522230625153, "learning_rate": 4.254174071941726e-06, "loss": 8.5185, "step": 171920 }, { "epoch": 0.8585982171839497, "grad_norm": 0.09918307512998581, "learning_rate": 4.252672157000175e-06, "loss": 8.5317, "step": 171930 }, { "epoch": 0.8586481560088891, "grad_norm": 0.09323044866323471, "learning_rate": 4.251170242058625e-06, "loss": 8.5361, "step": 171940 }, { "epoch": 0.8586980948338285, "grad_norm": 0.09227460622787476, "learning_rate": 4.2496683271170745e-06, "loss": 8.5207, "step": 171950 }, { "epoch": 0.8587480336587681, "grad_norm": 0.09323973953723907, "learning_rate": 4.248166412175524e-06, "loss": 8.5152, "step": 171960 }, { "epoch": 0.8587979724837075, "grad_norm": 0.0909019485116005, "learning_rate": 4.246664497233974e-06, "loss": 8.5242, "step": 171970 }, { "epoch": 0.8588479113086469, "grad_norm": 0.08941137790679932, "learning_rate": 4.2451625822924225e-06, "loss": 8.5398, "step": 171980 }, { "epoch": 0.8588978501335863, "grad_norm": 0.08684715628623962, "learning_rate": 4.243660667350873e-06, "loss": 8.5233, "step": 171990 }, { "epoch": 0.8589477889585259, "grad_norm": 0.0915994718670845, "learning_rate": 4.242158752409322e-06, "loss": 8.5258, "step": 172000 }, { "epoch": 0.8589977277834653, "grad_norm": 0.09104558825492859, "learning_rate": 4.240656837467771e-06, "loss": 8.534, "step": 172010 }, { "epoch": 0.8590476666084047, "grad_norm": 0.09221166372299194, "learning_rate": 4.2391549225262215e-06, "loss": 8.5333, "step": 172020 }, { "epoch": 0.8590976054333441, "grad_norm": 0.09822104871273041, "learning_rate": 4.23765300758467e-06, "loss": 8.52, "step": 172030 }, { "epoch": 0.8591475442582837, "grad_norm": 0.08423840254545212, "learning_rate": 4.23615109264312e-06, "loss": 8.5458, "step": 172040 }, { "epoch": 0.8591974830832231, "grad_norm": 0.0882524624466896, "learning_rate": 4.23464917770157e-06, "loss": 8.5479, "step": 172050 }, { "epoch": 0.8592474219081625, "grad_norm": 0.09635046869516373, "learning_rate": 4.233147262760019e-06, "loss": 8.5266, "step": 172060 }, { "epoch": 0.8592973607331019, "grad_norm": 0.09102864563465118, "learning_rate": 4.231645347818469e-06, "loss": 8.5349, "step": 172070 }, { "epoch": 0.8593472995580413, "grad_norm": 0.09232207387685776, "learning_rate": 4.2301434328769175e-06, "loss": 8.5276, "step": 172080 }, { "epoch": 0.8593972383829809, "grad_norm": 0.09616389870643616, "learning_rate": 4.228641517935368e-06, "loss": 8.5269, "step": 172090 }, { "epoch": 0.8594471772079203, "grad_norm": 0.09412415325641632, "learning_rate": 4.227139602993818e-06, "loss": 8.5328, "step": 172100 }, { "epoch": 0.8594971160328597, "grad_norm": 0.09291058033704758, "learning_rate": 4.225637688052266e-06, "loss": 8.5311, "step": 172110 }, { "epoch": 0.8595470548577991, "grad_norm": 0.09364159405231476, "learning_rate": 4.2241357731107165e-06, "loss": 8.5437, "step": 172120 }, { "epoch": 0.8595969936827387, "grad_norm": 0.09280949085950851, "learning_rate": 4.222633858169166e-06, "loss": 8.5254, "step": 172130 }, { "epoch": 0.8596469325076781, "grad_norm": 0.09044373035430908, "learning_rate": 4.221131943227615e-06, "loss": 8.5283, "step": 172140 }, { "epoch": 0.8596968713326175, "grad_norm": 0.0903530940413475, "learning_rate": 4.219630028286065e-06, "loss": 8.5234, "step": 172150 }, { "epoch": 0.8597468101575569, "grad_norm": 0.09625843167304993, "learning_rate": 4.218128113344514e-06, "loss": 8.5441, "step": 172160 }, { "epoch": 0.8597967489824965, "grad_norm": 0.08800741285085678, "learning_rate": 4.216626198402964e-06, "loss": 8.5485, "step": 172170 }, { "epoch": 0.8598466878074359, "grad_norm": 0.0877734124660492, "learning_rate": 4.215124283461413e-06, "loss": 8.5253, "step": 172180 }, { "epoch": 0.8598966266323753, "grad_norm": 0.09386802464723587, "learning_rate": 4.213622368519863e-06, "loss": 8.522, "step": 172190 }, { "epoch": 0.8599465654573147, "grad_norm": 0.09531255811452866, "learning_rate": 4.212120453578313e-06, "loss": 8.5214, "step": 172200 }, { "epoch": 0.8599965042822543, "grad_norm": 0.09443308413028717, "learning_rate": 4.210618538636762e-06, "loss": 8.5338, "step": 172210 }, { "epoch": 0.8600464431071937, "grad_norm": 0.08627857267856598, "learning_rate": 4.2091166236952115e-06, "loss": 8.5226, "step": 172220 }, { "epoch": 0.8600963819321331, "grad_norm": 0.09067834168672562, "learning_rate": 4.207614708753661e-06, "loss": 8.5358, "step": 172230 }, { "epoch": 0.8601463207570725, "grad_norm": 0.08249117434024811, "learning_rate": 4.20611279381211e-06, "loss": 8.5418, "step": 172240 }, { "epoch": 0.8601962595820121, "grad_norm": 0.08913594484329224, "learning_rate": 4.20461087887056e-06, "loss": 8.5061, "step": 172250 }, { "epoch": 0.8602461984069515, "grad_norm": 0.09263671189546585, "learning_rate": 4.20310896392901e-06, "loss": 8.5203, "step": 172260 }, { "epoch": 0.8602961372318909, "grad_norm": 0.09188566356897354, "learning_rate": 4.201607048987459e-06, "loss": 8.5296, "step": 172270 }, { "epoch": 0.8603460760568303, "grad_norm": 0.0945729911327362, "learning_rate": 4.200105134045908e-06, "loss": 8.5218, "step": 172280 }, { "epoch": 0.8603960148817699, "grad_norm": 0.09193189442157745, "learning_rate": 4.1986032191043585e-06, "loss": 8.5329, "step": 172290 }, { "epoch": 0.8604459537067093, "grad_norm": 0.08755156397819519, "learning_rate": 4.197101304162808e-06, "loss": 8.5227, "step": 172300 }, { "epoch": 0.8604958925316487, "grad_norm": 0.08843012899160385, "learning_rate": 4.195599389221257e-06, "loss": 8.5337, "step": 172310 }, { "epoch": 0.8605458313565881, "grad_norm": 0.09322084486484528, "learning_rate": 4.1940974742797065e-06, "loss": 8.5342, "step": 172320 }, { "epoch": 0.8605957701815277, "grad_norm": 0.09291605651378632, "learning_rate": 4.192595559338156e-06, "loss": 8.516, "step": 172330 }, { "epoch": 0.8606457090064671, "grad_norm": 0.09476222097873688, "learning_rate": 4.191093644396606e-06, "loss": 8.5342, "step": 172340 }, { "epoch": 0.8606956478314065, "grad_norm": 0.09349306672811508, "learning_rate": 4.189591729455055e-06, "loss": 8.5237, "step": 172350 }, { "epoch": 0.8607455866563459, "grad_norm": 0.09003052115440369, "learning_rate": 4.188089814513505e-06, "loss": 8.5341, "step": 172360 }, { "epoch": 0.8607955254812855, "grad_norm": 0.08857399970293045, "learning_rate": 4.186587899571955e-06, "loss": 8.5271, "step": 172370 }, { "epoch": 0.8608454643062249, "grad_norm": 0.09072058647871017, "learning_rate": 4.185085984630403e-06, "loss": 8.5211, "step": 172380 }, { "epoch": 0.8608954031311643, "grad_norm": 0.09271258860826492, "learning_rate": 4.1835840696888535e-06, "loss": 8.5312, "step": 172390 }, { "epoch": 0.8609453419561037, "grad_norm": 0.09086389094591141, "learning_rate": 4.182082154747303e-06, "loss": 8.5093, "step": 172400 }, { "epoch": 0.8609952807810433, "grad_norm": 0.09174318611621857, "learning_rate": 4.180580239805752e-06, "loss": 8.5343, "step": 172410 }, { "epoch": 0.8610452196059827, "grad_norm": 0.1004238873720169, "learning_rate": 4.179078324864202e-06, "loss": 8.5097, "step": 172420 }, { "epoch": 0.8610951584309221, "grad_norm": 0.09396187216043472, "learning_rate": 4.177576409922651e-06, "loss": 8.5219, "step": 172430 }, { "epoch": 0.8611450972558615, "grad_norm": 0.09725307673215866, "learning_rate": 4.176074494981101e-06, "loss": 8.5057, "step": 172440 }, { "epoch": 0.8611950360808011, "grad_norm": 0.0942634865641594, "learning_rate": 4.174572580039551e-06, "loss": 8.5479, "step": 172450 }, { "epoch": 0.8612449749057405, "grad_norm": 0.09349733591079712, "learning_rate": 4.173070665098e-06, "loss": 8.5116, "step": 172460 }, { "epoch": 0.8612949137306799, "grad_norm": 0.09070730209350586, "learning_rate": 4.17156875015645e-06, "loss": 8.5297, "step": 172470 }, { "epoch": 0.8613448525556193, "grad_norm": 0.09090225398540497, "learning_rate": 4.170066835214898e-06, "loss": 8.5385, "step": 172480 }, { "epoch": 0.8613947913805589, "grad_norm": 0.08755583316087723, "learning_rate": 4.1685649202733485e-06, "loss": 8.511, "step": 172490 }, { "epoch": 0.8614447302054983, "grad_norm": 0.08842223882675171, "learning_rate": 4.167063005331799e-06, "loss": 8.53, "step": 172500 }, { "epoch": 0.8614946690304377, "grad_norm": 0.08680158108472824, "learning_rate": 4.165561090390247e-06, "loss": 8.541, "step": 172510 }, { "epoch": 0.8615446078553771, "grad_norm": 0.08641988784074783, "learning_rate": 4.164059175448697e-06, "loss": 8.5344, "step": 172520 }, { "epoch": 0.8615945466803167, "grad_norm": 0.08488810807466507, "learning_rate": 4.162557260507147e-06, "loss": 8.5253, "step": 172530 }, { "epoch": 0.8616444855052561, "grad_norm": 0.08614518493413925, "learning_rate": 4.161055345565596e-06, "loss": 8.5317, "step": 172540 }, { "epoch": 0.8616944243301955, "grad_norm": 0.09032870084047318, "learning_rate": 4.159553430624046e-06, "loss": 8.5269, "step": 172550 }, { "epoch": 0.8617443631551349, "grad_norm": 0.09359986335039139, "learning_rate": 4.158051515682495e-06, "loss": 8.5229, "step": 172560 }, { "epoch": 0.8617943019800745, "grad_norm": 0.09470424801111221, "learning_rate": 4.156549600740945e-06, "loss": 8.5238, "step": 172570 }, { "epoch": 0.8618442408050139, "grad_norm": 0.09623333811759949, "learning_rate": 4.155047685799394e-06, "loss": 8.5354, "step": 172580 }, { "epoch": 0.8618941796299533, "grad_norm": 0.08704448491334915, "learning_rate": 4.1535457708578436e-06, "loss": 8.5351, "step": 172590 }, { "epoch": 0.8619441184548927, "grad_norm": 0.09469842165708542, "learning_rate": 4.152043855916294e-06, "loss": 8.5372, "step": 172600 }, { "epoch": 0.8619940572798322, "grad_norm": 0.09458217769861221, "learning_rate": 4.150541940974742e-06, "loss": 8.543, "step": 172610 }, { "epoch": 0.8620439961047717, "grad_norm": 0.09154288470745087, "learning_rate": 4.149040026033192e-06, "loss": 8.5297, "step": 172620 }, { "epoch": 0.8620939349297111, "grad_norm": 0.09419039636850357, "learning_rate": 4.1475381110916426e-06, "loss": 8.5533, "step": 172630 }, { "epoch": 0.8621438737546505, "grad_norm": 0.09300480782985687, "learning_rate": 4.146036196150091e-06, "loss": 8.5419, "step": 172640 }, { "epoch": 0.86219381257959, "grad_norm": 0.09122762829065323, "learning_rate": 4.144534281208541e-06, "loss": 8.5167, "step": 172650 }, { "epoch": 0.8622437514045295, "grad_norm": 0.09213479608297348, "learning_rate": 4.1430323662669906e-06, "loss": 8.5241, "step": 172660 }, { "epoch": 0.8622936902294689, "grad_norm": 0.09271895885467529, "learning_rate": 4.14153045132544e-06, "loss": 8.5352, "step": 172670 }, { "epoch": 0.8623436290544083, "grad_norm": 0.08686958998441696, "learning_rate": 4.14002853638389e-06, "loss": 8.5167, "step": 172680 }, { "epoch": 0.8623935678793478, "grad_norm": 0.08590806275606155, "learning_rate": 4.1385266214423386e-06, "loss": 8.5366, "step": 172690 }, { "epoch": 0.8624435067042873, "grad_norm": 0.0904327780008316, "learning_rate": 4.137024706500789e-06, "loss": 8.5325, "step": 172700 }, { "epoch": 0.8624934455292267, "grad_norm": 0.09133438020944595, "learning_rate": 4.135522791559238e-06, "loss": 8.5142, "step": 172710 }, { "epoch": 0.8625433843541661, "grad_norm": 0.08940550684928894, "learning_rate": 4.134020876617687e-06, "loss": 8.51, "step": 172720 }, { "epoch": 0.8625933231791056, "grad_norm": 0.09443370252847672, "learning_rate": 4.132518961676138e-06, "loss": 8.5289, "step": 172730 }, { "epoch": 0.8626432620040451, "grad_norm": 0.09132427722215652, "learning_rate": 4.131017046734587e-06, "loss": 8.5351, "step": 172740 }, { "epoch": 0.8626932008289845, "grad_norm": 0.08972794562578201, "learning_rate": 4.129515131793036e-06, "loss": 8.5338, "step": 172750 }, { "epoch": 0.8627431396539239, "grad_norm": 0.09007082879543304, "learning_rate": 4.128013216851486e-06, "loss": 8.514, "step": 172760 }, { "epoch": 0.8627930784788634, "grad_norm": 0.09930828213691711, "learning_rate": 4.126511301909935e-06, "loss": 8.5355, "step": 172770 }, { "epoch": 0.8628430173038029, "grad_norm": 0.09435442090034485, "learning_rate": 4.125009386968385e-06, "loss": 8.539, "step": 172780 }, { "epoch": 0.8628929561287423, "grad_norm": 0.09018246084451675, "learning_rate": 4.123507472026834e-06, "loss": 8.5403, "step": 172790 }, { "epoch": 0.8629428949536817, "grad_norm": 0.09555966407060623, "learning_rate": 4.122005557085284e-06, "loss": 8.5138, "step": 172800 }, { "epoch": 0.8629928337786212, "grad_norm": 0.09227462112903595, "learning_rate": 4.120503642143733e-06, "loss": 8.5498, "step": 172810 }, { "epoch": 0.8630427726035607, "grad_norm": 0.08898046612739563, "learning_rate": 4.119001727202183e-06, "loss": 8.5182, "step": 172820 }, { "epoch": 0.8630927114285001, "grad_norm": 0.08735623210668564, "learning_rate": 4.117499812260633e-06, "loss": 8.5232, "step": 172830 }, { "epoch": 0.8631426502534395, "grad_norm": 0.09244896471500397, "learning_rate": 4.115997897319082e-06, "loss": 8.5405, "step": 172840 }, { "epoch": 0.863192589078379, "grad_norm": 0.08761844038963318, "learning_rate": 4.114495982377531e-06, "loss": 8.5422, "step": 172850 }, { "epoch": 0.8632425279033185, "grad_norm": 0.0887509286403656, "learning_rate": 4.112994067435981e-06, "loss": 8.5045, "step": 172860 }, { "epoch": 0.8632924667282579, "grad_norm": 0.09360551834106445, "learning_rate": 4.111492152494431e-06, "loss": 8.5411, "step": 172870 }, { "epoch": 0.8633424055531973, "grad_norm": 0.0890243873000145, "learning_rate": 4.10999023755288e-06, "loss": 8.5195, "step": 172880 }, { "epoch": 0.8633923443781368, "grad_norm": 0.09118281304836273, "learning_rate": 4.1084883226113294e-06, "loss": 8.5162, "step": 172890 }, { "epoch": 0.8634422832030763, "grad_norm": 0.08943572640419006, "learning_rate": 4.10698640766978e-06, "loss": 8.5457, "step": 172900 }, { "epoch": 0.8634922220280157, "grad_norm": 0.09161094576120377, "learning_rate": 4.105484492728228e-06, "loss": 8.5199, "step": 172910 }, { "epoch": 0.8635421608529551, "grad_norm": 0.09737364202737808, "learning_rate": 4.103982577786678e-06, "loss": 8.528, "step": 172920 }, { "epoch": 0.8635920996778946, "grad_norm": 0.08938228338956833, "learning_rate": 4.102480662845128e-06, "loss": 8.5449, "step": 172930 }, { "epoch": 0.863642038502834, "grad_norm": 0.09068731218576431, "learning_rate": 4.100978747903577e-06, "loss": 8.5252, "step": 172940 }, { "epoch": 0.8636919773277735, "grad_norm": 0.09687364101409912, "learning_rate": 4.099476832962027e-06, "loss": 8.5316, "step": 172950 }, { "epoch": 0.8637419161527129, "grad_norm": 0.09029592573642731, "learning_rate": 4.097974918020476e-06, "loss": 8.5268, "step": 172960 }, { "epoch": 0.8637918549776524, "grad_norm": 0.0884825587272644, "learning_rate": 4.096473003078926e-06, "loss": 8.5217, "step": 172970 }, { "epoch": 0.8638417938025919, "grad_norm": 0.09405983984470367, "learning_rate": 4.094971088137376e-06, "loss": 8.5221, "step": 172980 }, { "epoch": 0.8638917326275313, "grad_norm": 0.0932493582367897, "learning_rate": 4.0934691731958244e-06, "loss": 8.5124, "step": 172990 }, { "epoch": 0.8639416714524707, "grad_norm": 0.09520955383777618, "learning_rate": 4.091967258254275e-06, "loss": 8.5114, "step": 173000 }, { "epoch": 0.8639916102774102, "grad_norm": 0.08801741153001785, "learning_rate": 4.090465343312723e-06, "loss": 8.5435, "step": 173010 }, { "epoch": 0.8640415491023496, "grad_norm": 0.09374932199716568, "learning_rate": 4.088963428371173e-06, "loss": 8.513, "step": 173020 }, { "epoch": 0.8640914879272891, "grad_norm": 0.09420095384120941, "learning_rate": 4.0874615134296235e-06, "loss": 8.5286, "step": 173030 }, { "epoch": 0.8641414267522285, "grad_norm": 0.09835788607597351, "learning_rate": 4.085959598488072e-06, "loss": 8.514, "step": 173040 }, { "epoch": 0.864191365577168, "grad_norm": 0.0848371684551239, "learning_rate": 4.084457683546522e-06, "loss": 8.5214, "step": 173050 }, { "epoch": 0.8642413044021074, "grad_norm": 0.09027153998613358, "learning_rate": 4.0829557686049714e-06, "loss": 8.5318, "step": 173060 }, { "epoch": 0.8642912432270469, "grad_norm": 0.09047489613294601, "learning_rate": 4.081453853663421e-06, "loss": 8.538, "step": 173070 }, { "epoch": 0.8643411820519863, "grad_norm": 0.09193349629640579, "learning_rate": 4.079951938721871e-06, "loss": 8.517, "step": 173080 }, { "epoch": 0.8643911208769257, "grad_norm": 0.09359715133905411, "learning_rate": 4.0784500237803194e-06, "loss": 8.5216, "step": 173090 }, { "epoch": 0.8644410597018652, "grad_norm": 0.09064842015504837, "learning_rate": 4.07694810883877e-06, "loss": 8.5191, "step": 173100 }, { "epoch": 0.8644909985268047, "grad_norm": 0.09371822327375412, "learning_rate": 4.075446193897219e-06, "loss": 8.5121, "step": 173110 }, { "epoch": 0.8645409373517441, "grad_norm": 0.09190354496240616, "learning_rate": 4.073944278955668e-06, "loss": 8.4994, "step": 173120 }, { "epoch": 0.8645908761766835, "grad_norm": 0.09204579889774323, "learning_rate": 4.0724423640141185e-06, "loss": 8.5339, "step": 173130 }, { "epoch": 0.864640815001623, "grad_norm": 0.09318622201681137, "learning_rate": 4.070940449072568e-06, "loss": 8.5116, "step": 173140 }, { "epoch": 0.8646907538265625, "grad_norm": 0.09237584471702576, "learning_rate": 4.069438534131017e-06, "loss": 8.5358, "step": 173150 }, { "epoch": 0.8647406926515019, "grad_norm": 0.09040313959121704, "learning_rate": 4.0679366191894665e-06, "loss": 8.5232, "step": 173160 }, { "epoch": 0.8647906314764413, "grad_norm": 0.09376216679811478, "learning_rate": 4.066434704247916e-06, "loss": 8.5028, "step": 173170 }, { "epoch": 0.8648405703013808, "grad_norm": 0.08734447509050369, "learning_rate": 4.064932789306366e-06, "loss": 8.5056, "step": 173180 }, { "epoch": 0.8648905091263203, "grad_norm": 0.0926559641957283, "learning_rate": 4.063430874364815e-06, "loss": 8.5099, "step": 173190 }, { "epoch": 0.8649404479512597, "grad_norm": 0.08731448650360107, "learning_rate": 4.061928959423265e-06, "loss": 8.5165, "step": 173200 }, { "epoch": 0.8649903867761991, "grad_norm": 0.08865402638912201, "learning_rate": 4.060427044481714e-06, "loss": 8.5291, "step": 173210 }, { "epoch": 0.8650403256011386, "grad_norm": 0.08650528639554977, "learning_rate": 4.058925129540164e-06, "loss": 8.5137, "step": 173220 }, { "epoch": 0.8650902644260781, "grad_norm": 0.08930091559886932, "learning_rate": 4.0574232145986135e-06, "loss": 8.5208, "step": 173230 }, { "epoch": 0.8651402032510175, "grad_norm": 0.09235706925392151, "learning_rate": 4.055921299657063e-06, "loss": 8.5156, "step": 173240 }, { "epoch": 0.8651901420759569, "grad_norm": 0.09423262625932693, "learning_rate": 4.054419384715512e-06, "loss": 8.5162, "step": 173250 }, { "epoch": 0.8652400809008964, "grad_norm": 0.0886596292257309, "learning_rate": 4.0529174697739615e-06, "loss": 8.5152, "step": 173260 }, { "epoch": 0.8652900197258359, "grad_norm": 0.08678395301103592, "learning_rate": 4.051415554832412e-06, "loss": 8.53, "step": 173270 }, { "epoch": 0.8653399585507753, "grad_norm": 0.08788527548313141, "learning_rate": 4.049913639890861e-06, "loss": 8.5281, "step": 173280 }, { "epoch": 0.8653898973757147, "grad_norm": 0.08688144385814667, "learning_rate": 4.04841172494931e-06, "loss": 8.5394, "step": 173290 }, { "epoch": 0.8654398362006542, "grad_norm": 0.09183796495199203, "learning_rate": 4.0469098100077605e-06, "loss": 8.5355, "step": 173300 }, { "epoch": 0.8654897750255937, "grad_norm": 0.09272154420614243, "learning_rate": 4.045407895066209e-06, "loss": 8.5449, "step": 173310 }, { "epoch": 0.8655397138505331, "grad_norm": 0.0878337025642395, "learning_rate": 4.043905980124659e-06, "loss": 8.543, "step": 173320 }, { "epoch": 0.8655896526754725, "grad_norm": 0.0915573239326477, "learning_rate": 4.0424040651831085e-06, "loss": 8.5153, "step": 173330 }, { "epoch": 0.865639591500412, "grad_norm": 0.09731071442365646, "learning_rate": 4.040902150241558e-06, "loss": 8.5377, "step": 173340 }, { "epoch": 0.8656895303253515, "grad_norm": 0.09491939097642899, "learning_rate": 4.039400235300008e-06, "loss": 8.5258, "step": 173350 }, { "epoch": 0.8657394691502909, "grad_norm": 0.09417067468166351, "learning_rate": 4.0378983203584565e-06, "loss": 8.5305, "step": 173360 }, { "epoch": 0.8657894079752303, "grad_norm": 0.09818973392248154, "learning_rate": 4.036396405416907e-06, "loss": 8.5226, "step": 173370 }, { "epoch": 0.8658393468001698, "grad_norm": 0.10228383541107178, "learning_rate": 4.034894490475357e-06, "loss": 8.5162, "step": 173380 }, { "epoch": 0.8658892856251093, "grad_norm": 0.09025364369153976, "learning_rate": 4.033392575533805e-06, "loss": 8.5282, "step": 173390 }, { "epoch": 0.8659392244500487, "grad_norm": 0.09199435263872147, "learning_rate": 4.0318906605922555e-06, "loss": 8.5222, "step": 173400 }, { "epoch": 0.8659891632749881, "grad_norm": 0.08623290807008743, "learning_rate": 4.030388745650704e-06, "loss": 8.5133, "step": 173410 }, { "epoch": 0.8660391020999276, "grad_norm": 0.09399810433387756, "learning_rate": 4.028886830709154e-06, "loss": 8.5254, "step": 173420 }, { "epoch": 0.866089040924867, "grad_norm": 0.08971837162971497, "learning_rate": 4.027384915767604e-06, "loss": 8.5333, "step": 173430 }, { "epoch": 0.8661389797498065, "grad_norm": 0.08642110228538513, "learning_rate": 4.025883000826053e-06, "loss": 8.5237, "step": 173440 }, { "epoch": 0.8661889185747459, "grad_norm": 0.09314007312059402, "learning_rate": 4.024381085884503e-06, "loss": 8.5056, "step": 173450 }, { "epoch": 0.8662388573996854, "grad_norm": 0.09386899322271347, "learning_rate": 4.022879170942952e-06, "loss": 8.524, "step": 173460 }, { "epoch": 0.8662887962246248, "grad_norm": 0.09295320510864258, "learning_rate": 4.021377256001402e-06, "loss": 8.5266, "step": 173470 }, { "epoch": 0.8663387350495643, "grad_norm": 0.09699882566928864, "learning_rate": 4.019875341059852e-06, "loss": 8.5314, "step": 173480 }, { "epoch": 0.8663886738745037, "grad_norm": 0.08937513083219528, "learning_rate": 4.0183734261183e-06, "loss": 8.5286, "step": 173490 }, { "epoch": 0.8664386126994432, "grad_norm": 0.0886300727725029, "learning_rate": 4.0168715111767505e-06, "loss": 8.519, "step": 173500 }, { "epoch": 0.8664885515243826, "grad_norm": 0.08783800154924393, "learning_rate": 4.0153695962352e-06, "loss": 8.5191, "step": 173510 }, { "epoch": 0.8665384903493221, "grad_norm": 0.09207980334758759, "learning_rate": 4.013867681293649e-06, "loss": 8.5026, "step": 173520 }, { "epoch": 0.8665884291742615, "grad_norm": 0.09273963421583176, "learning_rate": 4.012365766352099e-06, "loss": 8.5182, "step": 173530 }, { "epoch": 0.866638367999201, "grad_norm": 0.08379366248846054, "learning_rate": 4.010863851410549e-06, "loss": 8.5278, "step": 173540 }, { "epoch": 0.8666883068241404, "grad_norm": 0.08762573450803757, "learning_rate": 4.009361936468998e-06, "loss": 8.5255, "step": 173550 }, { "epoch": 0.8667382456490799, "grad_norm": 0.088675856590271, "learning_rate": 4.007860021527447e-06, "loss": 8.5256, "step": 173560 }, { "epoch": 0.8667881844740193, "grad_norm": 0.08745112270116806, "learning_rate": 4.006358106585897e-06, "loss": 8.5365, "step": 173570 }, { "epoch": 0.8668381232989588, "grad_norm": 0.08667652308940887, "learning_rate": 4.004856191644347e-06, "loss": 8.5282, "step": 173580 }, { "epoch": 0.8668880621238982, "grad_norm": 0.09410617500543594, "learning_rate": 4.003354276702796e-06, "loss": 8.5143, "step": 173590 }, { "epoch": 0.8669380009488377, "grad_norm": 0.0875682532787323, "learning_rate": 4.0018523617612455e-06, "loss": 8.5359, "step": 173600 }, { "epoch": 0.8669879397737771, "grad_norm": 0.08985573053359985, "learning_rate": 4.000350446819695e-06, "loss": 8.5462, "step": 173610 }, { "epoch": 0.8670378785987166, "grad_norm": 0.09056945890188217, "learning_rate": 3.998848531878145e-06, "loss": 8.555, "step": 173620 }, { "epoch": 0.867087817423656, "grad_norm": 0.09406935423612595, "learning_rate": 3.997346616936594e-06, "loss": 8.5018, "step": 173630 }, { "epoch": 0.8671377562485955, "grad_norm": 0.08972731977701187, "learning_rate": 3.995844701995044e-06, "loss": 8.5286, "step": 173640 }, { "epoch": 0.8671876950735349, "grad_norm": 0.09432131797075272, "learning_rate": 3.994342787053493e-06, "loss": 8.527, "step": 173650 }, { "epoch": 0.8672376338984744, "grad_norm": 0.09251857548952103, "learning_rate": 3.992840872111943e-06, "loss": 8.5157, "step": 173660 }, { "epoch": 0.8672875727234138, "grad_norm": 0.0903116911649704, "learning_rate": 3.9913389571703925e-06, "loss": 8.5218, "step": 173670 }, { "epoch": 0.8673375115483533, "grad_norm": 0.09307548403739929, "learning_rate": 3.989837042228842e-06, "loss": 8.5296, "step": 173680 }, { "epoch": 0.8673874503732927, "grad_norm": 0.08810535818338394, "learning_rate": 3.988335127287291e-06, "loss": 8.5257, "step": 173690 }, { "epoch": 0.8674373891982322, "grad_norm": 0.09042003750801086, "learning_rate": 3.986833212345741e-06, "loss": 8.5462, "step": 173700 }, { "epoch": 0.8674873280231716, "grad_norm": 0.09091649949550629, "learning_rate": 3.985331297404191e-06, "loss": 8.5249, "step": 173710 }, { "epoch": 0.867537266848111, "grad_norm": 0.09188742935657501, "learning_rate": 3.98382938246264e-06, "loss": 8.5144, "step": 173720 }, { "epoch": 0.8675872056730505, "grad_norm": 0.09003132581710815, "learning_rate": 3.982327467521089e-06, "loss": 8.5341, "step": 173730 }, { "epoch": 0.86763714449799, "grad_norm": 0.09096615016460419, "learning_rate": 3.980825552579539e-06, "loss": 8.532, "step": 173740 }, { "epoch": 0.8676870833229294, "grad_norm": 0.09003739058971405, "learning_rate": 3.979323637637989e-06, "loss": 8.5205, "step": 173750 }, { "epoch": 0.8677370221478689, "grad_norm": 0.09441889822483063, "learning_rate": 3.977821722696438e-06, "loss": 8.5302, "step": 173760 }, { "epoch": 0.8677869609728083, "grad_norm": 0.09117145091295242, "learning_rate": 3.9763198077548875e-06, "loss": 8.5129, "step": 173770 }, { "epoch": 0.8678368997977478, "grad_norm": 0.0931628867983818, "learning_rate": 3.974817892813338e-06, "loss": 8.5337, "step": 173780 }, { "epoch": 0.8678868386226872, "grad_norm": 0.09708821773529053, "learning_rate": 3.973315977871786e-06, "loss": 8.5166, "step": 173790 }, { "epoch": 0.8679367774476267, "grad_norm": 0.08967326581478119, "learning_rate": 3.971814062930236e-06, "loss": 8.5294, "step": 173800 }, { "epoch": 0.8679867162725661, "grad_norm": 0.09614343196153641, "learning_rate": 3.970312147988686e-06, "loss": 8.5138, "step": 173810 }, { "epoch": 0.8680366550975056, "grad_norm": 0.09332430362701416, "learning_rate": 3.968810233047135e-06, "loss": 8.5287, "step": 173820 }, { "epoch": 0.868086593922445, "grad_norm": 0.09939328581094742, "learning_rate": 3.967308318105585e-06, "loss": 8.5366, "step": 173830 }, { "epoch": 0.8681365327473844, "grad_norm": 0.09348172694444656, "learning_rate": 3.965806403164034e-06, "loss": 8.524, "step": 173840 }, { "epoch": 0.8681864715723239, "grad_norm": 0.09327511489391327, "learning_rate": 3.964304488222484e-06, "loss": 8.5344, "step": 173850 }, { "epoch": 0.8682364103972634, "grad_norm": 0.089813232421875, "learning_rate": 3.962802573280934e-06, "loss": 8.5293, "step": 173860 }, { "epoch": 0.8682863492222028, "grad_norm": 0.09029410034418106, "learning_rate": 3.9613006583393825e-06, "loss": 8.5281, "step": 173870 }, { "epoch": 0.8683362880471422, "grad_norm": 0.09171853214502335, "learning_rate": 3.959798743397833e-06, "loss": 8.5053, "step": 173880 }, { "epoch": 0.8683862268720817, "grad_norm": 0.08777931332588196, "learning_rate": 3.958296828456281e-06, "loss": 8.51, "step": 173890 }, { "epoch": 0.8684361656970212, "grad_norm": 0.09193766117095947, "learning_rate": 3.956794913514731e-06, "loss": 8.5269, "step": 173900 }, { "epoch": 0.8684861045219606, "grad_norm": 0.08400430530309677, "learning_rate": 3.9552929985731815e-06, "loss": 8.5356, "step": 173910 }, { "epoch": 0.8685360433469, "grad_norm": 0.09466172009706497, "learning_rate": 3.95379108363163e-06, "loss": 8.5184, "step": 173920 }, { "epoch": 0.8685859821718395, "grad_norm": 0.08804745227098465, "learning_rate": 3.95228916869008e-06, "loss": 8.5084, "step": 173930 }, { "epoch": 0.868635920996779, "grad_norm": 0.08844754099845886, "learning_rate": 3.9507872537485295e-06, "loss": 8.5206, "step": 173940 }, { "epoch": 0.8686858598217184, "grad_norm": 0.08931338787078857, "learning_rate": 3.949285338806979e-06, "loss": 8.5227, "step": 173950 }, { "epoch": 0.8687357986466578, "grad_norm": 0.092722587287426, "learning_rate": 3.947783423865429e-06, "loss": 8.5357, "step": 173960 }, { "epoch": 0.8687857374715973, "grad_norm": 0.08423691242933273, "learning_rate": 3.9462815089238775e-06, "loss": 8.5368, "step": 173970 }, { "epoch": 0.8688356762965368, "grad_norm": 0.08981618285179138, "learning_rate": 3.944779593982328e-06, "loss": 8.5206, "step": 173980 }, { "epoch": 0.8688856151214762, "grad_norm": 0.0881986990571022, "learning_rate": 3.943277679040777e-06, "loss": 8.5184, "step": 173990 }, { "epoch": 0.8689355539464156, "grad_norm": 0.09177501499652863, "learning_rate": 3.941775764099226e-06, "loss": 8.5301, "step": 174000 }, { "epoch": 0.8689854927713551, "grad_norm": 0.0890003889799118, "learning_rate": 3.9402738491576766e-06, "loss": 8.5185, "step": 174010 }, { "epoch": 0.8690354315962946, "grad_norm": 0.08756007254123688, "learning_rate": 3.938771934216126e-06, "loss": 8.5242, "step": 174020 }, { "epoch": 0.869085370421234, "grad_norm": 0.09130045026540756, "learning_rate": 3.937270019274575e-06, "loss": 8.5241, "step": 174030 }, { "epoch": 0.8691353092461734, "grad_norm": 0.09635766595602036, "learning_rate": 3.9357681043330246e-06, "loss": 8.5303, "step": 174040 }, { "epoch": 0.8691852480711129, "grad_norm": 0.09500184655189514, "learning_rate": 3.934266189391474e-06, "loss": 8.5289, "step": 174050 }, { "epoch": 0.8692351868960523, "grad_norm": 0.08810250461101532, "learning_rate": 3.932764274449924e-06, "loss": 8.5216, "step": 174060 }, { "epoch": 0.8692851257209918, "grad_norm": 0.08967293798923492, "learning_rate": 3.931262359508373e-06, "loss": 8.5238, "step": 174070 }, { "epoch": 0.8693350645459312, "grad_norm": 0.0923919528722763, "learning_rate": 3.929760444566823e-06, "loss": 8.5099, "step": 174080 }, { "epoch": 0.8693850033708707, "grad_norm": 0.09465716779232025, "learning_rate": 3.928258529625272e-06, "loss": 8.5313, "step": 174090 }, { "epoch": 0.8694349421958101, "grad_norm": 0.08958126604557037, "learning_rate": 3.926756614683722e-06, "loss": 8.5358, "step": 174100 }, { "epoch": 0.8694848810207496, "grad_norm": 0.0895143523812294, "learning_rate": 3.9252546997421716e-06, "loss": 8.5239, "step": 174110 }, { "epoch": 0.869534819845689, "grad_norm": 0.0893610343337059, "learning_rate": 3.923752784800621e-06, "loss": 8.5262, "step": 174120 }, { "epoch": 0.8695847586706285, "grad_norm": 0.0977412685751915, "learning_rate": 3.92225086985907e-06, "loss": 8.5102, "step": 174130 }, { "epoch": 0.8696346974955679, "grad_norm": 0.09177926927804947, "learning_rate": 3.9207489549175196e-06, "loss": 8.5045, "step": 174140 }, { "epoch": 0.8696846363205074, "grad_norm": 0.08462709933519363, "learning_rate": 3.91924703997597e-06, "loss": 8.5163, "step": 174150 }, { "epoch": 0.8697345751454468, "grad_norm": 0.09177839756011963, "learning_rate": 3.917745125034419e-06, "loss": 8.5165, "step": 174160 }, { "epoch": 0.8697845139703863, "grad_norm": 0.08942088484764099, "learning_rate": 3.916243210092868e-06, "loss": 8.542, "step": 174170 }, { "epoch": 0.8698344527953257, "grad_norm": 0.0959954783320427, "learning_rate": 3.914741295151319e-06, "loss": 8.5206, "step": 174180 }, { "epoch": 0.8698843916202652, "grad_norm": 0.09030862152576447, "learning_rate": 3.913239380209767e-06, "loss": 8.5127, "step": 174190 }, { "epoch": 0.8699343304452046, "grad_norm": 0.08896990120410919, "learning_rate": 3.911737465268217e-06, "loss": 8.5275, "step": 174200 }, { "epoch": 0.869984269270144, "grad_norm": 0.08994099497795105, "learning_rate": 3.9102355503266666e-06, "loss": 8.5147, "step": 174210 }, { "epoch": 0.8700342080950835, "grad_norm": 0.0879681408405304, "learning_rate": 3.908733635385116e-06, "loss": 8.5341, "step": 174220 }, { "epoch": 0.870084146920023, "grad_norm": 0.0852343812584877, "learning_rate": 3.907231720443566e-06, "loss": 8.5343, "step": 174230 }, { "epoch": 0.8701340857449624, "grad_norm": 0.08718617260456085, "learning_rate": 3.9057298055020146e-06, "loss": 8.5334, "step": 174240 }, { "epoch": 0.8701840245699018, "grad_norm": 0.09171243757009506, "learning_rate": 3.904227890560465e-06, "loss": 8.5341, "step": 174250 }, { "epoch": 0.8702339633948413, "grad_norm": 0.08820389956235886, "learning_rate": 3.902725975618915e-06, "loss": 8.5417, "step": 174260 }, { "epoch": 0.8702839022197808, "grad_norm": 0.08782992511987686, "learning_rate": 3.901224060677363e-06, "loss": 8.5333, "step": 174270 }, { "epoch": 0.8703338410447202, "grad_norm": 0.08638270199298859, "learning_rate": 3.899722145735814e-06, "loss": 8.5396, "step": 174280 }, { "epoch": 0.8703837798696596, "grad_norm": 0.09227801114320755, "learning_rate": 3.898220230794262e-06, "loss": 8.5034, "step": 174290 }, { "epoch": 0.8704337186945991, "grad_norm": 0.09477287530899048, "learning_rate": 3.896718315852712e-06, "loss": 8.5067, "step": 174300 }, { "epoch": 0.8704836575195386, "grad_norm": 0.09321620315313339, "learning_rate": 3.8952164009111624e-06, "loss": 8.5343, "step": 174310 }, { "epoch": 0.870533596344478, "grad_norm": 0.08606991916894913, "learning_rate": 3.893714485969611e-06, "loss": 8.531, "step": 174320 }, { "epoch": 0.8705835351694174, "grad_norm": 0.08892033994197845, "learning_rate": 3.892212571028061e-06, "loss": 8.531, "step": 174330 }, { "epoch": 0.8706334739943569, "grad_norm": 0.08995813876390457, "learning_rate": 3.89071065608651e-06, "loss": 8.512, "step": 174340 }, { "epoch": 0.8706834128192964, "grad_norm": 0.08822611719369888, "learning_rate": 3.88920874114496e-06, "loss": 8.527, "step": 174350 }, { "epoch": 0.8707333516442358, "grad_norm": 0.09670012444257736, "learning_rate": 3.88770682620341e-06, "loss": 8.5085, "step": 174360 }, { "epoch": 0.8707832904691752, "grad_norm": 0.0893353670835495, "learning_rate": 3.886204911261858e-06, "loss": 8.5151, "step": 174370 }, { "epoch": 0.8708332292941147, "grad_norm": 0.0889049619436264, "learning_rate": 3.884702996320309e-06, "loss": 8.4997, "step": 174380 }, { "epoch": 0.8708831681190542, "grad_norm": 0.08892576396465302, "learning_rate": 3.883201081378758e-06, "loss": 8.5215, "step": 174390 }, { "epoch": 0.8709331069439936, "grad_norm": 0.09408024698495865, "learning_rate": 3.881699166437207e-06, "loss": 8.5249, "step": 174400 }, { "epoch": 0.870983045768933, "grad_norm": 0.09239810705184937, "learning_rate": 3.8801972514956574e-06, "loss": 8.5204, "step": 174410 }, { "epoch": 0.8710329845938725, "grad_norm": 0.08711476624011993, "learning_rate": 3.878695336554107e-06, "loss": 8.5171, "step": 174420 }, { "epoch": 0.871082923418812, "grad_norm": 0.08488985896110535, "learning_rate": 3.877193421612556e-06, "loss": 8.5197, "step": 174430 }, { "epoch": 0.8711328622437514, "grad_norm": 0.0881032943725586, "learning_rate": 3.8756915066710054e-06, "loss": 8.5149, "step": 174440 }, { "epoch": 0.8711828010686908, "grad_norm": 0.0867091566324234, "learning_rate": 3.874189591729455e-06, "loss": 8.5106, "step": 174450 }, { "epoch": 0.8712327398936303, "grad_norm": 0.09190431237220764, "learning_rate": 3.872687676787905e-06, "loss": 8.524, "step": 174460 }, { "epoch": 0.8712826787185698, "grad_norm": 0.08907434344291687, "learning_rate": 3.871185761846354e-06, "loss": 8.5324, "step": 174470 }, { "epoch": 0.8713326175435092, "grad_norm": 0.09729236364364624, "learning_rate": 3.869683846904804e-06, "loss": 8.5182, "step": 174480 }, { "epoch": 0.8713825563684486, "grad_norm": 0.09291893243789673, "learning_rate": 3.868181931963253e-06, "loss": 8.5277, "step": 174490 }, { "epoch": 0.8714324951933881, "grad_norm": 0.08960501104593277, "learning_rate": 3.866680017021703e-06, "loss": 8.5335, "step": 174500 }, { "epoch": 0.8714824340183276, "grad_norm": 0.09359270334243774, "learning_rate": 3.8651781020801524e-06, "loss": 8.5333, "step": 174510 }, { "epoch": 0.871532372843267, "grad_norm": 0.09425162523984909, "learning_rate": 3.863676187138602e-06, "loss": 8.5144, "step": 174520 }, { "epoch": 0.8715823116682064, "grad_norm": 0.09045587480068207, "learning_rate": 3.862174272197051e-06, "loss": 8.5378, "step": 174530 }, { "epoch": 0.8716322504931459, "grad_norm": 0.0939302071928978, "learning_rate": 3.8606723572555004e-06, "loss": 8.5233, "step": 174540 }, { "epoch": 0.8716821893180854, "grad_norm": 0.08632202446460724, "learning_rate": 3.859170442313951e-06, "loss": 8.534, "step": 174550 }, { "epoch": 0.8717321281430248, "grad_norm": 0.08903735131025314, "learning_rate": 3.8576685273724e-06, "loss": 8.537, "step": 174560 }, { "epoch": 0.8717820669679642, "grad_norm": 0.08908429741859436, "learning_rate": 3.856166612430849e-06, "loss": 8.5324, "step": 174570 }, { "epoch": 0.8718320057929037, "grad_norm": 0.08882272243499756, "learning_rate": 3.8546646974892995e-06, "loss": 8.5367, "step": 174580 }, { "epoch": 0.8718819446178432, "grad_norm": 0.08850658684968948, "learning_rate": 3.853162782547748e-06, "loss": 8.5214, "step": 174590 }, { "epoch": 0.8719318834427826, "grad_norm": 0.08655815571546555, "learning_rate": 3.851660867606198e-06, "loss": 8.5412, "step": 174600 }, { "epoch": 0.871981822267722, "grad_norm": 0.08702993392944336, "learning_rate": 3.8501589526646474e-06, "loss": 8.5364, "step": 174610 }, { "epoch": 0.8720317610926615, "grad_norm": 0.08738213032484055, "learning_rate": 3.848657037723097e-06, "loss": 8.517, "step": 174620 }, { "epoch": 0.872081699917601, "grad_norm": 0.09153701364994049, "learning_rate": 3.847155122781547e-06, "loss": 8.504, "step": 174630 }, { "epoch": 0.8721316387425404, "grad_norm": 0.09358830749988556, "learning_rate": 3.8456532078399954e-06, "loss": 8.5159, "step": 174640 }, { "epoch": 0.8721815775674798, "grad_norm": 0.08956989645957947, "learning_rate": 3.844151292898446e-06, "loss": 8.5174, "step": 174650 }, { "epoch": 0.8722315163924192, "grad_norm": 0.09366779029369354, "learning_rate": 3.842649377956896e-06, "loss": 8.5287, "step": 174660 }, { "epoch": 0.8722814552173588, "grad_norm": 0.09194490313529968, "learning_rate": 3.841147463015344e-06, "loss": 8.5269, "step": 174670 }, { "epoch": 0.8723313940422982, "grad_norm": 0.08473306894302368, "learning_rate": 3.8396455480737945e-06, "loss": 8.5332, "step": 174680 }, { "epoch": 0.8723813328672376, "grad_norm": 0.09393809735774994, "learning_rate": 3.838143633132243e-06, "loss": 8.5116, "step": 174690 }, { "epoch": 0.872431271692177, "grad_norm": 0.08729804307222366, "learning_rate": 3.836641718190693e-06, "loss": 8.5261, "step": 174700 }, { "epoch": 0.8724812105171166, "grad_norm": 0.08915986865758896, "learning_rate": 3.835139803249143e-06, "loss": 8.5319, "step": 174710 }, { "epoch": 0.872531149342056, "grad_norm": 0.08928072452545166, "learning_rate": 3.833637888307592e-06, "loss": 8.5346, "step": 174720 }, { "epoch": 0.8725810881669954, "grad_norm": 0.09394170343875885, "learning_rate": 3.832135973366042e-06, "loss": 8.5138, "step": 174730 }, { "epoch": 0.8726310269919348, "grad_norm": 0.09060081094503403, "learning_rate": 3.830634058424491e-06, "loss": 8.5273, "step": 174740 }, { "epoch": 0.8726809658168744, "grad_norm": 0.08914122730493546, "learning_rate": 3.829132143482941e-06, "loss": 8.5229, "step": 174750 }, { "epoch": 0.8727309046418138, "grad_norm": 0.09032674878835678, "learning_rate": 3.827630228541391e-06, "loss": 8.5249, "step": 174760 }, { "epoch": 0.8727808434667532, "grad_norm": 0.09034202992916107, "learning_rate": 3.826128313599839e-06, "loss": 8.5362, "step": 174770 }, { "epoch": 0.8728307822916926, "grad_norm": 0.08692754805088043, "learning_rate": 3.8246263986582895e-06, "loss": 8.5089, "step": 174780 }, { "epoch": 0.8728807211166322, "grad_norm": 0.09234897792339325, "learning_rate": 3.82312448371674e-06, "loss": 8.5188, "step": 174790 }, { "epoch": 0.8729306599415716, "grad_norm": 0.09195923805236816, "learning_rate": 3.821622568775188e-06, "loss": 8.5322, "step": 174800 }, { "epoch": 0.872980598766511, "grad_norm": 0.0930572971701622, "learning_rate": 3.820120653833638e-06, "loss": 8.538, "step": 174810 }, { "epoch": 0.8730305375914504, "grad_norm": 0.086077980697155, "learning_rate": 3.818618738892088e-06, "loss": 8.5311, "step": 174820 }, { "epoch": 0.87308047641639, "grad_norm": 0.09503497928380966, "learning_rate": 3.817116823950537e-06, "loss": 8.5202, "step": 174830 }, { "epoch": 0.8731304152413294, "grad_norm": 0.09077981114387512, "learning_rate": 3.815614909008987e-06, "loss": 8.5122, "step": 174840 }, { "epoch": 0.8731803540662688, "grad_norm": 0.08733651787042618, "learning_rate": 3.8141129940674356e-06, "loss": 8.5313, "step": 174850 }, { "epoch": 0.8732302928912082, "grad_norm": 0.08977235853672028, "learning_rate": 3.8126110791258854e-06, "loss": 8.5195, "step": 174860 }, { "epoch": 0.8732802317161478, "grad_norm": 0.09327967464923859, "learning_rate": 3.8111091641843356e-06, "loss": 8.5095, "step": 174870 }, { "epoch": 0.8733301705410872, "grad_norm": 0.08643508702516556, "learning_rate": 3.8096072492427845e-06, "loss": 8.5356, "step": 174880 }, { "epoch": 0.8733801093660266, "grad_norm": 0.09138485044240952, "learning_rate": 3.8081053343012342e-06, "loss": 8.519, "step": 174890 }, { "epoch": 0.873430048190966, "grad_norm": 0.09083333611488342, "learning_rate": 3.806603419359684e-06, "loss": 8.5285, "step": 174900 }, { "epoch": 0.8734799870159056, "grad_norm": 0.09481485933065414, "learning_rate": 3.805101504418133e-06, "loss": 8.5086, "step": 174910 }, { "epoch": 0.873529925840845, "grad_norm": 0.0904790461063385, "learning_rate": 3.803599589476583e-06, "loss": 8.5069, "step": 174920 }, { "epoch": 0.8735798646657844, "grad_norm": 0.08950541913509369, "learning_rate": 3.802097674535032e-06, "loss": 8.5393, "step": 174930 }, { "epoch": 0.8736298034907238, "grad_norm": 0.09654703736305237, "learning_rate": 3.8005957595934817e-06, "loss": 8.5304, "step": 174940 }, { "epoch": 0.8736797423156634, "grad_norm": 0.09147506207227707, "learning_rate": 3.7990938446519315e-06, "loss": 8.5209, "step": 174950 }, { "epoch": 0.8737296811406028, "grad_norm": 0.09071029722690582, "learning_rate": 3.7975919297103804e-06, "loss": 8.5118, "step": 174960 }, { "epoch": 0.8737796199655422, "grad_norm": 0.0901302620768547, "learning_rate": 3.7960900147688306e-06, "loss": 8.5154, "step": 174970 }, { "epoch": 0.8738295587904816, "grad_norm": 0.0890451967716217, "learning_rate": 3.7945880998272803e-06, "loss": 8.5242, "step": 174980 }, { "epoch": 0.8738794976154212, "grad_norm": 0.09417781978845596, "learning_rate": 3.7930861848857292e-06, "loss": 8.5159, "step": 174990 }, { "epoch": 0.8739294364403606, "grad_norm": 0.09130019694566727, "learning_rate": 3.791584269944179e-06, "loss": 8.4936, "step": 175000 }, { "epoch": 0.8739793752653, "grad_norm": 0.09282857924699783, "learning_rate": 3.7900823550026283e-06, "loss": 8.5298, "step": 175010 }, { "epoch": 0.8740293140902394, "grad_norm": 0.09820152819156647, "learning_rate": 3.788580440061078e-06, "loss": 8.5198, "step": 175020 }, { "epoch": 0.8740792529151789, "grad_norm": 0.0940333679318428, "learning_rate": 3.787078525119528e-06, "loss": 8.5207, "step": 175030 }, { "epoch": 0.8741291917401184, "grad_norm": 0.09212444722652435, "learning_rate": 3.7855766101779767e-06, "loss": 8.5327, "step": 175040 }, { "epoch": 0.8741791305650578, "grad_norm": 0.09330432116985321, "learning_rate": 3.7840746952364265e-06, "loss": 8.5186, "step": 175050 }, { "epoch": 0.8742290693899972, "grad_norm": 0.09266900271177292, "learning_rate": 3.7825727802948767e-06, "loss": 8.5161, "step": 175060 }, { "epoch": 0.8742790082149366, "grad_norm": 0.09173017740249634, "learning_rate": 3.7810708653533256e-06, "loss": 8.5348, "step": 175070 }, { "epoch": 0.8743289470398762, "grad_norm": 0.09216035157442093, "learning_rate": 3.7795689504117753e-06, "loss": 8.5296, "step": 175080 }, { "epoch": 0.8743788858648156, "grad_norm": 0.08925978094339371, "learning_rate": 3.7780670354702242e-06, "loss": 8.5101, "step": 175090 }, { "epoch": 0.874428824689755, "grad_norm": 0.09558253735303879, "learning_rate": 3.776565120528674e-06, "loss": 8.5213, "step": 175100 }, { "epoch": 0.8744787635146944, "grad_norm": 0.09404308348894119, "learning_rate": 3.775063205587124e-06, "loss": 8.5122, "step": 175110 }, { "epoch": 0.874528702339634, "grad_norm": 0.09096416085958481, "learning_rate": 3.773561290645573e-06, "loss": 8.5048, "step": 175120 }, { "epoch": 0.8745786411645734, "grad_norm": 0.0968070700764656, "learning_rate": 3.772059375704023e-06, "loss": 8.5088, "step": 175130 }, { "epoch": 0.8746285799895128, "grad_norm": 0.09349347651004791, "learning_rate": 3.7705574607624726e-06, "loss": 8.5247, "step": 175140 }, { "epoch": 0.8746785188144522, "grad_norm": 0.09110265225172043, "learning_rate": 3.7690555458209215e-06, "loss": 8.5225, "step": 175150 }, { "epoch": 0.8747284576393918, "grad_norm": 0.09158271551132202, "learning_rate": 3.7675536308793717e-06, "loss": 8.5216, "step": 175160 }, { "epoch": 0.8747783964643312, "grad_norm": 0.09609760344028473, "learning_rate": 3.7660517159378206e-06, "loss": 8.5151, "step": 175170 }, { "epoch": 0.8748283352892706, "grad_norm": 0.09342726320028305, "learning_rate": 3.7645498009962703e-06, "loss": 8.5056, "step": 175180 }, { "epoch": 0.87487827411421, "grad_norm": 0.09422183781862259, "learning_rate": 3.76304788605472e-06, "loss": 8.5344, "step": 175190 }, { "epoch": 0.8749282129391496, "grad_norm": 0.0885339081287384, "learning_rate": 3.761545971113169e-06, "loss": 8.5224, "step": 175200 }, { "epoch": 0.874978151764089, "grad_norm": 0.09695085138082504, "learning_rate": 3.760044056171619e-06, "loss": 8.5084, "step": 175210 }, { "epoch": 0.8750280905890284, "grad_norm": 0.08846613019704819, "learning_rate": 3.758542141230069e-06, "loss": 8.5175, "step": 175220 }, { "epoch": 0.8750780294139678, "grad_norm": 0.09065619111061096, "learning_rate": 3.757040226288518e-06, "loss": 8.5186, "step": 175230 }, { "epoch": 0.8751279682389074, "grad_norm": 0.09855975955724716, "learning_rate": 3.7555383113469676e-06, "loss": 8.5197, "step": 175240 }, { "epoch": 0.8751779070638468, "grad_norm": 0.09044459462165833, "learning_rate": 3.7540363964054165e-06, "loss": 8.5144, "step": 175250 }, { "epoch": 0.8752278458887862, "grad_norm": 0.08905865252017975, "learning_rate": 3.7525344814638667e-06, "loss": 8.5222, "step": 175260 }, { "epoch": 0.8752777847137256, "grad_norm": 0.11130794882774353, "learning_rate": 3.7510325665223164e-06, "loss": 8.5262, "step": 175270 }, { "epoch": 0.8753277235386652, "grad_norm": 0.08949120342731476, "learning_rate": 3.7495306515807658e-06, "loss": 8.5224, "step": 175280 }, { "epoch": 0.8753776623636046, "grad_norm": 0.09382175654172897, "learning_rate": 3.748028736639215e-06, "loss": 8.5169, "step": 175290 }, { "epoch": 0.875427601188544, "grad_norm": 0.09268946945667267, "learning_rate": 3.7465268216976644e-06, "loss": 8.5137, "step": 175300 }, { "epoch": 0.8754775400134834, "grad_norm": 0.09075185656547546, "learning_rate": 3.745024906756114e-06, "loss": 8.5204, "step": 175310 }, { "epoch": 0.875527478838423, "grad_norm": 0.0939631536602974, "learning_rate": 3.743522991814564e-06, "loss": 8.519, "step": 175320 }, { "epoch": 0.8755774176633624, "grad_norm": 0.08865179121494293, "learning_rate": 3.7420210768730133e-06, "loss": 8.5212, "step": 175330 }, { "epoch": 0.8756273564883018, "grad_norm": 0.0853448212146759, "learning_rate": 3.7405191619314626e-06, "loss": 8.5186, "step": 175340 }, { "epoch": 0.8756772953132412, "grad_norm": 0.09132332354784012, "learning_rate": 3.739017246989912e-06, "loss": 8.5169, "step": 175350 }, { "epoch": 0.8757272341381808, "grad_norm": 0.08930841088294983, "learning_rate": 3.737515332048362e-06, "loss": 8.5192, "step": 175360 }, { "epoch": 0.8757771729631202, "grad_norm": 0.09462074190378189, "learning_rate": 3.7360134171068115e-06, "loss": 8.521, "step": 175370 }, { "epoch": 0.8758271117880596, "grad_norm": 0.09071394056081772, "learning_rate": 3.7345115021652608e-06, "loss": 8.5251, "step": 175380 }, { "epoch": 0.875877050612999, "grad_norm": 0.09404830634593964, "learning_rate": 3.73300958722371e-06, "loss": 8.5084, "step": 175390 }, { "epoch": 0.8759269894379386, "grad_norm": 0.09391729533672333, "learning_rate": 3.73150767228216e-06, "loss": 8.5464, "step": 175400 }, { "epoch": 0.875976928262878, "grad_norm": 0.08704748004674911, "learning_rate": 3.7300057573406096e-06, "loss": 8.516, "step": 175410 }, { "epoch": 0.8760268670878174, "grad_norm": 0.096518374979496, "learning_rate": 3.728503842399059e-06, "loss": 8.5001, "step": 175420 }, { "epoch": 0.8760768059127568, "grad_norm": 0.09533105045557022, "learning_rate": 3.7270019274575083e-06, "loss": 8.517, "step": 175430 }, { "epoch": 0.8761267447376964, "grad_norm": 0.09067413210868835, "learning_rate": 3.725500012515958e-06, "loss": 8.5323, "step": 175440 }, { "epoch": 0.8761766835626358, "grad_norm": 0.09444481879472733, "learning_rate": 3.7239980975744074e-06, "loss": 8.5037, "step": 175450 }, { "epoch": 0.8762266223875752, "grad_norm": 0.08779852837324142, "learning_rate": 3.722496182632857e-06, "loss": 8.5255, "step": 175460 }, { "epoch": 0.8762765612125146, "grad_norm": 0.08670119941234589, "learning_rate": 3.7209942676913065e-06, "loss": 8.5018, "step": 175470 }, { "epoch": 0.8763265000374542, "grad_norm": 0.09427984058856964, "learning_rate": 3.719492352749756e-06, "loss": 8.5081, "step": 175480 }, { "epoch": 0.8763764388623936, "grad_norm": 0.09233436733484268, "learning_rate": 3.7179904378082055e-06, "loss": 8.5266, "step": 175490 }, { "epoch": 0.876426377687333, "grad_norm": 0.08909424394369125, "learning_rate": 3.716488522866655e-06, "loss": 8.5278, "step": 175500 }, { "epoch": 0.8764763165122724, "grad_norm": 0.08954276144504547, "learning_rate": 3.7149866079251046e-06, "loss": 8.5341, "step": 175510 }, { "epoch": 0.876526255337212, "grad_norm": 0.09071674197912216, "learning_rate": 3.713484692983554e-06, "loss": 8.5145, "step": 175520 }, { "epoch": 0.8765761941621514, "grad_norm": 0.08852982521057129, "learning_rate": 3.7119827780420037e-06, "loss": 8.519, "step": 175530 }, { "epoch": 0.8766261329870908, "grad_norm": 0.08784948289394379, "learning_rate": 3.710480863100453e-06, "loss": 8.5251, "step": 175540 }, { "epoch": 0.8766760718120302, "grad_norm": 0.09313932061195374, "learning_rate": 3.708978948158903e-06, "loss": 8.5231, "step": 175550 }, { "epoch": 0.8767260106369698, "grad_norm": 0.09138453006744385, "learning_rate": 3.707477033217352e-06, "loss": 8.5293, "step": 175560 }, { "epoch": 0.8767759494619092, "grad_norm": 0.08735091984272003, "learning_rate": 3.705975118275802e-06, "loss": 8.5286, "step": 175570 }, { "epoch": 0.8768258882868486, "grad_norm": 0.09122058749198914, "learning_rate": 3.7044732033342512e-06, "loss": 8.4957, "step": 175580 }, { "epoch": 0.876875827111788, "grad_norm": 0.0900067389011383, "learning_rate": 3.7029712883927006e-06, "loss": 8.5311, "step": 175590 }, { "epoch": 0.8769257659367276, "grad_norm": 0.08420617878437042, "learning_rate": 3.7014693734511503e-06, "loss": 8.5117, "step": 175600 }, { "epoch": 0.876975704761667, "grad_norm": 0.08814489841461182, "learning_rate": 3.6999674585096e-06, "loss": 8.5307, "step": 175610 }, { "epoch": 0.8770256435866064, "grad_norm": 0.09584464132785797, "learning_rate": 3.6984655435680494e-06, "loss": 8.525, "step": 175620 }, { "epoch": 0.8770755824115458, "grad_norm": 0.08932922780513763, "learning_rate": 3.6969636286264987e-06, "loss": 8.5171, "step": 175630 }, { "epoch": 0.8771255212364854, "grad_norm": 0.08885538578033447, "learning_rate": 3.695461713684948e-06, "loss": 8.5202, "step": 175640 }, { "epoch": 0.8771754600614248, "grad_norm": 0.09274578839540482, "learning_rate": 3.6939597987433982e-06, "loss": 8.5244, "step": 175650 }, { "epoch": 0.8772253988863642, "grad_norm": 0.09348073601722717, "learning_rate": 3.6924578838018476e-06, "loss": 8.5243, "step": 175660 }, { "epoch": 0.8772753377113036, "grad_norm": 0.0866040512919426, "learning_rate": 3.690955968860297e-06, "loss": 8.5139, "step": 175670 }, { "epoch": 0.8773252765362431, "grad_norm": 0.08929452300071716, "learning_rate": 3.6894540539187462e-06, "loss": 8.5253, "step": 175680 }, { "epoch": 0.8773752153611826, "grad_norm": 0.0889781191945076, "learning_rate": 3.687952138977196e-06, "loss": 8.5166, "step": 175690 }, { "epoch": 0.877425154186122, "grad_norm": 0.09169498085975647, "learning_rate": 3.6864502240356457e-06, "loss": 8.5326, "step": 175700 }, { "epoch": 0.8774750930110614, "grad_norm": 0.08967631310224533, "learning_rate": 3.684948309094095e-06, "loss": 8.5351, "step": 175710 }, { "epoch": 0.877525031836001, "grad_norm": 0.09231317043304443, "learning_rate": 3.6834463941525444e-06, "loss": 8.4988, "step": 175720 }, { "epoch": 0.8775749706609404, "grad_norm": 0.09632280468940735, "learning_rate": 3.681944479210994e-06, "loss": 8.5177, "step": 175730 }, { "epoch": 0.8776249094858798, "grad_norm": 0.0874035581946373, "learning_rate": 3.6804425642694435e-06, "loss": 8.5378, "step": 175740 }, { "epoch": 0.8776748483108192, "grad_norm": 0.08852864801883698, "learning_rate": 3.6789406493278932e-06, "loss": 8.522, "step": 175750 }, { "epoch": 0.8777247871357587, "grad_norm": 0.09550099074840546, "learning_rate": 3.6774387343863426e-06, "loss": 8.5171, "step": 175760 }, { "epoch": 0.8777747259606982, "grad_norm": 0.09109804779291153, "learning_rate": 3.6759368194447923e-06, "loss": 8.5383, "step": 175770 }, { "epoch": 0.8778246647856376, "grad_norm": 0.08961420506238937, "learning_rate": 3.6744349045032417e-06, "loss": 8.5162, "step": 175780 }, { "epoch": 0.877874603610577, "grad_norm": 0.0872698426246643, "learning_rate": 3.672932989561691e-06, "loss": 8.5116, "step": 175790 }, { "epoch": 0.8779245424355165, "grad_norm": 0.0968591645359993, "learning_rate": 3.6714310746201407e-06, "loss": 8.5413, "step": 175800 }, { "epoch": 0.877974481260456, "grad_norm": 0.09531614184379578, "learning_rate": 3.6699291596785905e-06, "loss": 8.5404, "step": 175810 }, { "epoch": 0.8780244200853954, "grad_norm": 0.09465590864419937, "learning_rate": 3.66842724473704e-06, "loss": 8.5129, "step": 175820 }, { "epoch": 0.8780743589103348, "grad_norm": 0.09236373007297516, "learning_rate": 3.666925329795489e-06, "loss": 8.5317, "step": 175830 }, { "epoch": 0.8781242977352743, "grad_norm": 0.09544330835342407, "learning_rate": 3.6654234148539385e-06, "loss": 8.5133, "step": 175840 }, { "epoch": 0.8781742365602138, "grad_norm": 0.08793754130601883, "learning_rate": 3.6639214999123887e-06, "loss": 8.519, "step": 175850 }, { "epoch": 0.8782241753851532, "grad_norm": 0.09597655385732651, "learning_rate": 3.662419584970838e-06, "loss": 8.5266, "step": 175860 }, { "epoch": 0.8782741142100926, "grad_norm": 0.09199637174606323, "learning_rate": 3.6609176700292873e-06, "loss": 8.5188, "step": 175870 }, { "epoch": 0.8783240530350321, "grad_norm": 0.09712479263544083, "learning_rate": 3.6594157550877367e-06, "loss": 8.5235, "step": 175880 }, { "epoch": 0.8783739918599716, "grad_norm": 0.08960530161857605, "learning_rate": 3.6579138401461864e-06, "loss": 8.5275, "step": 175890 }, { "epoch": 0.878423930684911, "grad_norm": 0.08847316354513168, "learning_rate": 3.656411925204636e-06, "loss": 8.5109, "step": 175900 }, { "epoch": 0.8784738695098504, "grad_norm": 0.09034876525402069, "learning_rate": 3.6549100102630855e-06, "loss": 8.54, "step": 175910 }, { "epoch": 0.8785238083347899, "grad_norm": 0.09454880654811859, "learning_rate": 3.653408095321535e-06, "loss": 8.5137, "step": 175920 }, { "epoch": 0.8785737471597294, "grad_norm": 0.08720461279153824, "learning_rate": 3.6519061803799846e-06, "loss": 8.5258, "step": 175930 }, { "epoch": 0.8786236859846688, "grad_norm": 0.08554758876562119, "learning_rate": 3.650404265438434e-06, "loss": 8.514, "step": 175940 }, { "epoch": 0.8786736248096082, "grad_norm": 0.09344278275966644, "learning_rate": 3.6489023504968837e-06, "loss": 8.5226, "step": 175950 }, { "epoch": 0.8787235636345477, "grad_norm": 0.09505718946456909, "learning_rate": 3.647400435555333e-06, "loss": 8.4881, "step": 175960 }, { "epoch": 0.8787735024594872, "grad_norm": 0.08501995354890823, "learning_rate": 3.6458985206137828e-06, "loss": 8.5071, "step": 175970 }, { "epoch": 0.8788234412844266, "grad_norm": 0.09370040893554688, "learning_rate": 3.644396605672232e-06, "loss": 8.5162, "step": 175980 }, { "epoch": 0.878873380109366, "grad_norm": 0.08761072903871536, "learning_rate": 3.6428946907306814e-06, "loss": 8.5417, "step": 175990 }, { "epoch": 0.8789233189343055, "grad_norm": 0.08839986473321915, "learning_rate": 3.641392775789131e-06, "loss": 8.5306, "step": 176000 }, { "epoch": 0.878973257759245, "grad_norm": 0.09286805242300034, "learning_rate": 3.639890860847581e-06, "loss": 8.526, "step": 176010 }, { "epoch": 0.8790231965841844, "grad_norm": 0.08983197808265686, "learning_rate": 3.6383889459060303e-06, "loss": 8.5081, "step": 176020 }, { "epoch": 0.8790731354091238, "grad_norm": 0.09328821301460266, "learning_rate": 3.6368870309644796e-06, "loss": 8.5201, "step": 176030 }, { "epoch": 0.8791230742340632, "grad_norm": 0.08646514266729355, "learning_rate": 3.635385116022929e-06, "loss": 8.522, "step": 176040 }, { "epoch": 0.8791730130590028, "grad_norm": 0.09557259827852249, "learning_rate": 3.633883201081379e-06, "loss": 8.5124, "step": 176050 }, { "epoch": 0.8792229518839422, "grad_norm": 0.08780386298894882, "learning_rate": 3.6323812861398284e-06, "loss": 8.5364, "step": 176060 }, { "epoch": 0.8792728907088816, "grad_norm": 0.08918431401252747, "learning_rate": 3.6308793711982778e-06, "loss": 8.5319, "step": 176070 }, { "epoch": 0.879322829533821, "grad_norm": 0.09585704654455185, "learning_rate": 3.629377456256727e-06, "loss": 8.5099, "step": 176080 }, { "epoch": 0.8793727683587605, "grad_norm": 0.08608123660087585, "learning_rate": 3.627875541315177e-06, "loss": 8.5201, "step": 176090 }, { "epoch": 0.8794227071837, "grad_norm": 0.09179461002349854, "learning_rate": 3.6263736263736266e-06, "loss": 8.5454, "step": 176100 }, { "epoch": 0.8794726460086394, "grad_norm": 0.09087244421243668, "learning_rate": 3.624871711432076e-06, "loss": 8.5105, "step": 176110 }, { "epoch": 0.8795225848335788, "grad_norm": 0.09345267713069916, "learning_rate": 3.6233697964905253e-06, "loss": 8.5111, "step": 176120 }, { "epoch": 0.8795725236585183, "grad_norm": 0.09425700455904007, "learning_rate": 3.621867881548975e-06, "loss": 8.5227, "step": 176130 }, { "epoch": 0.8796224624834578, "grad_norm": 0.08979269117116928, "learning_rate": 3.6203659666074248e-06, "loss": 8.5186, "step": 176140 }, { "epoch": 0.8796724013083972, "grad_norm": 0.09118253737688065, "learning_rate": 3.618864051665874e-06, "loss": 8.5299, "step": 176150 }, { "epoch": 0.8797223401333366, "grad_norm": 0.09220730513334274, "learning_rate": 3.6173621367243234e-06, "loss": 8.5191, "step": 176160 }, { "epoch": 0.8797722789582761, "grad_norm": 0.08798353374004364, "learning_rate": 3.615860221782773e-06, "loss": 8.5048, "step": 176170 }, { "epoch": 0.8798222177832156, "grad_norm": 0.0894918367266655, "learning_rate": 3.6143583068412225e-06, "loss": 8.5088, "step": 176180 }, { "epoch": 0.879872156608155, "grad_norm": 0.09121794253587723, "learning_rate": 3.6128563918996723e-06, "loss": 8.5221, "step": 176190 }, { "epoch": 0.8799220954330944, "grad_norm": 0.09195500612258911, "learning_rate": 3.6113544769581216e-06, "loss": 8.5301, "step": 176200 }, { "epoch": 0.8799720342580339, "grad_norm": 0.08938493579626083, "learning_rate": 3.6098525620165714e-06, "loss": 8.5136, "step": 176210 }, { "epoch": 0.8800219730829734, "grad_norm": 0.08818230032920837, "learning_rate": 3.6083506470750207e-06, "loss": 8.5135, "step": 176220 }, { "epoch": 0.8800719119079128, "grad_norm": 0.09114837646484375, "learning_rate": 3.60684873213347e-06, "loss": 8.5182, "step": 176230 }, { "epoch": 0.8801218507328522, "grad_norm": 0.09198463708162308, "learning_rate": 3.60534681719192e-06, "loss": 8.5281, "step": 176240 }, { "epoch": 0.8801717895577917, "grad_norm": 0.08858832716941833, "learning_rate": 3.6038449022503695e-06, "loss": 8.5173, "step": 176250 }, { "epoch": 0.8802217283827312, "grad_norm": 0.09060419350862503, "learning_rate": 3.602342987308819e-06, "loss": 8.5146, "step": 176260 }, { "epoch": 0.8802716672076706, "grad_norm": 0.09478036314249039, "learning_rate": 3.600841072367268e-06, "loss": 8.5338, "step": 176270 }, { "epoch": 0.88032160603261, "grad_norm": 0.09485729038715363, "learning_rate": 3.5993391574257175e-06, "loss": 8.5161, "step": 176280 }, { "epoch": 0.8803715448575495, "grad_norm": 0.08923057466745377, "learning_rate": 3.5978372424841677e-06, "loss": 8.512, "step": 176290 }, { "epoch": 0.880421483682489, "grad_norm": 0.09072960168123245, "learning_rate": 3.596335327542617e-06, "loss": 8.5052, "step": 176300 }, { "epoch": 0.8804714225074284, "grad_norm": 0.08910852670669556, "learning_rate": 3.5948334126010664e-06, "loss": 8.511, "step": 176310 }, { "epoch": 0.8805213613323678, "grad_norm": 0.09066713601350784, "learning_rate": 3.5933314976595157e-06, "loss": 8.5136, "step": 176320 }, { "epoch": 0.8805713001573073, "grad_norm": 0.08695919066667557, "learning_rate": 3.5918295827179655e-06, "loss": 8.5367, "step": 176330 }, { "epoch": 0.8806212389822468, "grad_norm": 0.08876381814479828, "learning_rate": 3.5903276677764152e-06, "loss": 8.5279, "step": 176340 }, { "epoch": 0.8806711778071862, "grad_norm": 0.09524772316217422, "learning_rate": 3.5888257528348646e-06, "loss": 8.5178, "step": 176350 }, { "epoch": 0.8807211166321256, "grad_norm": 0.0915902629494667, "learning_rate": 3.587323837893314e-06, "loss": 8.5005, "step": 176360 }, { "epoch": 0.8807710554570651, "grad_norm": 0.091648168861866, "learning_rate": 3.5858219229517636e-06, "loss": 8.522, "step": 176370 }, { "epoch": 0.8808209942820046, "grad_norm": 0.08661641925573349, "learning_rate": 3.584320008010213e-06, "loss": 8.5298, "step": 176380 }, { "epoch": 0.880870933106944, "grad_norm": 0.09084923565387726, "learning_rate": 3.5828180930686627e-06, "loss": 8.5104, "step": 176390 }, { "epoch": 0.8809208719318834, "grad_norm": 0.09532788395881653, "learning_rate": 3.581316178127112e-06, "loss": 8.5164, "step": 176400 }, { "epoch": 0.8809708107568229, "grad_norm": 0.09041056036949158, "learning_rate": 3.579814263185562e-06, "loss": 8.5083, "step": 176410 }, { "epoch": 0.8810207495817624, "grad_norm": 0.0913330689072609, "learning_rate": 3.578312348244011e-06, "loss": 8.526, "step": 176420 }, { "epoch": 0.8810706884067018, "grad_norm": 0.09379500150680542, "learning_rate": 3.5768104333024605e-06, "loss": 8.5027, "step": 176430 }, { "epoch": 0.8811206272316412, "grad_norm": 0.09111961722373962, "learning_rate": 3.5753085183609102e-06, "loss": 8.5041, "step": 176440 }, { "epoch": 0.8811705660565807, "grad_norm": 0.09450194984674454, "learning_rate": 3.57380660341936e-06, "loss": 8.5164, "step": 176450 }, { "epoch": 0.8812205048815202, "grad_norm": 0.08781801164150238, "learning_rate": 3.5723046884778093e-06, "loss": 8.5286, "step": 176460 }, { "epoch": 0.8812704437064596, "grad_norm": 0.08742903918027878, "learning_rate": 3.5708027735362586e-06, "loss": 8.5282, "step": 176470 }, { "epoch": 0.881320382531399, "grad_norm": 0.09143572300672531, "learning_rate": 3.569300858594708e-06, "loss": 8.5142, "step": 176480 }, { "epoch": 0.8813703213563385, "grad_norm": 0.0917043462395668, "learning_rate": 3.567798943653158e-06, "loss": 8.5381, "step": 176490 }, { "epoch": 0.881420260181278, "grad_norm": 0.08833152055740356, "learning_rate": 3.5662970287116075e-06, "loss": 8.5399, "step": 176500 }, { "epoch": 0.8814701990062174, "grad_norm": 0.09015662968158722, "learning_rate": 3.564795113770057e-06, "loss": 8.5365, "step": 176510 }, { "epoch": 0.8815201378311568, "grad_norm": 0.0858858972787857, "learning_rate": 3.563293198828506e-06, "loss": 8.5222, "step": 176520 }, { "epoch": 0.8815700766560963, "grad_norm": 0.08976481109857559, "learning_rate": 3.561791283886956e-06, "loss": 8.5183, "step": 176530 }, { "epoch": 0.8816200154810357, "grad_norm": 0.08899136632680893, "learning_rate": 3.5602893689454057e-06, "loss": 8.5094, "step": 176540 }, { "epoch": 0.8816699543059752, "grad_norm": 0.09167985618114471, "learning_rate": 3.558787454003855e-06, "loss": 8.5039, "step": 176550 }, { "epoch": 0.8817198931309146, "grad_norm": 0.09655990451574326, "learning_rate": 3.5572855390623043e-06, "loss": 8.5208, "step": 176560 }, { "epoch": 0.8817698319558541, "grad_norm": 0.0897342711687088, "learning_rate": 3.555783624120754e-06, "loss": 8.511, "step": 176570 }, { "epoch": 0.8818197707807935, "grad_norm": 0.08929044008255005, "learning_rate": 3.5542817091792034e-06, "loss": 8.5278, "step": 176580 }, { "epoch": 0.881869709605733, "grad_norm": 0.09658628702163696, "learning_rate": 3.552779794237653e-06, "loss": 8.501, "step": 176590 }, { "epoch": 0.8819196484306724, "grad_norm": 0.09207036346197128, "learning_rate": 3.5512778792961025e-06, "loss": 8.5236, "step": 176600 }, { "epoch": 0.8819695872556119, "grad_norm": 0.09149431437253952, "learning_rate": 3.5497759643545523e-06, "loss": 8.5151, "step": 176610 }, { "epoch": 0.8820195260805513, "grad_norm": 0.08894215524196625, "learning_rate": 3.5482740494130016e-06, "loss": 8.5278, "step": 176620 }, { "epoch": 0.8820694649054908, "grad_norm": 0.09006328135728836, "learning_rate": 3.5467721344714513e-06, "loss": 8.5218, "step": 176630 }, { "epoch": 0.8821194037304302, "grad_norm": 0.09437132626771927, "learning_rate": 3.5452702195299007e-06, "loss": 8.5154, "step": 176640 }, { "epoch": 0.8821693425553697, "grad_norm": 0.09077361971139908, "learning_rate": 3.5437683045883504e-06, "loss": 8.5006, "step": 176650 }, { "epoch": 0.8822192813803091, "grad_norm": 0.08984161168336868, "learning_rate": 3.5422663896467998e-06, "loss": 8.5379, "step": 176660 }, { "epoch": 0.8822692202052486, "grad_norm": 0.09607677906751633, "learning_rate": 3.540764474705249e-06, "loss": 8.525, "step": 176670 }, { "epoch": 0.882319159030188, "grad_norm": 0.09277524054050446, "learning_rate": 3.539262559763699e-06, "loss": 8.5213, "step": 176680 }, { "epoch": 0.8823690978551275, "grad_norm": 0.09065360575914383, "learning_rate": 3.5377606448221486e-06, "loss": 8.5276, "step": 176690 }, { "epoch": 0.8824190366800669, "grad_norm": 0.10031012445688248, "learning_rate": 3.536258729880598e-06, "loss": 8.4984, "step": 176700 }, { "epoch": 0.8824689755050064, "grad_norm": 0.08632688969373703, "learning_rate": 3.5347568149390473e-06, "loss": 8.549, "step": 176710 }, { "epoch": 0.8825189143299458, "grad_norm": 0.09275210648775101, "learning_rate": 3.5332548999974966e-06, "loss": 8.5149, "step": 176720 }, { "epoch": 0.8825688531548853, "grad_norm": 0.08953607082366943, "learning_rate": 3.5317529850559468e-06, "loss": 8.5107, "step": 176730 }, { "epoch": 0.8826187919798247, "grad_norm": 0.09068858623504639, "learning_rate": 3.530251070114396e-06, "loss": 8.5048, "step": 176740 }, { "epoch": 0.8826687308047642, "grad_norm": 0.08825194090604782, "learning_rate": 3.5287491551728454e-06, "loss": 8.5169, "step": 176750 }, { "epoch": 0.8827186696297036, "grad_norm": 0.08667933940887451, "learning_rate": 3.5272472402312948e-06, "loss": 8.5104, "step": 176760 }, { "epoch": 0.8827686084546431, "grad_norm": 0.0986347422003746, "learning_rate": 3.5257453252897445e-06, "loss": 8.5012, "step": 176770 }, { "epoch": 0.8828185472795825, "grad_norm": 0.09399598091840744, "learning_rate": 3.5242434103481943e-06, "loss": 8.5245, "step": 176780 }, { "epoch": 0.882868486104522, "grad_norm": 0.0902981236577034, "learning_rate": 3.5227414954066436e-06, "loss": 8.5248, "step": 176790 }, { "epoch": 0.8829184249294614, "grad_norm": 0.09248172491788864, "learning_rate": 3.521239580465093e-06, "loss": 8.526, "step": 176800 }, { "epoch": 0.8829683637544009, "grad_norm": 0.08781624585390091, "learning_rate": 3.5197376655235427e-06, "loss": 8.5162, "step": 176810 }, { "epoch": 0.8830183025793403, "grad_norm": 0.09201288223266602, "learning_rate": 3.518235750581992e-06, "loss": 8.5082, "step": 176820 }, { "epoch": 0.8830682414042798, "grad_norm": 0.0937962606549263, "learning_rate": 3.5167338356404418e-06, "loss": 8.5262, "step": 176830 }, { "epoch": 0.8831181802292192, "grad_norm": 0.08981435745954514, "learning_rate": 3.515231920698891e-06, "loss": 8.5141, "step": 176840 }, { "epoch": 0.8831681190541587, "grad_norm": 0.09788450598716736, "learning_rate": 3.513730005757341e-06, "loss": 8.5209, "step": 176850 }, { "epoch": 0.8832180578790981, "grad_norm": 0.09376143664121628, "learning_rate": 3.51222809081579e-06, "loss": 8.5123, "step": 176860 }, { "epoch": 0.8832679967040376, "grad_norm": 0.09434860199689865, "learning_rate": 3.5107261758742395e-06, "loss": 8.5131, "step": 176870 }, { "epoch": 0.883317935528977, "grad_norm": 0.09667465090751648, "learning_rate": 3.5092242609326893e-06, "loss": 8.5213, "step": 176880 }, { "epoch": 0.8833678743539165, "grad_norm": 0.09229189902544022, "learning_rate": 3.507722345991139e-06, "loss": 8.5217, "step": 176890 }, { "epoch": 0.8834178131788559, "grad_norm": 0.08936712145805359, "learning_rate": 3.5062204310495884e-06, "loss": 8.5231, "step": 176900 }, { "epoch": 0.8834677520037953, "grad_norm": 0.08999984711408615, "learning_rate": 3.5047185161080377e-06, "loss": 8.5265, "step": 176910 }, { "epoch": 0.8835176908287348, "grad_norm": 0.08811944723129272, "learning_rate": 3.503216601166487e-06, "loss": 8.5266, "step": 176920 }, { "epoch": 0.8835676296536743, "grad_norm": 0.0963667631149292, "learning_rate": 3.501714686224937e-06, "loss": 8.5098, "step": 176930 }, { "epoch": 0.8836175684786137, "grad_norm": 0.09169621020555496, "learning_rate": 3.5002127712833865e-06, "loss": 8.5253, "step": 176940 }, { "epoch": 0.8836675073035531, "grad_norm": 0.095432348549366, "learning_rate": 3.498710856341836e-06, "loss": 8.5165, "step": 176950 }, { "epoch": 0.8837174461284926, "grad_norm": 0.09653273224830627, "learning_rate": 3.497208941400285e-06, "loss": 8.5344, "step": 176960 }, { "epoch": 0.8837673849534321, "grad_norm": 0.09007629007101059, "learning_rate": 3.495707026458735e-06, "loss": 8.5275, "step": 176970 }, { "epoch": 0.8838173237783715, "grad_norm": 0.09573765844106674, "learning_rate": 3.4942051115171847e-06, "loss": 8.5092, "step": 176980 }, { "epoch": 0.8838672626033109, "grad_norm": 0.09132372587919235, "learning_rate": 3.492703196575634e-06, "loss": 8.518, "step": 176990 }, { "epoch": 0.8839172014282504, "grad_norm": 0.09207233786582947, "learning_rate": 3.4912012816340834e-06, "loss": 8.5078, "step": 177000 }, { "epoch": 0.8839671402531898, "grad_norm": 0.09270022809505463, "learning_rate": 3.489699366692533e-06, "loss": 8.549, "step": 177010 }, { "epoch": 0.8840170790781293, "grad_norm": 0.09225805103778839, "learning_rate": 3.4881974517509825e-06, "loss": 8.518, "step": 177020 }, { "epoch": 0.8840670179030687, "grad_norm": 0.092227041721344, "learning_rate": 3.4866955368094322e-06, "loss": 8.5222, "step": 177030 }, { "epoch": 0.8841169567280082, "grad_norm": 0.08810020238161087, "learning_rate": 3.4851936218678815e-06, "loss": 8.5089, "step": 177040 }, { "epoch": 0.8841668955529476, "grad_norm": 0.08814115077257156, "learning_rate": 3.4836917069263313e-06, "loss": 8.5423, "step": 177050 }, { "epoch": 0.8842168343778871, "grad_norm": 0.09531360119581223, "learning_rate": 3.4821897919847806e-06, "loss": 8.5245, "step": 177060 }, { "epoch": 0.8842667732028265, "grad_norm": 0.08866927027702332, "learning_rate": 3.48068787704323e-06, "loss": 8.5009, "step": 177070 }, { "epoch": 0.884316712027766, "grad_norm": 0.08944374322891235, "learning_rate": 3.4791859621016797e-06, "loss": 8.5293, "step": 177080 }, { "epoch": 0.8843666508527054, "grad_norm": 0.08847851306200027, "learning_rate": 3.4776840471601295e-06, "loss": 8.5336, "step": 177090 }, { "epoch": 0.8844165896776449, "grad_norm": 0.0904935672879219, "learning_rate": 3.476182132218579e-06, "loss": 8.5157, "step": 177100 }, { "epoch": 0.8844665285025843, "grad_norm": 0.0903678685426712, "learning_rate": 3.474680217277028e-06, "loss": 8.517, "step": 177110 }, { "epoch": 0.8845164673275238, "grad_norm": 0.09201235324144363, "learning_rate": 3.4731783023354775e-06, "loss": 8.5221, "step": 177120 }, { "epoch": 0.8845664061524632, "grad_norm": 0.09328807145357132, "learning_rate": 3.4716763873939276e-06, "loss": 8.5328, "step": 177130 }, { "epoch": 0.8846163449774027, "grad_norm": 0.08826225250959396, "learning_rate": 3.470174472452377e-06, "loss": 8.5153, "step": 177140 }, { "epoch": 0.8846662838023421, "grad_norm": 0.09613511711359024, "learning_rate": 3.4686725575108263e-06, "loss": 8.5224, "step": 177150 }, { "epoch": 0.8847162226272816, "grad_norm": 0.0888667032122612, "learning_rate": 3.4671706425692756e-06, "loss": 8.5245, "step": 177160 }, { "epoch": 0.884766161452221, "grad_norm": 0.08986398577690125, "learning_rate": 3.4656687276277254e-06, "loss": 8.5181, "step": 177170 }, { "epoch": 0.8848161002771605, "grad_norm": 0.09408170729875565, "learning_rate": 3.464166812686175e-06, "loss": 8.5075, "step": 177180 }, { "epoch": 0.8848660391020999, "grad_norm": 0.08650843799114227, "learning_rate": 3.4626648977446245e-06, "loss": 8.5356, "step": 177190 }, { "epoch": 0.8849159779270394, "grad_norm": 0.09117493778467178, "learning_rate": 3.461162982803074e-06, "loss": 8.5275, "step": 177200 }, { "epoch": 0.8849659167519788, "grad_norm": 0.09649976342916489, "learning_rate": 3.4596610678615236e-06, "loss": 8.5083, "step": 177210 }, { "epoch": 0.8850158555769183, "grad_norm": 0.09083914011716843, "learning_rate": 3.4581591529199733e-06, "loss": 8.5271, "step": 177220 }, { "epoch": 0.8850657944018577, "grad_norm": 0.0903511494398117, "learning_rate": 3.4566572379784227e-06, "loss": 8.5062, "step": 177230 }, { "epoch": 0.8851157332267972, "grad_norm": 0.09016105532646179, "learning_rate": 3.455155323036872e-06, "loss": 8.5287, "step": 177240 }, { "epoch": 0.8851656720517366, "grad_norm": 0.09000012278556824, "learning_rate": 3.4536534080953217e-06, "loss": 8.4927, "step": 177250 }, { "epoch": 0.8852156108766761, "grad_norm": 0.088304802775383, "learning_rate": 3.452151493153771e-06, "loss": 8.5076, "step": 177260 }, { "epoch": 0.8852655497016155, "grad_norm": 0.09251948446035385, "learning_rate": 3.450649578212221e-06, "loss": 8.5103, "step": 177270 }, { "epoch": 0.885315488526555, "grad_norm": 0.08819442987442017, "learning_rate": 3.44914766327067e-06, "loss": 8.5039, "step": 177280 }, { "epoch": 0.8853654273514944, "grad_norm": 0.0890979915857315, "learning_rate": 3.44764574832912e-06, "loss": 8.5171, "step": 177290 }, { "epoch": 0.8854153661764339, "grad_norm": 0.0990390032529831, "learning_rate": 3.4461438333875692e-06, "loss": 8.52, "step": 177300 }, { "epoch": 0.8854653050013733, "grad_norm": 0.08952021598815918, "learning_rate": 3.4446419184460186e-06, "loss": 8.5213, "step": 177310 }, { "epoch": 0.8855152438263127, "grad_norm": 0.08724192529916763, "learning_rate": 3.4431400035044683e-06, "loss": 8.5327, "step": 177320 }, { "epoch": 0.8855651826512522, "grad_norm": 0.09806641191244125, "learning_rate": 3.441638088562918e-06, "loss": 8.5187, "step": 177330 }, { "epoch": 0.8856151214761917, "grad_norm": 0.08996843546628952, "learning_rate": 3.4401361736213674e-06, "loss": 8.5578, "step": 177340 }, { "epoch": 0.8856650603011311, "grad_norm": 0.09567862004041672, "learning_rate": 3.4386342586798167e-06, "loss": 8.5224, "step": 177350 }, { "epoch": 0.8857149991260705, "grad_norm": 0.08767600357532501, "learning_rate": 3.437132343738266e-06, "loss": 8.5191, "step": 177360 }, { "epoch": 0.88576493795101, "grad_norm": 0.09355778992176056, "learning_rate": 3.4356304287967163e-06, "loss": 8.5066, "step": 177370 }, { "epoch": 0.8858148767759495, "grad_norm": 0.09189339727163315, "learning_rate": 3.4341285138551656e-06, "loss": 8.5076, "step": 177380 }, { "epoch": 0.8858648156008889, "grad_norm": 0.08906829357147217, "learning_rate": 3.432626598913615e-06, "loss": 8.5052, "step": 177390 }, { "epoch": 0.8859147544258283, "grad_norm": 0.08834553509950638, "learning_rate": 3.4311246839720643e-06, "loss": 8.5115, "step": 177400 }, { "epoch": 0.8859646932507678, "grad_norm": 0.09251053631305695, "learning_rate": 3.429622769030514e-06, "loss": 8.5233, "step": 177410 }, { "epoch": 0.8860146320757073, "grad_norm": 0.08647915720939636, "learning_rate": 3.4281208540889638e-06, "loss": 8.5078, "step": 177420 }, { "epoch": 0.8860645709006467, "grad_norm": 0.09425395727157593, "learning_rate": 3.426618939147413e-06, "loss": 8.5261, "step": 177430 }, { "epoch": 0.8861145097255861, "grad_norm": 0.08929485827684402, "learning_rate": 3.4251170242058624e-06, "loss": 8.5276, "step": 177440 }, { "epoch": 0.8861644485505256, "grad_norm": 0.09134992957115173, "learning_rate": 3.423615109264312e-06, "loss": 8.531, "step": 177450 }, { "epoch": 0.8862143873754651, "grad_norm": 0.08963406831026077, "learning_rate": 3.4221131943227615e-06, "loss": 8.517, "step": 177460 }, { "epoch": 0.8862643262004045, "grad_norm": 0.09403959661722183, "learning_rate": 3.4206112793812113e-06, "loss": 8.5015, "step": 177470 }, { "epoch": 0.8863142650253439, "grad_norm": 0.08884259313344955, "learning_rate": 3.4191093644396606e-06, "loss": 8.511, "step": 177480 }, { "epoch": 0.8863642038502834, "grad_norm": 0.09389010816812515, "learning_rate": 3.4176074494981104e-06, "loss": 8.4983, "step": 177490 }, { "epoch": 0.8864141426752229, "grad_norm": 0.08853202313184738, "learning_rate": 3.4161055345565597e-06, "loss": 8.5133, "step": 177500 }, { "epoch": 0.8864640815001623, "grad_norm": 0.09294895827770233, "learning_rate": 3.414603619615009e-06, "loss": 8.5259, "step": 177510 }, { "epoch": 0.8865140203251017, "grad_norm": 0.09223725646734238, "learning_rate": 3.4131017046734588e-06, "loss": 8.5059, "step": 177520 }, { "epoch": 0.8865639591500412, "grad_norm": 0.09366583824157715, "learning_rate": 3.4115997897319085e-06, "loss": 8.5378, "step": 177530 }, { "epoch": 0.8866138979749807, "grad_norm": 0.08734884113073349, "learning_rate": 3.410097874790358e-06, "loss": 8.5117, "step": 177540 }, { "epoch": 0.8866638367999201, "grad_norm": 0.09199695289134979, "learning_rate": 3.408595959848807e-06, "loss": 8.5281, "step": 177550 }, { "epoch": 0.8867137756248595, "grad_norm": 0.09172987192869186, "learning_rate": 3.4070940449072565e-06, "loss": 8.523, "step": 177560 }, { "epoch": 0.886763714449799, "grad_norm": 0.09687057882547379, "learning_rate": 3.4055921299657067e-06, "loss": 8.532, "step": 177570 }, { "epoch": 0.8868136532747385, "grad_norm": 0.09877201914787292, "learning_rate": 3.404090215024156e-06, "loss": 8.534, "step": 177580 }, { "epoch": 0.8868635920996779, "grad_norm": 0.09122820198535919, "learning_rate": 3.4025883000826054e-06, "loss": 8.4999, "step": 177590 }, { "epoch": 0.8869135309246173, "grad_norm": 0.09195327013731003, "learning_rate": 3.4010863851410547e-06, "loss": 8.5035, "step": 177600 }, { "epoch": 0.8869634697495568, "grad_norm": 0.09369145333766937, "learning_rate": 3.3995844701995044e-06, "loss": 8.4957, "step": 177610 }, { "epoch": 0.8870134085744963, "grad_norm": 0.09592372179031372, "learning_rate": 3.398082555257954e-06, "loss": 8.5032, "step": 177620 }, { "epoch": 0.8870633473994357, "grad_norm": 0.09076791256666183, "learning_rate": 3.3965806403164035e-06, "loss": 8.5275, "step": 177630 }, { "epoch": 0.8871132862243751, "grad_norm": 0.0895276591181755, "learning_rate": 3.395078725374853e-06, "loss": 8.5322, "step": 177640 }, { "epoch": 0.8871632250493146, "grad_norm": 0.09077776968479156, "learning_rate": 3.3935768104333026e-06, "loss": 8.5201, "step": 177650 }, { "epoch": 0.8872131638742541, "grad_norm": 0.09358912706375122, "learning_rate": 3.392074895491752e-06, "loss": 8.5083, "step": 177660 }, { "epoch": 0.8872631026991935, "grad_norm": 0.09124220907688141, "learning_rate": 3.3905729805502017e-06, "loss": 8.514, "step": 177670 }, { "epoch": 0.8873130415241329, "grad_norm": 0.09009285271167755, "learning_rate": 3.389071065608651e-06, "loss": 8.5199, "step": 177680 }, { "epoch": 0.8873629803490723, "grad_norm": 0.08989504724740982, "learning_rate": 3.3875691506671008e-06, "loss": 8.525, "step": 177690 }, { "epoch": 0.8874129191740119, "grad_norm": 0.09324032813310623, "learning_rate": 3.38606723572555e-06, "loss": 8.5225, "step": 177700 }, { "epoch": 0.8874628579989513, "grad_norm": 0.09039938449859619, "learning_rate": 3.3845653207839995e-06, "loss": 8.5127, "step": 177710 }, { "epoch": 0.8875127968238907, "grad_norm": 0.09255453944206238, "learning_rate": 3.383063405842449e-06, "loss": 8.5293, "step": 177720 }, { "epoch": 0.8875627356488301, "grad_norm": 0.0964113101363182, "learning_rate": 3.381561490900899e-06, "loss": 8.5023, "step": 177730 }, { "epoch": 0.8876126744737697, "grad_norm": 0.09030985832214355, "learning_rate": 3.3800595759593483e-06, "loss": 8.5003, "step": 177740 }, { "epoch": 0.8876626132987091, "grad_norm": 0.08826965093612671, "learning_rate": 3.3785576610177976e-06, "loss": 8.5222, "step": 177750 }, { "epoch": 0.8877125521236485, "grad_norm": 0.09713491797447205, "learning_rate": 3.3770557460762474e-06, "loss": 8.5149, "step": 177760 }, { "epoch": 0.887762490948588, "grad_norm": 0.09569582343101501, "learning_rate": 3.375553831134697e-06, "loss": 8.5064, "step": 177770 }, { "epoch": 0.8878124297735275, "grad_norm": 0.09315156936645508, "learning_rate": 3.3740519161931465e-06, "loss": 8.5055, "step": 177780 }, { "epoch": 0.8878623685984669, "grad_norm": 0.09295808523893356, "learning_rate": 3.372550001251596e-06, "loss": 8.5233, "step": 177790 }, { "epoch": 0.8879123074234063, "grad_norm": 0.08866126090288162, "learning_rate": 3.371048086310045e-06, "loss": 8.5343, "step": 177800 }, { "epoch": 0.8879622462483457, "grad_norm": 0.09473447501659393, "learning_rate": 3.3695461713684953e-06, "loss": 8.5097, "step": 177810 }, { "epoch": 0.8880121850732853, "grad_norm": 0.09259150177240372, "learning_rate": 3.3680442564269446e-06, "loss": 8.5188, "step": 177820 }, { "epoch": 0.8880621238982247, "grad_norm": 0.09014898538589478, "learning_rate": 3.366542341485394e-06, "loss": 8.5231, "step": 177830 }, { "epoch": 0.8881120627231641, "grad_norm": 0.09090849757194519, "learning_rate": 3.3650404265438433e-06, "loss": 8.5345, "step": 177840 }, { "epoch": 0.8881620015481035, "grad_norm": 0.09438339620828629, "learning_rate": 3.363538511602293e-06, "loss": 8.5103, "step": 177850 }, { "epoch": 0.8882119403730431, "grad_norm": 0.09024190902709961, "learning_rate": 3.362036596660743e-06, "loss": 8.5295, "step": 177860 }, { "epoch": 0.8882618791979825, "grad_norm": 0.0944947823882103, "learning_rate": 3.360534681719192e-06, "loss": 8.5038, "step": 177870 }, { "epoch": 0.8883118180229219, "grad_norm": 0.09072814136743546, "learning_rate": 3.3590327667776415e-06, "loss": 8.5217, "step": 177880 }, { "epoch": 0.8883617568478613, "grad_norm": 0.09160330891609192, "learning_rate": 3.3575308518360912e-06, "loss": 8.4917, "step": 177890 }, { "epoch": 0.8884116956728009, "grad_norm": 0.08163774758577347, "learning_rate": 3.3560289368945406e-06, "loss": 8.5104, "step": 177900 }, { "epoch": 0.8884616344977403, "grad_norm": 0.09547600150108337, "learning_rate": 3.3545270219529903e-06, "loss": 8.5128, "step": 177910 }, { "epoch": 0.8885115733226797, "grad_norm": 0.08949603140354156, "learning_rate": 3.3530251070114396e-06, "loss": 8.5251, "step": 177920 }, { "epoch": 0.8885615121476191, "grad_norm": 0.09252443164587021, "learning_rate": 3.3515231920698894e-06, "loss": 8.5098, "step": 177930 }, { "epoch": 0.8886114509725587, "grad_norm": 0.09023504704236984, "learning_rate": 3.3500212771283387e-06, "loss": 8.5021, "step": 177940 }, { "epoch": 0.8886613897974981, "grad_norm": 0.0936330258846283, "learning_rate": 3.348519362186788e-06, "loss": 8.5215, "step": 177950 }, { "epoch": 0.8887113286224375, "grad_norm": 0.09109587222337723, "learning_rate": 3.347017447245238e-06, "loss": 8.5083, "step": 177960 }, { "epoch": 0.8887612674473769, "grad_norm": 0.08897558599710464, "learning_rate": 3.3455155323036876e-06, "loss": 8.5156, "step": 177970 }, { "epoch": 0.8888112062723165, "grad_norm": 0.09321697801351547, "learning_rate": 3.344013617362137e-06, "loss": 8.5194, "step": 177980 }, { "epoch": 0.8888611450972559, "grad_norm": 0.08879639208316803, "learning_rate": 3.3425117024205862e-06, "loss": 8.5025, "step": 177990 }, { "epoch": 0.8889110839221953, "grad_norm": 0.08868400007486343, "learning_rate": 3.3410097874790356e-06, "loss": 8.524, "step": 178000 }, { "epoch": 0.8889610227471347, "grad_norm": 0.0897117480635643, "learning_rate": 3.3395078725374857e-06, "loss": 8.5224, "step": 178010 }, { "epoch": 0.8890109615720742, "grad_norm": 0.08525160700082779, "learning_rate": 3.338005957595935e-06, "loss": 8.5154, "step": 178020 }, { "epoch": 0.8890609003970137, "grad_norm": 0.09332958608865738, "learning_rate": 3.3365040426543844e-06, "loss": 8.5251, "step": 178030 }, { "epoch": 0.8891108392219531, "grad_norm": 0.08514176309108734, "learning_rate": 3.3350021277128337e-06, "loss": 8.5127, "step": 178040 }, { "epoch": 0.8891607780468925, "grad_norm": 0.08755916357040405, "learning_rate": 3.3335002127712835e-06, "loss": 8.4974, "step": 178050 }, { "epoch": 0.889210716871832, "grad_norm": 0.08407943695783615, "learning_rate": 3.3319982978297332e-06, "loss": 8.5274, "step": 178060 }, { "epoch": 0.8892606556967715, "grad_norm": 0.08439040929079056, "learning_rate": 3.3304963828881826e-06, "loss": 8.5451, "step": 178070 }, { "epoch": 0.8893105945217109, "grad_norm": 0.09115412086248398, "learning_rate": 3.328994467946632e-06, "loss": 8.515, "step": 178080 }, { "epoch": 0.8893605333466503, "grad_norm": 0.0921141728758812, "learning_rate": 3.3274925530050817e-06, "loss": 8.5164, "step": 178090 }, { "epoch": 0.8894104721715897, "grad_norm": 0.09111848473548889, "learning_rate": 3.325990638063531e-06, "loss": 8.5319, "step": 178100 }, { "epoch": 0.8894604109965293, "grad_norm": 0.0888875350356102, "learning_rate": 3.3244887231219808e-06, "loss": 8.4937, "step": 178110 }, { "epoch": 0.8895103498214687, "grad_norm": 0.08856623619794846, "learning_rate": 3.32298680818043e-06, "loss": 8.5252, "step": 178120 }, { "epoch": 0.8895602886464081, "grad_norm": 0.0940069705247879, "learning_rate": 3.32148489323888e-06, "loss": 8.514, "step": 178130 }, { "epoch": 0.8896102274713475, "grad_norm": 0.09143373370170593, "learning_rate": 3.319982978297329e-06, "loss": 8.5126, "step": 178140 }, { "epoch": 0.8896601662962871, "grad_norm": 0.08722451329231262, "learning_rate": 3.3184810633557785e-06, "loss": 8.5131, "step": 178150 }, { "epoch": 0.8897101051212265, "grad_norm": 0.09241468459367752, "learning_rate": 3.3169791484142283e-06, "loss": 8.5162, "step": 178160 }, { "epoch": 0.8897600439461659, "grad_norm": 0.09892044216394424, "learning_rate": 3.315477233472678e-06, "loss": 8.5069, "step": 178170 }, { "epoch": 0.8898099827711053, "grad_norm": 0.09388883411884308, "learning_rate": 3.3139753185311273e-06, "loss": 8.5245, "step": 178180 }, { "epoch": 0.8898599215960449, "grad_norm": 0.08809326589107513, "learning_rate": 3.3124734035895767e-06, "loss": 8.5341, "step": 178190 }, { "epoch": 0.8899098604209843, "grad_norm": 0.09530996531248093, "learning_rate": 3.310971488648026e-06, "loss": 8.4991, "step": 178200 }, { "epoch": 0.8899597992459237, "grad_norm": 0.09561774879693985, "learning_rate": 3.309469573706476e-06, "loss": 8.5052, "step": 178210 }, { "epoch": 0.8900097380708631, "grad_norm": 0.09873489290475845, "learning_rate": 3.3079676587649255e-06, "loss": 8.5081, "step": 178220 }, { "epoch": 0.8900596768958027, "grad_norm": 0.08937592804431915, "learning_rate": 3.306465743823375e-06, "loss": 8.5305, "step": 178230 }, { "epoch": 0.8901096157207421, "grad_norm": 0.09414834529161453, "learning_rate": 3.304963828881824e-06, "loss": 8.5262, "step": 178240 }, { "epoch": 0.8901595545456815, "grad_norm": 0.09100335836410522, "learning_rate": 3.303461913940274e-06, "loss": 8.5105, "step": 178250 }, { "epoch": 0.8902094933706209, "grad_norm": 0.09098581969738007, "learning_rate": 3.3019599989987237e-06, "loss": 8.521, "step": 178260 }, { "epoch": 0.8902594321955605, "grad_norm": 0.09130746126174927, "learning_rate": 3.300458084057173e-06, "loss": 8.5235, "step": 178270 }, { "epoch": 0.8903093710204999, "grad_norm": 0.08628986030817032, "learning_rate": 3.2989561691156223e-06, "loss": 8.5126, "step": 178280 }, { "epoch": 0.8903593098454393, "grad_norm": 0.09175537526607513, "learning_rate": 3.297454254174072e-06, "loss": 8.5197, "step": 178290 }, { "epoch": 0.8904092486703787, "grad_norm": 0.09056618064641953, "learning_rate": 3.295952339232522e-06, "loss": 8.5097, "step": 178300 }, { "epoch": 0.8904591874953183, "grad_norm": 0.09293998777866364, "learning_rate": 3.294450424290971e-06, "loss": 8.5344, "step": 178310 }, { "epoch": 0.8905091263202577, "grad_norm": 0.09091665595769882, "learning_rate": 3.2929485093494205e-06, "loss": 8.5299, "step": 178320 }, { "epoch": 0.8905590651451971, "grad_norm": 0.0961182564496994, "learning_rate": 3.2914465944078703e-06, "loss": 8.5307, "step": 178330 }, { "epoch": 0.8906090039701365, "grad_norm": 0.09689652919769287, "learning_rate": 3.2899446794663196e-06, "loss": 8.5363, "step": 178340 }, { "epoch": 0.8906589427950761, "grad_norm": 0.09379088878631592, "learning_rate": 3.2884427645247694e-06, "loss": 8.5213, "step": 178350 }, { "epoch": 0.8907088816200155, "grad_norm": 0.09888619929552078, "learning_rate": 3.2869408495832187e-06, "loss": 8.5056, "step": 178360 }, { "epoch": 0.8907588204449549, "grad_norm": 0.09750135242938995, "learning_rate": 3.2854389346416684e-06, "loss": 8.5178, "step": 178370 }, { "epoch": 0.8908087592698943, "grad_norm": 0.09058021754026413, "learning_rate": 3.2839370197001178e-06, "loss": 8.5104, "step": 178380 }, { "epoch": 0.8908586980948339, "grad_norm": 0.08557943254709244, "learning_rate": 3.282435104758567e-06, "loss": 8.5229, "step": 178390 }, { "epoch": 0.8909086369197733, "grad_norm": 0.09042729437351227, "learning_rate": 3.280933189817017e-06, "loss": 8.5308, "step": 178400 }, { "epoch": 0.8909585757447127, "grad_norm": 0.08974535018205643, "learning_rate": 3.279431274875466e-06, "loss": 8.5126, "step": 178410 }, { "epoch": 0.8910085145696521, "grad_norm": 0.0879034698009491, "learning_rate": 3.277929359933916e-06, "loss": 8.5104, "step": 178420 }, { "epoch": 0.8910584533945917, "grad_norm": 0.08987148851156235, "learning_rate": 3.2764274449923653e-06, "loss": 8.4953, "step": 178430 }, { "epoch": 0.8911083922195311, "grad_norm": 0.09033453464508057, "learning_rate": 3.2749255300508146e-06, "loss": 8.5089, "step": 178440 }, { "epoch": 0.8911583310444705, "grad_norm": 0.09760011732578278, "learning_rate": 3.2734236151092644e-06, "loss": 8.4875, "step": 178450 }, { "epoch": 0.8912082698694099, "grad_norm": 0.09189391136169434, "learning_rate": 3.271921700167714e-06, "loss": 8.5124, "step": 178460 }, { "epoch": 0.8912582086943495, "grad_norm": 0.09732663631439209, "learning_rate": 3.2704197852261635e-06, "loss": 8.5091, "step": 178470 }, { "epoch": 0.8913081475192889, "grad_norm": 0.0918707624077797, "learning_rate": 3.2689178702846128e-06, "loss": 8.5142, "step": 178480 }, { "epoch": 0.8913580863442283, "grad_norm": 0.09437326341867447, "learning_rate": 3.267415955343062e-06, "loss": 8.5027, "step": 178490 }, { "epoch": 0.8914080251691677, "grad_norm": 0.09226923435926437, "learning_rate": 3.2659140404015123e-06, "loss": 8.5185, "step": 178500 }, { "epoch": 0.8914579639941073, "grad_norm": 0.09034575521945953, "learning_rate": 3.2644121254599616e-06, "loss": 8.5148, "step": 178510 }, { "epoch": 0.8915079028190467, "grad_norm": 0.09127431362867355, "learning_rate": 3.262910210518411e-06, "loss": 8.5053, "step": 178520 }, { "epoch": 0.8915578416439861, "grad_norm": 0.09689009934663773, "learning_rate": 3.2614082955768603e-06, "loss": 8.4981, "step": 178530 }, { "epoch": 0.8916077804689255, "grad_norm": 0.09404323995113373, "learning_rate": 3.25990638063531e-06, "loss": 8.5289, "step": 178540 }, { "epoch": 0.8916577192938651, "grad_norm": 0.08964677900075912, "learning_rate": 3.25840446569376e-06, "loss": 8.5246, "step": 178550 }, { "epoch": 0.8917076581188045, "grad_norm": 0.08872044086456299, "learning_rate": 3.256902550752209e-06, "loss": 8.5265, "step": 178560 }, { "epoch": 0.8917575969437439, "grad_norm": 0.08949794620275497, "learning_rate": 3.2554006358106585e-06, "loss": 8.5037, "step": 178570 }, { "epoch": 0.8918075357686833, "grad_norm": 0.08609029650688171, "learning_rate": 3.2538987208691082e-06, "loss": 8.5024, "step": 178580 }, { "epoch": 0.8918574745936229, "grad_norm": 0.09221512079238892, "learning_rate": 3.2523968059275575e-06, "loss": 8.5054, "step": 178590 }, { "epoch": 0.8919074134185623, "grad_norm": 0.08988411724567413, "learning_rate": 3.2508948909860073e-06, "loss": 8.5123, "step": 178600 }, { "epoch": 0.8919573522435017, "grad_norm": 0.08725704252719879, "learning_rate": 3.2493929760444566e-06, "loss": 8.525, "step": 178610 }, { "epoch": 0.8920072910684411, "grad_norm": 0.09931132942438126, "learning_rate": 3.2478910611029064e-06, "loss": 8.514, "step": 178620 }, { "epoch": 0.8920572298933807, "grad_norm": 0.09435875713825226, "learning_rate": 3.2463891461613557e-06, "loss": 8.5047, "step": 178630 }, { "epoch": 0.8921071687183201, "grad_norm": 0.092837855219841, "learning_rate": 3.244887231219805e-06, "loss": 8.5033, "step": 178640 }, { "epoch": 0.8921571075432595, "grad_norm": 0.09155397117137909, "learning_rate": 3.243385316278255e-06, "loss": 8.5026, "step": 178650 }, { "epoch": 0.8922070463681989, "grad_norm": 0.08951444178819656, "learning_rate": 3.2418834013367046e-06, "loss": 8.5213, "step": 178660 }, { "epoch": 0.8922569851931385, "grad_norm": 0.08599022775888443, "learning_rate": 3.240381486395154e-06, "loss": 8.5207, "step": 178670 }, { "epoch": 0.8923069240180779, "grad_norm": 0.08356813341379166, "learning_rate": 3.2388795714536032e-06, "loss": 8.5287, "step": 178680 }, { "epoch": 0.8923568628430173, "grad_norm": 0.09023530781269073, "learning_rate": 3.2373776565120526e-06, "loss": 8.525, "step": 178690 }, { "epoch": 0.8924068016679567, "grad_norm": 0.09185218065977097, "learning_rate": 3.2358757415705027e-06, "loss": 8.5036, "step": 178700 }, { "epoch": 0.8924567404928962, "grad_norm": 0.09013650566339493, "learning_rate": 3.234373826628952e-06, "loss": 8.5288, "step": 178710 }, { "epoch": 0.8925066793178357, "grad_norm": 0.0958433449268341, "learning_rate": 3.2328719116874014e-06, "loss": 8.5167, "step": 178720 }, { "epoch": 0.8925566181427751, "grad_norm": 0.08722110837697983, "learning_rate": 3.2313699967458507e-06, "loss": 8.5105, "step": 178730 }, { "epoch": 0.8926065569677145, "grad_norm": 0.09434662014245987, "learning_rate": 3.2298680818043005e-06, "loss": 8.5244, "step": 178740 }, { "epoch": 0.892656495792654, "grad_norm": 0.09160378575325012, "learning_rate": 3.2283661668627502e-06, "loss": 8.5077, "step": 178750 }, { "epoch": 0.8927064346175935, "grad_norm": 0.09734846651554108, "learning_rate": 3.2268642519211996e-06, "loss": 8.517, "step": 178760 }, { "epoch": 0.8927563734425329, "grad_norm": 0.0917469784617424, "learning_rate": 3.225362336979649e-06, "loss": 8.5156, "step": 178770 }, { "epoch": 0.8928063122674723, "grad_norm": 0.09718631953001022, "learning_rate": 3.2238604220380987e-06, "loss": 8.4938, "step": 178780 }, { "epoch": 0.8928562510924118, "grad_norm": 0.09104308485984802, "learning_rate": 3.222358507096548e-06, "loss": 8.5254, "step": 178790 }, { "epoch": 0.8929061899173513, "grad_norm": 0.09059741348028183, "learning_rate": 3.2208565921549977e-06, "loss": 8.529, "step": 178800 }, { "epoch": 0.8929561287422907, "grad_norm": 0.0890093445777893, "learning_rate": 3.219354677213447e-06, "loss": 8.5212, "step": 178810 }, { "epoch": 0.8930060675672301, "grad_norm": 0.09546619653701782, "learning_rate": 3.217852762271897e-06, "loss": 8.5247, "step": 178820 }, { "epoch": 0.8930560063921696, "grad_norm": 0.09299060702323914, "learning_rate": 3.216350847330346e-06, "loss": 8.517, "step": 178830 }, { "epoch": 0.8931059452171091, "grad_norm": 0.09568481147289276, "learning_rate": 3.214848932388796e-06, "loss": 8.5188, "step": 178840 }, { "epoch": 0.8931558840420485, "grad_norm": 0.09317091107368469, "learning_rate": 3.2133470174472452e-06, "loss": 8.5156, "step": 178850 }, { "epoch": 0.8932058228669879, "grad_norm": 0.08648492395877838, "learning_rate": 3.211845102505695e-06, "loss": 8.5408, "step": 178860 }, { "epoch": 0.8932557616919274, "grad_norm": 0.09608660638332367, "learning_rate": 3.2103431875641443e-06, "loss": 8.5176, "step": 178870 }, { "epoch": 0.8933057005168669, "grad_norm": 0.09281721711158752, "learning_rate": 3.2088412726225937e-06, "loss": 8.5124, "step": 178880 }, { "epoch": 0.8933556393418063, "grad_norm": 0.08907417207956314, "learning_rate": 3.2073393576810434e-06, "loss": 8.5096, "step": 178890 }, { "epoch": 0.8934055781667457, "grad_norm": 0.09067781269550323, "learning_rate": 3.205837442739493e-06, "loss": 8.5257, "step": 178900 }, { "epoch": 0.8934555169916852, "grad_norm": 0.08892224729061127, "learning_rate": 3.2043355277979425e-06, "loss": 8.5168, "step": 178910 }, { "epoch": 0.8935054558166247, "grad_norm": 0.08958274871110916, "learning_rate": 3.202833612856392e-06, "loss": 8.5153, "step": 178920 }, { "epoch": 0.8935553946415641, "grad_norm": 0.08994346857070923, "learning_rate": 3.201331697914841e-06, "loss": 8.5179, "step": 178930 }, { "epoch": 0.8936053334665035, "grad_norm": 0.09714482724666595, "learning_rate": 3.1998297829732913e-06, "loss": 8.5238, "step": 178940 }, { "epoch": 0.893655272291443, "grad_norm": 0.09184648096561432, "learning_rate": 3.1983278680317407e-06, "loss": 8.5161, "step": 178950 }, { "epoch": 0.8937052111163825, "grad_norm": 0.0893314927816391, "learning_rate": 3.19682595309019e-06, "loss": 8.5033, "step": 178960 }, { "epoch": 0.8937551499413219, "grad_norm": 0.09436094015836716, "learning_rate": 3.1953240381486393e-06, "loss": 8.4939, "step": 178970 }, { "epoch": 0.8938050887662613, "grad_norm": 0.08903580904006958, "learning_rate": 3.193822123207089e-06, "loss": 8.5137, "step": 178980 }, { "epoch": 0.8938550275912007, "grad_norm": 0.089007668197155, "learning_rate": 3.192320208265539e-06, "loss": 8.5193, "step": 178990 }, { "epoch": 0.8939049664161403, "grad_norm": 0.09373140335083008, "learning_rate": 3.190818293323988e-06, "loss": 8.4969, "step": 179000 }, { "epoch": 0.8939549052410797, "grad_norm": 0.08744634687900543, "learning_rate": 3.1893163783824375e-06, "loss": 8.5342, "step": 179010 }, { "epoch": 0.8940048440660191, "grad_norm": 0.0939723551273346, "learning_rate": 3.1878144634408873e-06, "loss": 8.5059, "step": 179020 }, { "epoch": 0.8940547828909585, "grad_norm": 0.09691902250051498, "learning_rate": 3.1863125484993366e-06, "loss": 8.5022, "step": 179030 }, { "epoch": 0.894104721715898, "grad_norm": 0.09357941895723343, "learning_rate": 3.1848106335577864e-06, "loss": 8.5155, "step": 179040 }, { "epoch": 0.8941546605408375, "grad_norm": 0.09528280794620514, "learning_rate": 3.1833087186162357e-06, "loss": 8.5088, "step": 179050 }, { "epoch": 0.8942045993657769, "grad_norm": 0.09074392914772034, "learning_rate": 3.1818068036746854e-06, "loss": 8.4962, "step": 179060 }, { "epoch": 0.8942545381907163, "grad_norm": 0.09400563687086105, "learning_rate": 3.1803048887331348e-06, "loss": 8.5115, "step": 179070 }, { "epoch": 0.8943044770156559, "grad_norm": 0.09290362149477005, "learning_rate": 3.178802973791584e-06, "loss": 8.5178, "step": 179080 }, { "epoch": 0.8943544158405953, "grad_norm": 0.09567950665950775, "learning_rate": 3.177301058850034e-06, "loss": 8.5186, "step": 179090 }, { "epoch": 0.8944043546655347, "grad_norm": 0.09070657938718796, "learning_rate": 3.1757991439084836e-06, "loss": 8.5161, "step": 179100 }, { "epoch": 0.8944542934904741, "grad_norm": 0.08824972063302994, "learning_rate": 3.174297228966933e-06, "loss": 8.5189, "step": 179110 }, { "epoch": 0.8945042323154136, "grad_norm": 0.09949322789907455, "learning_rate": 3.1727953140253823e-06, "loss": 8.5051, "step": 179120 }, { "epoch": 0.8945541711403531, "grad_norm": 0.09183038771152496, "learning_rate": 3.1712933990838316e-06, "loss": 8.5139, "step": 179130 }, { "epoch": 0.8946041099652925, "grad_norm": 0.09553587436676025, "learning_rate": 3.1697914841422818e-06, "loss": 8.4899, "step": 179140 }, { "epoch": 0.8946540487902319, "grad_norm": 0.089163638651371, "learning_rate": 3.168289569200731e-06, "loss": 8.5241, "step": 179150 }, { "epoch": 0.8947039876151714, "grad_norm": 0.09962226450443268, "learning_rate": 3.1667876542591804e-06, "loss": 8.5203, "step": 179160 }, { "epoch": 0.8947539264401109, "grad_norm": 0.09414087980985641, "learning_rate": 3.1652857393176298e-06, "loss": 8.5183, "step": 179170 }, { "epoch": 0.8948038652650503, "grad_norm": 0.09427635371685028, "learning_rate": 3.1637838243760795e-06, "loss": 8.5292, "step": 179180 }, { "epoch": 0.8948538040899897, "grad_norm": 0.08661588281393051, "learning_rate": 3.1622819094345293e-06, "loss": 8.5173, "step": 179190 }, { "epoch": 0.8949037429149292, "grad_norm": 0.09423363953828812, "learning_rate": 3.1607799944929786e-06, "loss": 8.5142, "step": 179200 }, { "epoch": 0.8949536817398687, "grad_norm": 0.08995388448238373, "learning_rate": 3.159278079551428e-06, "loss": 8.529, "step": 179210 }, { "epoch": 0.8950036205648081, "grad_norm": 0.09153537452220917, "learning_rate": 3.1577761646098777e-06, "loss": 8.5241, "step": 179220 }, { "epoch": 0.8950535593897475, "grad_norm": 0.09005282819271088, "learning_rate": 3.156274249668327e-06, "loss": 8.5244, "step": 179230 }, { "epoch": 0.895103498214687, "grad_norm": 0.08991970866918564, "learning_rate": 3.154772334726777e-06, "loss": 8.5312, "step": 179240 }, { "epoch": 0.8951534370396265, "grad_norm": 0.08929390460252762, "learning_rate": 3.153270419785226e-06, "loss": 8.5027, "step": 179250 }, { "epoch": 0.8952033758645659, "grad_norm": 0.08898353576660156, "learning_rate": 3.151768504843676e-06, "loss": 8.5349, "step": 179260 }, { "epoch": 0.8952533146895053, "grad_norm": 0.08719191700220108, "learning_rate": 3.150266589902125e-06, "loss": 8.5089, "step": 179270 }, { "epoch": 0.8953032535144448, "grad_norm": 0.09334076195955276, "learning_rate": 3.1487646749605745e-06, "loss": 8.5229, "step": 179280 }, { "epoch": 0.8953531923393843, "grad_norm": 0.09189531207084656, "learning_rate": 3.1472627600190243e-06, "loss": 8.518, "step": 179290 }, { "epoch": 0.8954031311643237, "grad_norm": 0.08680342137813568, "learning_rate": 3.145760845077474e-06, "loss": 8.5283, "step": 179300 }, { "epoch": 0.8954530699892631, "grad_norm": 0.0863579586148262, "learning_rate": 3.1442589301359234e-06, "loss": 8.5031, "step": 179310 }, { "epoch": 0.8955030088142026, "grad_norm": 0.09513013064861298, "learning_rate": 3.1427570151943727e-06, "loss": 8.5172, "step": 179320 }, { "epoch": 0.8955529476391421, "grad_norm": 0.08853243291378021, "learning_rate": 3.1412551002528225e-06, "loss": 8.5315, "step": 179330 }, { "epoch": 0.8956028864640815, "grad_norm": 0.09000103175640106, "learning_rate": 3.1397531853112722e-06, "loss": 8.5139, "step": 179340 }, { "epoch": 0.8956528252890209, "grad_norm": 0.0949261337518692, "learning_rate": 3.1382512703697216e-06, "loss": 8.5104, "step": 179350 }, { "epoch": 0.8957027641139604, "grad_norm": 0.09614192694425583, "learning_rate": 3.136749355428171e-06, "loss": 8.5061, "step": 179360 }, { "epoch": 0.8957527029388999, "grad_norm": 0.09426548331975937, "learning_rate": 3.1352474404866202e-06, "loss": 8.5072, "step": 179370 }, { "epoch": 0.8958026417638393, "grad_norm": 0.09127119183540344, "learning_rate": 3.1337455255450704e-06, "loss": 8.5092, "step": 179380 }, { "epoch": 0.8958525805887787, "grad_norm": 0.09280092269182205, "learning_rate": 3.1322436106035197e-06, "loss": 8.5003, "step": 179390 }, { "epoch": 0.8959025194137182, "grad_norm": 0.09022334963083267, "learning_rate": 3.130741695661969e-06, "loss": 8.5157, "step": 179400 }, { "epoch": 0.8959524582386577, "grad_norm": 0.08918463438749313, "learning_rate": 3.1292397807204184e-06, "loss": 8.5252, "step": 179410 }, { "epoch": 0.8960023970635971, "grad_norm": 0.09359756857156754, "learning_rate": 3.127737865778868e-06, "loss": 8.5137, "step": 179420 }, { "epoch": 0.8960523358885365, "grad_norm": 0.08726957440376282, "learning_rate": 3.126235950837318e-06, "loss": 8.5027, "step": 179430 }, { "epoch": 0.896102274713476, "grad_norm": 0.08867673575878143, "learning_rate": 3.1247340358957672e-06, "loss": 8.5117, "step": 179440 }, { "epoch": 0.8961522135384155, "grad_norm": 0.0853889063000679, "learning_rate": 3.1232321209542166e-06, "loss": 8.5262, "step": 179450 }, { "epoch": 0.8962021523633549, "grad_norm": 0.08606785535812378, "learning_rate": 3.1217302060126663e-06, "loss": 8.5221, "step": 179460 }, { "epoch": 0.8962520911882943, "grad_norm": 0.08745083957910538, "learning_rate": 3.1202282910711156e-06, "loss": 8.5107, "step": 179470 }, { "epoch": 0.8963020300132338, "grad_norm": 0.09155963361263275, "learning_rate": 3.1187263761295654e-06, "loss": 8.4951, "step": 179480 }, { "epoch": 0.8963519688381733, "grad_norm": 0.09494645148515701, "learning_rate": 3.1172244611880147e-06, "loss": 8.509, "step": 179490 }, { "epoch": 0.8964019076631127, "grad_norm": 0.0906292051076889, "learning_rate": 3.1157225462464645e-06, "loss": 8.5103, "step": 179500 }, { "epoch": 0.8964518464880521, "grad_norm": 0.08988522738218307, "learning_rate": 3.114220631304914e-06, "loss": 8.5154, "step": 179510 }, { "epoch": 0.8965017853129916, "grad_norm": 0.09074413031339645, "learning_rate": 3.112718716363363e-06, "loss": 8.5289, "step": 179520 }, { "epoch": 0.896551724137931, "grad_norm": 0.0895250216126442, "learning_rate": 3.111216801421813e-06, "loss": 8.5203, "step": 179530 }, { "epoch": 0.8966016629628705, "grad_norm": 0.09052935242652893, "learning_rate": 3.1097148864802627e-06, "loss": 8.5309, "step": 179540 }, { "epoch": 0.8966516017878099, "grad_norm": 0.09444441646337509, "learning_rate": 3.108212971538712e-06, "loss": 8.515, "step": 179550 }, { "epoch": 0.8967015406127494, "grad_norm": 0.09313017874956131, "learning_rate": 3.1067110565971613e-06, "loss": 8.5094, "step": 179560 }, { "epoch": 0.8967514794376888, "grad_norm": 0.09455044567584991, "learning_rate": 3.1052091416556107e-06, "loss": 8.5063, "step": 179570 }, { "epoch": 0.8968014182626283, "grad_norm": 0.09298877418041229, "learning_rate": 3.103707226714061e-06, "loss": 8.5096, "step": 179580 }, { "epoch": 0.8968513570875677, "grad_norm": 0.09229998290538788, "learning_rate": 3.10220531177251e-06, "loss": 8.5201, "step": 179590 }, { "epoch": 0.8969012959125072, "grad_norm": 0.08736059069633484, "learning_rate": 3.1007033968309595e-06, "loss": 8.4993, "step": 179600 }, { "epoch": 0.8969512347374466, "grad_norm": 0.09250369668006897, "learning_rate": 3.099201481889409e-06, "loss": 8.494, "step": 179610 }, { "epoch": 0.8970011735623861, "grad_norm": 0.08871186524629593, "learning_rate": 3.0976995669478586e-06, "loss": 8.5137, "step": 179620 }, { "epoch": 0.8970511123873255, "grad_norm": 0.09203743189573288, "learning_rate": 3.0961976520063083e-06, "loss": 8.5138, "step": 179630 }, { "epoch": 0.897101051212265, "grad_norm": 0.09076462686061859, "learning_rate": 3.0946957370647577e-06, "loss": 8.5117, "step": 179640 }, { "epoch": 0.8971509900372044, "grad_norm": 0.09100484848022461, "learning_rate": 3.093193822123207e-06, "loss": 8.5129, "step": 179650 }, { "epoch": 0.8972009288621439, "grad_norm": 0.0954102948307991, "learning_rate": 3.0916919071816568e-06, "loss": 8.4978, "step": 179660 }, { "epoch": 0.8972508676870833, "grad_norm": 0.09269285947084427, "learning_rate": 3.090189992240106e-06, "loss": 8.5116, "step": 179670 }, { "epoch": 0.8973008065120228, "grad_norm": 0.08553628623485565, "learning_rate": 3.088688077298556e-06, "loss": 8.5297, "step": 179680 }, { "epoch": 0.8973507453369622, "grad_norm": 0.08914821594953537, "learning_rate": 3.087186162357005e-06, "loss": 8.5084, "step": 179690 }, { "epoch": 0.8974006841619017, "grad_norm": 0.08601313084363937, "learning_rate": 3.085684247415455e-06, "loss": 8.507, "step": 179700 }, { "epoch": 0.8974506229868411, "grad_norm": 0.09413706511259079, "learning_rate": 3.0841823324739043e-06, "loss": 8.5197, "step": 179710 }, { "epoch": 0.8975005618117806, "grad_norm": 0.09138408303260803, "learning_rate": 3.0826804175323536e-06, "loss": 8.5177, "step": 179720 }, { "epoch": 0.89755050063672, "grad_norm": 0.09180725365877151, "learning_rate": 3.0811785025908033e-06, "loss": 8.5083, "step": 179730 }, { "epoch": 0.8976004394616595, "grad_norm": 0.09291674196720123, "learning_rate": 3.079676587649253e-06, "loss": 8.5193, "step": 179740 }, { "epoch": 0.8976503782865989, "grad_norm": 0.08994516730308533, "learning_rate": 3.0781746727077024e-06, "loss": 8.519, "step": 179750 }, { "epoch": 0.8977003171115384, "grad_norm": 0.09701023995876312, "learning_rate": 3.0766727577661518e-06, "loss": 8.5088, "step": 179760 }, { "epoch": 0.8977502559364778, "grad_norm": 0.0889933779835701, "learning_rate": 3.075170842824601e-06, "loss": 8.5115, "step": 179770 }, { "epoch": 0.8978001947614173, "grad_norm": 0.09396040439605713, "learning_rate": 3.0736689278830513e-06, "loss": 8.5349, "step": 179780 }, { "epoch": 0.8978501335863567, "grad_norm": 0.09631121158599854, "learning_rate": 3.0721670129415006e-06, "loss": 8.4926, "step": 179790 }, { "epoch": 0.8979000724112962, "grad_norm": 0.08764296770095825, "learning_rate": 3.07066509799995e-06, "loss": 8.524, "step": 179800 }, { "epoch": 0.8979500112362356, "grad_norm": 0.09530488401651382, "learning_rate": 3.0691631830583993e-06, "loss": 8.513, "step": 179810 }, { "epoch": 0.8979999500611751, "grad_norm": 0.08981040865182877, "learning_rate": 3.067661268116849e-06, "loss": 8.5024, "step": 179820 }, { "epoch": 0.8980498888861145, "grad_norm": 0.09306181967258453, "learning_rate": 3.0661593531752988e-06, "loss": 8.5005, "step": 179830 }, { "epoch": 0.898099827711054, "grad_norm": 0.09456732124090195, "learning_rate": 3.064657438233748e-06, "loss": 8.5144, "step": 179840 }, { "epoch": 0.8981497665359934, "grad_norm": 0.08984339982271194, "learning_rate": 3.0631555232921974e-06, "loss": 8.5388, "step": 179850 }, { "epoch": 0.8981997053609329, "grad_norm": 0.08446503430604935, "learning_rate": 3.061653608350647e-06, "loss": 8.5152, "step": 179860 }, { "epoch": 0.8982496441858723, "grad_norm": 0.09314480423927307, "learning_rate": 3.0601516934090965e-06, "loss": 8.5108, "step": 179870 }, { "epoch": 0.8982995830108118, "grad_norm": 0.09413187950849533, "learning_rate": 3.0586497784675463e-06, "loss": 8.5027, "step": 179880 }, { "epoch": 0.8983495218357512, "grad_norm": 0.09056896716356277, "learning_rate": 3.0571478635259956e-06, "loss": 8.5067, "step": 179890 }, { "epoch": 0.8983994606606907, "grad_norm": 0.08975179493427277, "learning_rate": 3.0556459485844454e-06, "loss": 8.5157, "step": 179900 }, { "epoch": 0.8984493994856301, "grad_norm": 0.08911077678203583, "learning_rate": 3.0541440336428947e-06, "loss": 8.516, "step": 179910 }, { "epoch": 0.8984993383105696, "grad_norm": 0.09924951940774918, "learning_rate": 3.0526421187013444e-06, "loss": 8.4984, "step": 179920 }, { "epoch": 0.898549277135509, "grad_norm": 0.09128329157829285, "learning_rate": 3.0511402037597938e-06, "loss": 8.5007, "step": 179930 }, { "epoch": 0.8985992159604484, "grad_norm": 0.08956824243068695, "learning_rate": 3.0496382888182435e-06, "loss": 8.5084, "step": 179940 }, { "epoch": 0.8986491547853879, "grad_norm": 0.09009304642677307, "learning_rate": 3.048136373876693e-06, "loss": 8.5131, "step": 179950 }, { "epoch": 0.8986990936103274, "grad_norm": 0.0977887287735939, "learning_rate": 3.046634458935142e-06, "loss": 8.5111, "step": 179960 }, { "epoch": 0.8987490324352668, "grad_norm": 0.08121143281459808, "learning_rate": 3.045132543993592e-06, "loss": 8.512, "step": 179970 }, { "epoch": 0.8987989712602062, "grad_norm": 0.09262192249298096, "learning_rate": 3.0436306290520417e-06, "loss": 8.523, "step": 179980 }, { "epoch": 0.8988489100851457, "grad_norm": 0.09229442477226257, "learning_rate": 3.042128714110491e-06, "loss": 8.5225, "step": 179990 }, { "epoch": 0.8988988489100851, "grad_norm": 0.092495396733284, "learning_rate": 3.0406267991689404e-06, "loss": 8.5128, "step": 180000 }, { "epoch": 0.8989487877350246, "grad_norm": 0.09082264453172684, "learning_rate": 3.0391248842273897e-06, "loss": 8.5133, "step": 180010 }, { "epoch": 0.898998726559964, "grad_norm": 0.09642348438501358, "learning_rate": 3.03762296928584e-06, "loss": 8.4977, "step": 180020 }, { "epoch": 0.8990486653849035, "grad_norm": 0.08731796592473984, "learning_rate": 3.036121054344289e-06, "loss": 8.5145, "step": 180030 }, { "epoch": 0.8990986042098429, "grad_norm": 0.0900919958949089, "learning_rate": 3.0346191394027385e-06, "loss": 8.5136, "step": 180040 }, { "epoch": 0.8991485430347824, "grad_norm": 0.09062129259109497, "learning_rate": 3.033117224461188e-06, "loss": 8.5201, "step": 180050 }, { "epoch": 0.8991984818597218, "grad_norm": 0.09137134999036789, "learning_rate": 3.0316153095196376e-06, "loss": 8.5163, "step": 180060 }, { "epoch": 0.8992484206846613, "grad_norm": 0.0882347971200943, "learning_rate": 3.0301133945780874e-06, "loss": 8.516, "step": 180070 }, { "epoch": 0.8992983595096007, "grad_norm": 0.08691363036632538, "learning_rate": 3.0286114796365367e-06, "loss": 8.5077, "step": 180080 }, { "epoch": 0.8993482983345402, "grad_norm": 0.08639916777610779, "learning_rate": 3.027109564694986e-06, "loss": 8.5125, "step": 180090 }, { "epoch": 0.8993982371594796, "grad_norm": 0.08904005587100983, "learning_rate": 3.025607649753436e-06, "loss": 8.5207, "step": 180100 }, { "epoch": 0.8994481759844191, "grad_norm": 0.09132896363735199, "learning_rate": 3.024105734811885e-06, "loss": 8.5062, "step": 180110 }, { "epoch": 0.8994981148093585, "grad_norm": 0.08482132852077484, "learning_rate": 3.022603819870335e-06, "loss": 8.5185, "step": 180120 }, { "epoch": 0.899548053634298, "grad_norm": 0.08881242573261261, "learning_rate": 3.0211019049287842e-06, "loss": 8.5312, "step": 180130 }, { "epoch": 0.8995979924592374, "grad_norm": 0.08985382318496704, "learning_rate": 3.019599989987234e-06, "loss": 8.5021, "step": 180140 }, { "epoch": 0.8996479312841769, "grad_norm": 0.08999388664960861, "learning_rate": 3.0180980750456833e-06, "loss": 8.4929, "step": 180150 }, { "epoch": 0.8996978701091163, "grad_norm": 0.092741958796978, "learning_rate": 3.0165961601041326e-06, "loss": 8.5206, "step": 180160 }, { "epoch": 0.8997478089340558, "grad_norm": 0.08737698942422867, "learning_rate": 3.0150942451625824e-06, "loss": 8.516, "step": 180170 }, { "epoch": 0.8997977477589952, "grad_norm": 0.09328185021877289, "learning_rate": 3.013592330221032e-06, "loss": 8.5143, "step": 180180 }, { "epoch": 0.8998476865839347, "grad_norm": 0.09485041350126266, "learning_rate": 3.0120904152794815e-06, "loss": 8.522, "step": 180190 }, { "epoch": 0.8998976254088741, "grad_norm": 0.0960816740989685, "learning_rate": 3.010588500337931e-06, "loss": 8.5241, "step": 180200 }, { "epoch": 0.8999475642338136, "grad_norm": 0.08661402761936188, "learning_rate": 3.00908658539638e-06, "loss": 8.5088, "step": 180210 }, { "epoch": 0.899997503058753, "grad_norm": 0.08585493266582489, "learning_rate": 3.0075846704548303e-06, "loss": 8.5145, "step": 180220 }, { "epoch": 0.9000474418836925, "grad_norm": 0.09099698811769485, "learning_rate": 3.0060827555132796e-06, "loss": 8.5141, "step": 180230 }, { "epoch": 0.9000973807086319, "grad_norm": 0.0931340754032135, "learning_rate": 3.004580840571729e-06, "loss": 8.4905, "step": 180240 }, { "epoch": 0.9001473195335714, "grad_norm": 0.08374540507793427, "learning_rate": 3.0030789256301783e-06, "loss": 8.5174, "step": 180250 }, { "epoch": 0.9001972583585108, "grad_norm": 0.09065917134284973, "learning_rate": 3.001577010688628e-06, "loss": 8.5069, "step": 180260 }, { "epoch": 0.9002471971834503, "grad_norm": 0.09299372136592865, "learning_rate": 3.000075095747078e-06, "loss": 8.5176, "step": 180270 }, { "epoch": 0.9002971360083897, "grad_norm": 0.08948089927434921, "learning_rate": 2.998573180805527e-06, "loss": 8.4984, "step": 180280 }, { "epoch": 0.9003470748333292, "grad_norm": 0.0947575643658638, "learning_rate": 2.9970712658639765e-06, "loss": 8.5057, "step": 180290 }, { "epoch": 0.9003970136582686, "grad_norm": 0.09377171099185944, "learning_rate": 2.9955693509224262e-06, "loss": 8.5169, "step": 180300 }, { "epoch": 0.900446952483208, "grad_norm": 0.08988793194293976, "learning_rate": 2.9940674359808756e-06, "loss": 8.4968, "step": 180310 }, { "epoch": 0.9004968913081475, "grad_norm": 0.09155865758657455, "learning_rate": 2.9925655210393253e-06, "loss": 8.5183, "step": 180320 }, { "epoch": 0.900546830133087, "grad_norm": 0.09261929988861084, "learning_rate": 2.9910636060977747e-06, "loss": 8.4968, "step": 180330 }, { "epoch": 0.9005967689580264, "grad_norm": 0.09239667654037476, "learning_rate": 2.9895616911562244e-06, "loss": 8.5143, "step": 180340 }, { "epoch": 0.9006467077829658, "grad_norm": 0.09450769424438477, "learning_rate": 2.9880597762146737e-06, "loss": 8.504, "step": 180350 }, { "epoch": 0.9006966466079053, "grad_norm": 0.0904548168182373, "learning_rate": 2.986557861273123e-06, "loss": 8.5121, "step": 180360 }, { "epoch": 0.9007465854328448, "grad_norm": 0.09444373100996017, "learning_rate": 2.985055946331573e-06, "loss": 8.5214, "step": 180370 }, { "epoch": 0.9007965242577842, "grad_norm": 0.09165211021900177, "learning_rate": 2.9835540313900226e-06, "loss": 8.5255, "step": 180380 }, { "epoch": 0.9008464630827236, "grad_norm": 0.09335034340620041, "learning_rate": 2.982052116448472e-06, "loss": 8.4975, "step": 180390 }, { "epoch": 0.9008964019076631, "grad_norm": 0.09302797168493271, "learning_rate": 2.9805502015069212e-06, "loss": 8.5096, "step": 180400 }, { "epoch": 0.9009463407326026, "grad_norm": 0.09198260307312012, "learning_rate": 2.9790482865653706e-06, "loss": 8.4975, "step": 180410 }, { "epoch": 0.900996279557542, "grad_norm": 0.08965457230806351, "learning_rate": 2.9775463716238208e-06, "loss": 8.5025, "step": 180420 }, { "epoch": 0.9010462183824814, "grad_norm": 0.08751804381608963, "learning_rate": 2.97604445668227e-06, "loss": 8.4958, "step": 180430 }, { "epoch": 0.9010961572074209, "grad_norm": 0.09645684063434601, "learning_rate": 2.9745425417407194e-06, "loss": 8.5389, "step": 180440 }, { "epoch": 0.9011460960323604, "grad_norm": 0.09055258333683014, "learning_rate": 2.9730406267991687e-06, "loss": 8.501, "step": 180450 }, { "epoch": 0.9011960348572998, "grad_norm": 0.0941043570637703, "learning_rate": 2.971538711857619e-06, "loss": 8.5124, "step": 180460 }, { "epoch": 0.9012459736822392, "grad_norm": 0.09053096920251846, "learning_rate": 2.9700367969160683e-06, "loss": 8.4932, "step": 180470 }, { "epoch": 0.9012959125071787, "grad_norm": 0.08705537766218185, "learning_rate": 2.9685348819745176e-06, "loss": 8.5147, "step": 180480 }, { "epoch": 0.9013458513321182, "grad_norm": 0.09120982885360718, "learning_rate": 2.967032967032967e-06, "loss": 8.536, "step": 180490 }, { "epoch": 0.9013957901570576, "grad_norm": 0.09626072645187378, "learning_rate": 2.9655310520914167e-06, "loss": 8.5223, "step": 180500 }, { "epoch": 0.901445728981997, "grad_norm": 0.09363730996847153, "learning_rate": 2.9640291371498664e-06, "loss": 8.5234, "step": 180510 }, { "epoch": 0.9014956678069365, "grad_norm": 0.09183649718761444, "learning_rate": 2.9625272222083158e-06, "loss": 8.5249, "step": 180520 }, { "epoch": 0.901545606631876, "grad_norm": 0.08634661138057709, "learning_rate": 2.961025307266765e-06, "loss": 8.514, "step": 180530 }, { "epoch": 0.9015955454568154, "grad_norm": 0.08858415484428406, "learning_rate": 2.959523392325215e-06, "loss": 8.5206, "step": 180540 }, { "epoch": 0.9016454842817548, "grad_norm": 0.08977922797203064, "learning_rate": 2.958021477383664e-06, "loss": 8.5043, "step": 180550 }, { "epoch": 0.9016954231066943, "grad_norm": 0.0880313590168953, "learning_rate": 2.956519562442114e-06, "loss": 8.5144, "step": 180560 }, { "epoch": 0.9017453619316338, "grad_norm": 0.08856451511383057, "learning_rate": 2.9550176475005633e-06, "loss": 8.5266, "step": 180570 }, { "epoch": 0.9017953007565732, "grad_norm": 0.09039363265037537, "learning_rate": 2.953515732559013e-06, "loss": 8.5042, "step": 180580 }, { "epoch": 0.9018452395815126, "grad_norm": 0.08735457062721252, "learning_rate": 2.9520138176174624e-06, "loss": 8.5157, "step": 180590 }, { "epoch": 0.9018951784064521, "grad_norm": 0.0854557529091835, "learning_rate": 2.9505119026759117e-06, "loss": 8.514, "step": 180600 }, { "epoch": 0.9019451172313916, "grad_norm": 0.086369588971138, "learning_rate": 2.9490099877343614e-06, "loss": 8.5198, "step": 180610 }, { "epoch": 0.901995056056331, "grad_norm": 0.096776083111763, "learning_rate": 2.947508072792811e-06, "loss": 8.4991, "step": 180620 }, { "epoch": 0.9020449948812704, "grad_norm": 0.08912768959999084, "learning_rate": 2.9460061578512605e-06, "loss": 8.5191, "step": 180630 }, { "epoch": 0.9020949337062099, "grad_norm": 0.09278588742017746, "learning_rate": 2.94450424290971e-06, "loss": 8.5229, "step": 180640 }, { "epoch": 0.9021448725311494, "grad_norm": 0.09282492101192474, "learning_rate": 2.943002327968159e-06, "loss": 8.5159, "step": 180650 }, { "epoch": 0.9021948113560888, "grad_norm": 0.08819557726383209, "learning_rate": 2.9415004130266094e-06, "loss": 8.5337, "step": 180660 }, { "epoch": 0.9022447501810282, "grad_norm": 0.08956322073936462, "learning_rate": 2.9399984980850587e-06, "loss": 8.5141, "step": 180670 }, { "epoch": 0.9022946890059677, "grad_norm": 0.0941721498966217, "learning_rate": 2.938496583143508e-06, "loss": 8.5227, "step": 180680 }, { "epoch": 0.9023446278309072, "grad_norm": 0.09089160710573196, "learning_rate": 2.9369946682019574e-06, "loss": 8.5036, "step": 180690 }, { "epoch": 0.9023945666558466, "grad_norm": 0.09959226101636887, "learning_rate": 2.935492753260407e-06, "loss": 8.4941, "step": 180700 }, { "epoch": 0.902444505480786, "grad_norm": 0.0919126570224762, "learning_rate": 2.933990838318857e-06, "loss": 8.4893, "step": 180710 }, { "epoch": 0.9024944443057255, "grad_norm": 0.0832803025841713, "learning_rate": 2.932488923377306e-06, "loss": 8.5169, "step": 180720 }, { "epoch": 0.902544383130665, "grad_norm": 0.09311427175998688, "learning_rate": 2.9309870084357555e-06, "loss": 8.4991, "step": 180730 }, { "epoch": 0.9025943219556044, "grad_norm": 0.09170939028263092, "learning_rate": 2.9294850934942053e-06, "loss": 8.4904, "step": 180740 }, { "epoch": 0.9026442607805438, "grad_norm": 0.09490250796079636, "learning_rate": 2.9279831785526546e-06, "loss": 8.5021, "step": 180750 }, { "epoch": 0.9026941996054832, "grad_norm": 0.09353826195001602, "learning_rate": 2.9264812636111044e-06, "loss": 8.517, "step": 180760 }, { "epoch": 0.9027441384304228, "grad_norm": 0.09115730971097946, "learning_rate": 2.9249793486695537e-06, "loss": 8.4986, "step": 180770 }, { "epoch": 0.9027940772553622, "grad_norm": 0.09605268388986588, "learning_rate": 2.9234774337280035e-06, "loss": 8.5088, "step": 180780 }, { "epoch": 0.9028440160803016, "grad_norm": 0.09390175342559814, "learning_rate": 2.921975518786453e-06, "loss": 8.5063, "step": 180790 }, { "epoch": 0.902893954905241, "grad_norm": 0.09228114783763885, "learning_rate": 2.920473603844902e-06, "loss": 8.5095, "step": 180800 }, { "epoch": 0.9029438937301806, "grad_norm": 0.09767407178878784, "learning_rate": 2.918971688903352e-06, "loss": 8.5066, "step": 180810 }, { "epoch": 0.90299383255512, "grad_norm": 0.08856954425573349, "learning_rate": 2.9174697739618016e-06, "loss": 8.5368, "step": 180820 }, { "epoch": 0.9030437713800594, "grad_norm": 0.09355857968330383, "learning_rate": 2.915967859020251e-06, "loss": 8.5208, "step": 180830 }, { "epoch": 0.9030937102049988, "grad_norm": 0.08886077255010605, "learning_rate": 2.9144659440787003e-06, "loss": 8.5253, "step": 180840 }, { "epoch": 0.9031436490299384, "grad_norm": 0.09166674315929413, "learning_rate": 2.9129640291371496e-06, "loss": 8.5249, "step": 180850 }, { "epoch": 0.9031935878548778, "grad_norm": 0.08610928058624268, "learning_rate": 2.9114621141956e-06, "loss": 8.5264, "step": 180860 }, { "epoch": 0.9032435266798172, "grad_norm": 0.08989940583705902, "learning_rate": 2.909960199254049e-06, "loss": 8.5241, "step": 180870 }, { "epoch": 0.9032934655047566, "grad_norm": 0.09518139809370041, "learning_rate": 2.9084582843124985e-06, "loss": 8.5228, "step": 180880 }, { "epoch": 0.9033434043296962, "grad_norm": 0.09098067134618759, "learning_rate": 2.906956369370948e-06, "loss": 8.5047, "step": 180890 }, { "epoch": 0.9033933431546356, "grad_norm": 0.0919727236032486, "learning_rate": 2.9054544544293976e-06, "loss": 8.5145, "step": 180900 }, { "epoch": 0.903443281979575, "grad_norm": 0.09157610684633255, "learning_rate": 2.9039525394878473e-06, "loss": 8.4891, "step": 180910 }, { "epoch": 0.9034932208045144, "grad_norm": 0.09556107968091965, "learning_rate": 2.9024506245462966e-06, "loss": 8.5144, "step": 180920 }, { "epoch": 0.903543159629454, "grad_norm": 0.08390878885984421, "learning_rate": 2.900948709604746e-06, "loss": 8.5181, "step": 180930 }, { "epoch": 0.9035930984543934, "grad_norm": 0.09191958606243134, "learning_rate": 2.8994467946631957e-06, "loss": 8.5095, "step": 180940 }, { "epoch": 0.9036430372793328, "grad_norm": 0.09150879085063934, "learning_rate": 2.897944879721645e-06, "loss": 8.5215, "step": 180950 }, { "epoch": 0.9036929761042722, "grad_norm": 0.0931938886642456, "learning_rate": 2.896442964780095e-06, "loss": 8.4941, "step": 180960 }, { "epoch": 0.9037429149292117, "grad_norm": 0.09147433936595917, "learning_rate": 2.894941049838544e-06, "loss": 8.5119, "step": 180970 }, { "epoch": 0.9037928537541512, "grad_norm": 0.09065255522727966, "learning_rate": 2.893439134896994e-06, "loss": 8.5166, "step": 180980 }, { "epoch": 0.9038427925790906, "grad_norm": 0.0912211537361145, "learning_rate": 2.8919372199554432e-06, "loss": 8.4907, "step": 180990 }, { "epoch": 0.90389273140403, "grad_norm": 0.09441815316677094, "learning_rate": 2.890435305013893e-06, "loss": 8.5097, "step": 181000 }, { "epoch": 0.9039426702289695, "grad_norm": 0.08536705374717712, "learning_rate": 2.8889333900723423e-06, "loss": 8.5144, "step": 181010 }, { "epoch": 0.903992609053909, "grad_norm": 0.08860597014427185, "learning_rate": 2.887431475130792e-06, "loss": 8.5068, "step": 181020 }, { "epoch": 0.9040425478788484, "grad_norm": 0.09018053114414215, "learning_rate": 2.8859295601892414e-06, "loss": 8.5083, "step": 181030 }, { "epoch": 0.9040924867037878, "grad_norm": 0.08966843783855438, "learning_rate": 2.8844276452476907e-06, "loss": 8.4964, "step": 181040 }, { "epoch": 0.9041424255287273, "grad_norm": 0.08810800313949585, "learning_rate": 2.8829257303061405e-06, "loss": 8.5215, "step": 181050 }, { "epoch": 0.9041923643536668, "grad_norm": 0.0902119129896164, "learning_rate": 2.8814238153645902e-06, "loss": 8.5246, "step": 181060 }, { "epoch": 0.9042423031786062, "grad_norm": 0.09436272829771042, "learning_rate": 2.8799219004230396e-06, "loss": 8.4939, "step": 181070 }, { "epoch": 0.9042922420035456, "grad_norm": 0.09024659544229507, "learning_rate": 2.878419985481489e-06, "loss": 8.5095, "step": 181080 }, { "epoch": 0.904342180828485, "grad_norm": 0.089921735227108, "learning_rate": 2.8769180705399382e-06, "loss": 8.5008, "step": 181090 }, { "epoch": 0.9043921196534246, "grad_norm": 0.08670993894338608, "learning_rate": 2.8754161555983884e-06, "loss": 8.5017, "step": 181100 }, { "epoch": 0.904442058478364, "grad_norm": 0.09145168215036392, "learning_rate": 2.8739142406568377e-06, "loss": 8.5277, "step": 181110 }, { "epoch": 0.9044919973033034, "grad_norm": 0.09542462974786758, "learning_rate": 2.872412325715287e-06, "loss": 8.5008, "step": 181120 }, { "epoch": 0.9045419361282429, "grad_norm": 0.09122487902641296, "learning_rate": 2.8709104107737364e-06, "loss": 8.5092, "step": 181130 }, { "epoch": 0.9045918749531824, "grad_norm": 0.08779449760913849, "learning_rate": 2.869408495832186e-06, "loss": 8.4968, "step": 181140 }, { "epoch": 0.9046418137781218, "grad_norm": 0.08945740014314651, "learning_rate": 2.867906580890636e-06, "loss": 8.5206, "step": 181150 }, { "epoch": 0.9046917526030612, "grad_norm": 0.08815411478281021, "learning_rate": 2.8664046659490852e-06, "loss": 8.512, "step": 181160 }, { "epoch": 0.9047416914280006, "grad_norm": 0.09432817250490189, "learning_rate": 2.8649027510075346e-06, "loss": 8.5159, "step": 181170 }, { "epoch": 0.9047916302529402, "grad_norm": 0.09125339239835739, "learning_rate": 2.8634008360659843e-06, "loss": 8.5162, "step": 181180 }, { "epoch": 0.9048415690778796, "grad_norm": 0.08769851177930832, "learning_rate": 2.8618989211244337e-06, "loss": 8.5112, "step": 181190 }, { "epoch": 0.904891507902819, "grad_norm": 0.09156162291765213, "learning_rate": 2.8603970061828834e-06, "loss": 8.5177, "step": 181200 }, { "epoch": 0.9049414467277584, "grad_norm": 0.095248281955719, "learning_rate": 2.8588950912413328e-06, "loss": 8.4942, "step": 181210 }, { "epoch": 0.904991385552698, "grad_norm": 0.09011649340391159, "learning_rate": 2.8573931762997825e-06, "loss": 8.5229, "step": 181220 }, { "epoch": 0.9050413243776374, "grad_norm": 0.08675964921712875, "learning_rate": 2.855891261358232e-06, "loss": 8.5231, "step": 181230 }, { "epoch": 0.9050912632025768, "grad_norm": 0.0876312181353569, "learning_rate": 2.854389346416681e-06, "loss": 8.5145, "step": 181240 }, { "epoch": 0.9051412020275162, "grad_norm": 0.0970059260725975, "learning_rate": 2.852887431475131e-06, "loss": 8.5047, "step": 181250 }, { "epoch": 0.9051911408524558, "grad_norm": 0.0930013656616211, "learning_rate": 2.8513855165335807e-06, "loss": 8.4996, "step": 181260 }, { "epoch": 0.9052410796773952, "grad_norm": 0.09447172284126282, "learning_rate": 2.84988360159203e-06, "loss": 8.5009, "step": 181270 }, { "epoch": 0.9052910185023346, "grad_norm": 0.0933113843202591, "learning_rate": 2.8483816866504793e-06, "loss": 8.4953, "step": 181280 }, { "epoch": 0.905340957327274, "grad_norm": 0.09435321390628815, "learning_rate": 2.8468797717089287e-06, "loss": 8.4999, "step": 181290 }, { "epoch": 0.9053908961522136, "grad_norm": 0.08828049153089523, "learning_rate": 2.8453778567673784e-06, "loss": 8.5145, "step": 181300 }, { "epoch": 0.905440834977153, "grad_norm": 0.08913722634315491, "learning_rate": 2.843875941825828e-06, "loss": 8.4944, "step": 181310 }, { "epoch": 0.9054907738020924, "grad_norm": 0.09632635116577148, "learning_rate": 2.8423740268842775e-06, "loss": 8.493, "step": 181320 }, { "epoch": 0.9055407126270318, "grad_norm": 0.0897509828209877, "learning_rate": 2.840872111942727e-06, "loss": 8.4977, "step": 181330 }, { "epoch": 0.9055906514519714, "grad_norm": 0.09208657592535019, "learning_rate": 2.839370197001176e-06, "loss": 8.5314, "step": 181340 }, { "epoch": 0.9056405902769108, "grad_norm": 0.09006085991859436, "learning_rate": 2.8378682820596264e-06, "loss": 8.5163, "step": 181350 }, { "epoch": 0.9056905291018502, "grad_norm": 0.09176279604434967, "learning_rate": 2.8363663671180757e-06, "loss": 8.5148, "step": 181360 }, { "epoch": 0.9057404679267896, "grad_norm": 0.09576241672039032, "learning_rate": 2.834864452176525e-06, "loss": 8.5346, "step": 181370 }, { "epoch": 0.9057904067517292, "grad_norm": 0.09277977049350739, "learning_rate": 2.8333625372349744e-06, "loss": 8.5067, "step": 181380 }, { "epoch": 0.9058403455766686, "grad_norm": 0.0882072001695633, "learning_rate": 2.831860622293424e-06, "loss": 8.5046, "step": 181390 }, { "epoch": 0.905890284401608, "grad_norm": 0.08874956518411636, "learning_rate": 2.830358707351874e-06, "loss": 8.5382, "step": 181400 }, { "epoch": 0.9059402232265474, "grad_norm": 0.08898142725229263, "learning_rate": 2.828856792410323e-06, "loss": 8.5067, "step": 181410 }, { "epoch": 0.905990162051487, "grad_norm": 0.08590088039636612, "learning_rate": 2.8273548774687725e-06, "loss": 8.5023, "step": 181420 }, { "epoch": 0.9060401008764264, "grad_norm": 0.08650851249694824, "learning_rate": 2.8258529625272223e-06, "loss": 8.5202, "step": 181430 }, { "epoch": 0.9060900397013658, "grad_norm": 0.09431696683168411, "learning_rate": 2.8243510475856716e-06, "loss": 8.5401, "step": 181440 }, { "epoch": 0.9061399785263052, "grad_norm": 0.09654943645000458, "learning_rate": 2.8228491326441214e-06, "loss": 8.5078, "step": 181450 }, { "epoch": 0.9061899173512448, "grad_norm": 0.08821980655193329, "learning_rate": 2.8213472177025707e-06, "loss": 8.4975, "step": 181460 }, { "epoch": 0.9062398561761842, "grad_norm": 0.0911247506737709, "learning_rate": 2.8198453027610204e-06, "loss": 8.5129, "step": 181470 }, { "epoch": 0.9062897950011236, "grad_norm": 0.08649583905935287, "learning_rate": 2.8183433878194698e-06, "loss": 8.5025, "step": 181480 }, { "epoch": 0.906339733826063, "grad_norm": 0.09082172811031342, "learning_rate": 2.816841472877919e-06, "loss": 8.5101, "step": 181490 }, { "epoch": 0.9063896726510026, "grad_norm": 0.09340551495552063, "learning_rate": 2.815339557936369e-06, "loss": 8.5192, "step": 181500 }, { "epoch": 0.906439611475942, "grad_norm": 0.09474127739667892, "learning_rate": 2.8138376429948186e-06, "loss": 8.5036, "step": 181510 }, { "epoch": 0.9064895503008814, "grad_norm": 0.08885298669338226, "learning_rate": 2.812335728053268e-06, "loss": 8.4964, "step": 181520 }, { "epoch": 0.9065394891258208, "grad_norm": 0.09157950431108475, "learning_rate": 2.8108338131117173e-06, "loss": 8.4965, "step": 181530 }, { "epoch": 0.9065894279507604, "grad_norm": 0.08861027657985687, "learning_rate": 2.809331898170167e-06, "loss": 8.518, "step": 181540 }, { "epoch": 0.9066393667756998, "grad_norm": 0.0883537232875824, "learning_rate": 2.807829983228617e-06, "loss": 8.5024, "step": 181550 }, { "epoch": 0.9066893056006392, "grad_norm": 0.0927857756614685, "learning_rate": 2.806328068287066e-06, "loss": 8.4976, "step": 181560 }, { "epoch": 0.9067392444255786, "grad_norm": 0.09182663261890411, "learning_rate": 2.8048261533455155e-06, "loss": 8.4891, "step": 181570 }, { "epoch": 0.9067891832505182, "grad_norm": 0.09595274925231934, "learning_rate": 2.803324238403965e-06, "loss": 8.5132, "step": 181580 }, { "epoch": 0.9068391220754576, "grad_norm": 0.09273464232683182, "learning_rate": 2.801822323462415e-06, "loss": 8.5065, "step": 181590 }, { "epoch": 0.906889060900397, "grad_norm": 0.09522940963506699, "learning_rate": 2.8003204085208643e-06, "loss": 8.5137, "step": 181600 }, { "epoch": 0.9069389997253364, "grad_norm": 0.08875519037246704, "learning_rate": 2.7988184935793136e-06, "loss": 8.5101, "step": 181610 }, { "epoch": 0.906988938550276, "grad_norm": 0.09621170908212662, "learning_rate": 2.797316578637763e-06, "loss": 8.5001, "step": 181620 }, { "epoch": 0.9070388773752154, "grad_norm": 0.09134541451931, "learning_rate": 2.7958146636962127e-06, "loss": 8.5145, "step": 181630 }, { "epoch": 0.9070888162001548, "grad_norm": 0.09547372162342072, "learning_rate": 2.7943127487546625e-06, "loss": 8.4756, "step": 181640 }, { "epoch": 0.9071387550250942, "grad_norm": 0.09366821497678757, "learning_rate": 2.792810833813112e-06, "loss": 8.5362, "step": 181650 }, { "epoch": 0.9071886938500338, "grad_norm": 0.09829097241163254, "learning_rate": 2.791308918871561e-06, "loss": 8.5254, "step": 181660 }, { "epoch": 0.9072386326749732, "grad_norm": 0.09187762439250946, "learning_rate": 2.789807003930011e-06, "loss": 8.5192, "step": 181670 }, { "epoch": 0.9072885714999126, "grad_norm": 0.09273143857717514, "learning_rate": 2.7883050889884602e-06, "loss": 8.4973, "step": 181680 }, { "epoch": 0.907338510324852, "grad_norm": 0.09177732467651367, "learning_rate": 2.78680317404691e-06, "loss": 8.5055, "step": 181690 }, { "epoch": 0.9073884491497916, "grad_norm": 0.089385986328125, "learning_rate": 2.7853012591053593e-06, "loss": 8.5095, "step": 181700 }, { "epoch": 0.907438387974731, "grad_norm": 0.09638892114162445, "learning_rate": 2.783799344163809e-06, "loss": 8.4961, "step": 181710 }, { "epoch": 0.9074883267996704, "grad_norm": 0.08958081156015396, "learning_rate": 2.7822974292222584e-06, "loss": 8.5128, "step": 181720 }, { "epoch": 0.9075382656246098, "grad_norm": 0.09109731018543243, "learning_rate": 2.7807955142807077e-06, "loss": 8.5253, "step": 181730 }, { "epoch": 0.9075882044495494, "grad_norm": 0.08715473860502243, "learning_rate": 2.7792935993391575e-06, "loss": 8.5185, "step": 181740 }, { "epoch": 0.9076381432744888, "grad_norm": 0.09187250584363937, "learning_rate": 2.7777916843976072e-06, "loss": 8.5065, "step": 181750 }, { "epoch": 0.9076880820994282, "grad_norm": 0.09143325686454773, "learning_rate": 2.7762897694560566e-06, "loss": 8.4969, "step": 181760 }, { "epoch": 0.9077380209243676, "grad_norm": 0.08711639046669006, "learning_rate": 2.774787854514506e-06, "loss": 8.5134, "step": 181770 }, { "epoch": 0.9077879597493071, "grad_norm": 0.09443769603967667, "learning_rate": 2.7732859395729552e-06, "loss": 8.5037, "step": 181780 }, { "epoch": 0.9078378985742466, "grad_norm": 0.09021743386983871, "learning_rate": 2.7717840246314054e-06, "loss": 8.4901, "step": 181790 }, { "epoch": 0.907887837399186, "grad_norm": 0.08807482570409775, "learning_rate": 2.7702821096898547e-06, "loss": 8.529, "step": 181800 }, { "epoch": 0.9079377762241254, "grad_norm": 0.09367617219686508, "learning_rate": 2.768780194748304e-06, "loss": 8.534, "step": 181810 }, { "epoch": 0.907987715049065, "grad_norm": 0.09450183063745499, "learning_rate": 2.7672782798067534e-06, "loss": 8.5267, "step": 181820 }, { "epoch": 0.9080376538740044, "grad_norm": 0.09131249040365219, "learning_rate": 2.765776364865203e-06, "loss": 8.5342, "step": 181830 }, { "epoch": 0.9080875926989438, "grad_norm": 0.09016092121601105, "learning_rate": 2.764274449923653e-06, "loss": 8.5188, "step": 181840 }, { "epoch": 0.9081375315238832, "grad_norm": 0.09213612228631973, "learning_rate": 2.7627725349821022e-06, "loss": 8.5291, "step": 181850 }, { "epoch": 0.9081874703488227, "grad_norm": 0.09062368422746658, "learning_rate": 2.7612706200405516e-06, "loss": 8.5298, "step": 181860 }, { "epoch": 0.9082374091737622, "grad_norm": 0.08791705220937729, "learning_rate": 2.7597687050990013e-06, "loss": 8.5115, "step": 181870 }, { "epoch": 0.9082873479987016, "grad_norm": 0.09510977566242218, "learning_rate": 2.7582667901574507e-06, "loss": 8.5174, "step": 181880 }, { "epoch": 0.908337286823641, "grad_norm": 0.0934426486492157, "learning_rate": 2.7567648752159004e-06, "loss": 8.5261, "step": 181890 }, { "epoch": 0.9083872256485805, "grad_norm": 0.09243565052747726, "learning_rate": 2.7552629602743497e-06, "loss": 8.5155, "step": 181900 }, { "epoch": 0.90843716447352, "grad_norm": 0.09363988786935806, "learning_rate": 2.7537610453327995e-06, "loss": 8.5313, "step": 181910 }, { "epoch": 0.9084871032984594, "grad_norm": 0.09643617272377014, "learning_rate": 2.752259130391249e-06, "loss": 8.5019, "step": 181920 }, { "epoch": 0.9085370421233988, "grad_norm": 0.09652823954820633, "learning_rate": 2.750757215449698e-06, "loss": 8.5114, "step": 181930 }, { "epoch": 0.9085869809483383, "grad_norm": 0.0902838185429573, "learning_rate": 2.749255300508148e-06, "loss": 8.5336, "step": 181940 }, { "epoch": 0.9086369197732778, "grad_norm": 0.0898505225777626, "learning_rate": 2.7477533855665977e-06, "loss": 8.5068, "step": 181950 }, { "epoch": 0.9086868585982172, "grad_norm": 0.0900585874915123, "learning_rate": 2.746251470625047e-06, "loss": 8.4995, "step": 181960 }, { "epoch": 0.9087367974231566, "grad_norm": 0.09357380867004395, "learning_rate": 2.7447495556834963e-06, "loss": 8.5153, "step": 181970 }, { "epoch": 0.908786736248096, "grad_norm": 0.08812058717012405, "learning_rate": 2.7432476407419457e-06, "loss": 8.5055, "step": 181980 }, { "epoch": 0.9088366750730356, "grad_norm": 0.08812303096055984, "learning_rate": 2.741745725800396e-06, "loss": 8.5064, "step": 181990 }, { "epoch": 0.908886613897975, "grad_norm": 0.09171146154403687, "learning_rate": 2.740243810858845e-06, "loss": 8.5135, "step": 182000 }, { "epoch": 0.9089365527229144, "grad_norm": 0.09255418926477432, "learning_rate": 2.7387418959172945e-06, "loss": 8.5029, "step": 182010 }, { "epoch": 0.9089864915478538, "grad_norm": 0.08891849964857101, "learning_rate": 2.737239980975744e-06, "loss": 8.5038, "step": 182020 }, { "epoch": 0.9090364303727934, "grad_norm": 0.09265652298927307, "learning_rate": 2.7357380660341936e-06, "loss": 8.5129, "step": 182030 }, { "epoch": 0.9090863691977328, "grad_norm": 0.08749819546937943, "learning_rate": 2.7342361510926433e-06, "loss": 8.5183, "step": 182040 }, { "epoch": 0.9091363080226722, "grad_norm": 0.08859850466251373, "learning_rate": 2.7327342361510927e-06, "loss": 8.5137, "step": 182050 }, { "epoch": 0.9091862468476116, "grad_norm": 0.09206672012805939, "learning_rate": 2.731232321209542e-06, "loss": 8.5156, "step": 182060 }, { "epoch": 0.9092361856725512, "grad_norm": 0.08951518684625626, "learning_rate": 2.7297304062679918e-06, "loss": 8.4979, "step": 182070 }, { "epoch": 0.9092861244974906, "grad_norm": 0.09024099260568619, "learning_rate": 2.7282284913264415e-06, "loss": 8.5223, "step": 182080 }, { "epoch": 0.90933606332243, "grad_norm": 0.08952950686216354, "learning_rate": 2.726726576384891e-06, "loss": 8.4998, "step": 182090 }, { "epoch": 0.9093860021473694, "grad_norm": 0.09278613328933716, "learning_rate": 2.72522466144334e-06, "loss": 8.5078, "step": 182100 }, { "epoch": 0.909435940972309, "grad_norm": 0.08384092897176743, "learning_rate": 2.72372274650179e-06, "loss": 8.5027, "step": 182110 }, { "epoch": 0.9094858797972484, "grad_norm": 0.08967365324497223, "learning_rate": 2.7222208315602393e-06, "loss": 8.5171, "step": 182120 }, { "epoch": 0.9095358186221878, "grad_norm": 0.09536395221948624, "learning_rate": 2.720718916618689e-06, "loss": 8.5055, "step": 182130 }, { "epoch": 0.9095857574471272, "grad_norm": 0.09401460736989975, "learning_rate": 2.7192170016771384e-06, "loss": 8.5148, "step": 182140 }, { "epoch": 0.9096356962720668, "grad_norm": 0.0905858501791954, "learning_rate": 2.717715086735588e-06, "loss": 8.5112, "step": 182150 }, { "epoch": 0.9096856350970062, "grad_norm": 0.09035810828208923, "learning_rate": 2.7162131717940374e-06, "loss": 8.4981, "step": 182160 }, { "epoch": 0.9097355739219456, "grad_norm": 0.09156506508588791, "learning_rate": 2.7147112568524868e-06, "loss": 8.5072, "step": 182170 }, { "epoch": 0.909785512746885, "grad_norm": 0.09411493688821793, "learning_rate": 2.7132093419109365e-06, "loss": 8.5061, "step": 182180 }, { "epoch": 0.9098354515718245, "grad_norm": 0.08917105942964554, "learning_rate": 2.7117074269693863e-06, "loss": 8.5034, "step": 182190 }, { "epoch": 0.909885390396764, "grad_norm": 0.08715265989303589, "learning_rate": 2.7102055120278356e-06, "loss": 8.507, "step": 182200 }, { "epoch": 0.9099353292217034, "grad_norm": 0.09055981785058975, "learning_rate": 2.708703597086285e-06, "loss": 8.5144, "step": 182210 }, { "epoch": 0.9099852680466428, "grad_norm": 0.09121181070804596, "learning_rate": 2.7072016821447343e-06, "loss": 8.5191, "step": 182220 }, { "epoch": 0.9100352068715823, "grad_norm": 0.09437250345945358, "learning_rate": 2.7056997672031845e-06, "loss": 8.5132, "step": 182230 }, { "epoch": 0.9100851456965218, "grad_norm": 0.09657744318246841, "learning_rate": 2.7041978522616338e-06, "loss": 8.5328, "step": 182240 }, { "epoch": 0.9101350845214612, "grad_norm": 0.09253308176994324, "learning_rate": 2.702695937320083e-06, "loss": 8.5081, "step": 182250 }, { "epoch": 0.9101850233464006, "grad_norm": 0.09130608290433884, "learning_rate": 2.7011940223785324e-06, "loss": 8.4981, "step": 182260 }, { "epoch": 0.9102349621713401, "grad_norm": 0.08788429945707321, "learning_rate": 2.699692107436982e-06, "loss": 8.4895, "step": 182270 }, { "epoch": 0.9102849009962796, "grad_norm": 0.09099657833576202, "learning_rate": 2.698190192495432e-06, "loss": 8.508, "step": 182280 }, { "epoch": 0.910334839821219, "grad_norm": 0.08800596743822098, "learning_rate": 2.6966882775538813e-06, "loss": 8.5297, "step": 182290 }, { "epoch": 0.9103847786461584, "grad_norm": 0.09730488806962967, "learning_rate": 2.6951863626123306e-06, "loss": 8.5148, "step": 182300 }, { "epoch": 0.9104347174710979, "grad_norm": 0.09206937998533249, "learning_rate": 2.6936844476707804e-06, "loss": 8.5049, "step": 182310 }, { "epoch": 0.9104846562960374, "grad_norm": 0.08866451680660248, "learning_rate": 2.6921825327292297e-06, "loss": 8.512, "step": 182320 }, { "epoch": 0.9105345951209768, "grad_norm": 0.09090999513864517, "learning_rate": 2.6906806177876795e-06, "loss": 8.4796, "step": 182330 }, { "epoch": 0.9105845339459162, "grad_norm": 0.08853746205568314, "learning_rate": 2.689178702846129e-06, "loss": 8.5107, "step": 182340 }, { "epoch": 0.9106344727708557, "grad_norm": 0.09544672071933746, "learning_rate": 2.6876767879045785e-06, "loss": 8.5033, "step": 182350 }, { "epoch": 0.9106844115957952, "grad_norm": 0.09194538742303848, "learning_rate": 2.686174872963028e-06, "loss": 8.5087, "step": 182360 }, { "epoch": 0.9107343504207346, "grad_norm": 0.08848384767770767, "learning_rate": 2.684672958021477e-06, "loss": 8.5096, "step": 182370 }, { "epoch": 0.910784289245674, "grad_norm": 0.09186051785945892, "learning_rate": 2.683171043079927e-06, "loss": 8.5115, "step": 182380 }, { "epoch": 0.9108342280706135, "grad_norm": 0.09041396528482437, "learning_rate": 2.6816691281383767e-06, "loss": 8.5148, "step": 182390 }, { "epoch": 0.910884166895553, "grad_norm": 0.0917699784040451, "learning_rate": 2.680167213196826e-06, "loss": 8.5289, "step": 182400 }, { "epoch": 0.9109341057204924, "grad_norm": 0.09223365783691406, "learning_rate": 2.6786652982552754e-06, "loss": 8.509, "step": 182410 }, { "epoch": 0.9109840445454318, "grad_norm": 0.08908481150865555, "learning_rate": 2.6771633833137247e-06, "loss": 8.519, "step": 182420 }, { "epoch": 0.9110339833703713, "grad_norm": 0.08989152312278748, "learning_rate": 2.675661468372175e-06, "loss": 8.5255, "step": 182430 }, { "epoch": 0.9110839221953108, "grad_norm": 0.0932261049747467, "learning_rate": 2.6741595534306242e-06, "loss": 8.5118, "step": 182440 }, { "epoch": 0.9111338610202502, "grad_norm": 0.08758267015218735, "learning_rate": 2.6726576384890736e-06, "loss": 8.528, "step": 182450 }, { "epoch": 0.9111837998451896, "grad_norm": 0.09773239493370056, "learning_rate": 2.671155723547523e-06, "loss": 8.5038, "step": 182460 }, { "epoch": 0.9112337386701291, "grad_norm": 0.09452714025974274, "learning_rate": 2.6696538086059726e-06, "loss": 8.512, "step": 182470 }, { "epoch": 0.9112836774950686, "grad_norm": 0.09245564788579941, "learning_rate": 2.6681518936644224e-06, "loss": 8.5157, "step": 182480 }, { "epoch": 0.911333616320008, "grad_norm": 0.08789669722318649, "learning_rate": 2.6666499787228717e-06, "loss": 8.5091, "step": 182490 }, { "epoch": 0.9113835551449474, "grad_norm": 0.08953165262937546, "learning_rate": 2.665148063781321e-06, "loss": 8.4934, "step": 182500 }, { "epoch": 0.9114334939698869, "grad_norm": 0.09100998938083649, "learning_rate": 2.663646148839771e-06, "loss": 8.5173, "step": 182510 }, { "epoch": 0.9114834327948264, "grad_norm": 0.10075462609529495, "learning_rate": 2.66214423389822e-06, "loss": 8.5088, "step": 182520 }, { "epoch": 0.9115333716197658, "grad_norm": 0.09430301189422607, "learning_rate": 2.66064231895667e-06, "loss": 8.4971, "step": 182530 }, { "epoch": 0.9115833104447052, "grad_norm": 0.08746136724948883, "learning_rate": 2.6591404040151192e-06, "loss": 8.4993, "step": 182540 }, { "epoch": 0.9116332492696447, "grad_norm": 0.08813217282295227, "learning_rate": 2.657638489073569e-06, "loss": 8.5164, "step": 182550 }, { "epoch": 0.9116831880945842, "grad_norm": 0.08637049794197083, "learning_rate": 2.6561365741320183e-06, "loss": 8.508, "step": 182560 }, { "epoch": 0.9117331269195236, "grad_norm": 0.09196769446134567, "learning_rate": 2.6546346591904676e-06, "loss": 8.498, "step": 182570 }, { "epoch": 0.911783065744463, "grad_norm": 0.09665251523256302, "learning_rate": 2.6531327442489174e-06, "loss": 8.5179, "step": 182580 }, { "epoch": 0.9118330045694025, "grad_norm": 0.08948566764593124, "learning_rate": 2.651630829307367e-06, "loss": 8.5084, "step": 182590 }, { "epoch": 0.911882943394342, "grad_norm": 0.08373957872390747, "learning_rate": 2.6501289143658165e-06, "loss": 8.5356, "step": 182600 }, { "epoch": 0.9119328822192814, "grad_norm": 0.08785004913806915, "learning_rate": 2.648626999424266e-06, "loss": 8.5137, "step": 182610 }, { "epoch": 0.9119828210442208, "grad_norm": 0.09454427659511566, "learning_rate": 2.6471250844827156e-06, "loss": 8.503, "step": 182620 }, { "epoch": 0.9120327598691603, "grad_norm": 0.0859336405992508, "learning_rate": 2.6456231695411653e-06, "loss": 8.5194, "step": 182630 }, { "epoch": 0.9120826986940997, "grad_norm": 0.0904291421175003, "learning_rate": 2.6441212545996147e-06, "loss": 8.5232, "step": 182640 }, { "epoch": 0.9121326375190392, "grad_norm": 0.0902220755815506, "learning_rate": 2.642619339658064e-06, "loss": 8.4902, "step": 182650 }, { "epoch": 0.9121825763439786, "grad_norm": 0.09232915937900543, "learning_rate": 2.6411174247165133e-06, "loss": 8.5124, "step": 182660 }, { "epoch": 0.9122325151689181, "grad_norm": 0.0919727310538292, "learning_rate": 2.6396155097749635e-06, "loss": 8.5165, "step": 182670 }, { "epoch": 0.9122824539938575, "grad_norm": 0.09011544287204742, "learning_rate": 2.638113594833413e-06, "loss": 8.534, "step": 182680 }, { "epoch": 0.912332392818797, "grad_norm": 0.08686014264822006, "learning_rate": 2.636611679891862e-06, "loss": 8.5142, "step": 182690 }, { "epoch": 0.9123823316437364, "grad_norm": 0.09502139687538147, "learning_rate": 2.6351097649503115e-06, "loss": 8.5061, "step": 182700 }, { "epoch": 0.9124322704686759, "grad_norm": 0.08637500554323196, "learning_rate": 2.6336078500087613e-06, "loss": 8.5126, "step": 182710 }, { "epoch": 0.9124822092936153, "grad_norm": 0.08577697724103928, "learning_rate": 2.632105935067211e-06, "loss": 8.5109, "step": 182720 }, { "epoch": 0.9125321481185548, "grad_norm": 0.08816831558942795, "learning_rate": 2.6306040201256603e-06, "loss": 8.5045, "step": 182730 }, { "epoch": 0.9125820869434942, "grad_norm": 0.09027297049760818, "learning_rate": 2.6291021051841097e-06, "loss": 8.5062, "step": 182740 }, { "epoch": 0.9126320257684337, "grad_norm": 0.08729089051485062, "learning_rate": 2.6276001902425594e-06, "loss": 8.4982, "step": 182750 }, { "epoch": 0.9126819645933731, "grad_norm": 0.09011752158403397, "learning_rate": 2.6260982753010088e-06, "loss": 8.5129, "step": 182760 }, { "epoch": 0.9127319034183126, "grad_norm": 0.09230612218379974, "learning_rate": 2.6245963603594585e-06, "loss": 8.5134, "step": 182770 }, { "epoch": 0.912781842243252, "grad_norm": 0.08646968007087708, "learning_rate": 2.623094445417908e-06, "loss": 8.5196, "step": 182780 }, { "epoch": 0.9128317810681915, "grad_norm": 0.09204564988613129, "learning_rate": 2.6215925304763576e-06, "loss": 8.5168, "step": 182790 }, { "epoch": 0.9128817198931309, "grad_norm": 0.08996839821338654, "learning_rate": 2.620090615534807e-06, "loss": 8.5249, "step": 182800 }, { "epoch": 0.9129316587180704, "grad_norm": 0.09080091118812561, "learning_rate": 2.6185887005932563e-06, "loss": 8.5052, "step": 182810 }, { "epoch": 0.9129815975430098, "grad_norm": 0.09134030342102051, "learning_rate": 2.617086785651706e-06, "loss": 8.5065, "step": 182820 }, { "epoch": 0.9130315363679493, "grad_norm": 0.08714727312326431, "learning_rate": 2.6155848707101558e-06, "loss": 8.4966, "step": 182830 }, { "epoch": 0.9130814751928887, "grad_norm": 0.08802255988121033, "learning_rate": 2.614082955768605e-06, "loss": 8.5049, "step": 182840 }, { "epoch": 0.9131314140178282, "grad_norm": 0.091342493891716, "learning_rate": 2.6125810408270544e-06, "loss": 8.5057, "step": 182850 }, { "epoch": 0.9131813528427676, "grad_norm": 0.0987749993801117, "learning_rate": 2.6110791258855038e-06, "loss": 8.5085, "step": 182860 }, { "epoch": 0.9132312916677071, "grad_norm": 0.08858326077461243, "learning_rate": 2.609577210943954e-06, "loss": 8.5, "step": 182870 }, { "epoch": 0.9132812304926465, "grad_norm": 0.09258518368005753, "learning_rate": 2.6080752960024033e-06, "loss": 8.4916, "step": 182880 }, { "epoch": 0.913331169317586, "grad_norm": 0.09570000320672989, "learning_rate": 2.6065733810608526e-06, "loss": 8.5024, "step": 182890 }, { "epoch": 0.9133811081425254, "grad_norm": 0.09027928858995438, "learning_rate": 2.605071466119302e-06, "loss": 8.5172, "step": 182900 }, { "epoch": 0.9134310469674649, "grad_norm": 0.09241816401481628, "learning_rate": 2.6035695511777517e-06, "loss": 8.5002, "step": 182910 }, { "epoch": 0.9134809857924043, "grad_norm": 0.08846849203109741, "learning_rate": 2.6020676362362014e-06, "loss": 8.5361, "step": 182920 }, { "epoch": 0.9135309246173438, "grad_norm": 0.09381444752216339, "learning_rate": 2.6005657212946508e-06, "loss": 8.5218, "step": 182930 }, { "epoch": 0.9135808634422832, "grad_norm": 0.09875883162021637, "learning_rate": 2.5990638063531e-06, "loss": 8.5085, "step": 182940 }, { "epoch": 0.9136308022672226, "grad_norm": 0.0989549458026886, "learning_rate": 2.59756189141155e-06, "loss": 8.4929, "step": 182950 }, { "epoch": 0.9136807410921621, "grad_norm": 0.08980342745780945, "learning_rate": 2.596059976469999e-06, "loss": 8.501, "step": 182960 }, { "epoch": 0.9137306799171016, "grad_norm": 0.0909147560596466, "learning_rate": 2.594558061528449e-06, "loss": 8.5148, "step": 182970 }, { "epoch": 0.913780618742041, "grad_norm": 0.091719850897789, "learning_rate": 2.5930561465868983e-06, "loss": 8.5061, "step": 182980 }, { "epoch": 0.9138305575669804, "grad_norm": 0.09617502242326736, "learning_rate": 2.591554231645348e-06, "loss": 8.5005, "step": 182990 }, { "epoch": 0.9138804963919199, "grad_norm": 0.08993982523679733, "learning_rate": 2.5900523167037974e-06, "loss": 8.5062, "step": 183000 }, { "epoch": 0.9139304352168593, "grad_norm": 0.08975507318973541, "learning_rate": 2.5885504017622467e-06, "loss": 8.5052, "step": 183010 }, { "epoch": 0.9139803740417988, "grad_norm": 0.09407518804073334, "learning_rate": 2.5870484868206965e-06, "loss": 8.5038, "step": 183020 }, { "epoch": 0.9140303128667382, "grad_norm": 0.09323114156723022, "learning_rate": 2.585546571879146e-06, "loss": 8.5105, "step": 183030 }, { "epoch": 0.9140802516916777, "grad_norm": 0.08849375694990158, "learning_rate": 2.5840446569375955e-06, "loss": 8.5113, "step": 183040 }, { "epoch": 0.9141301905166171, "grad_norm": 0.08718482404947281, "learning_rate": 2.582542741996045e-06, "loss": 8.51, "step": 183050 }, { "epoch": 0.9141801293415566, "grad_norm": 0.09222862124443054, "learning_rate": 2.581040827054494e-06, "loss": 8.503, "step": 183060 }, { "epoch": 0.914230068166496, "grad_norm": 0.09302924573421478, "learning_rate": 2.5795389121129444e-06, "loss": 8.5162, "step": 183070 }, { "epoch": 0.9142800069914355, "grad_norm": 0.09386178851127625, "learning_rate": 2.5780369971713937e-06, "loss": 8.5088, "step": 183080 }, { "epoch": 0.914329945816375, "grad_norm": 0.09588906168937683, "learning_rate": 2.576535082229843e-06, "loss": 8.4878, "step": 183090 }, { "epoch": 0.9143798846413144, "grad_norm": 0.09002941846847534, "learning_rate": 2.5750331672882924e-06, "loss": 8.4933, "step": 183100 }, { "epoch": 0.9144298234662538, "grad_norm": 0.08975888043642044, "learning_rate": 2.573531252346742e-06, "loss": 8.4971, "step": 183110 }, { "epoch": 0.9144797622911933, "grad_norm": 0.09490811824798584, "learning_rate": 2.572029337405192e-06, "loss": 8.522, "step": 183120 }, { "epoch": 0.9145297011161327, "grad_norm": 0.09320612251758575, "learning_rate": 2.5705274224636412e-06, "loss": 8.5048, "step": 183130 }, { "epoch": 0.9145796399410722, "grad_norm": 0.08958461880683899, "learning_rate": 2.5690255075220905e-06, "loss": 8.511, "step": 183140 }, { "epoch": 0.9146295787660116, "grad_norm": 0.09119990468025208, "learning_rate": 2.5675235925805403e-06, "loss": 8.4993, "step": 183150 }, { "epoch": 0.9146795175909511, "grad_norm": 0.093448705971241, "learning_rate": 2.56602167763899e-06, "loss": 8.4941, "step": 183160 }, { "epoch": 0.9147294564158905, "grad_norm": 0.0903758853673935, "learning_rate": 2.5645197626974394e-06, "loss": 8.5067, "step": 183170 }, { "epoch": 0.91477939524083, "grad_norm": 0.09165302664041519, "learning_rate": 2.5630178477558887e-06, "loss": 8.5058, "step": 183180 }, { "epoch": 0.9148293340657694, "grad_norm": 0.08942514657974243, "learning_rate": 2.5615159328143385e-06, "loss": 8.5319, "step": 183190 }, { "epoch": 0.9148792728907089, "grad_norm": 0.08656205981969833, "learning_rate": 2.560014017872788e-06, "loss": 8.5003, "step": 183200 }, { "epoch": 0.9149292117156483, "grad_norm": 0.08952057361602783, "learning_rate": 2.5585121029312376e-06, "loss": 8.5137, "step": 183210 }, { "epoch": 0.9149791505405878, "grad_norm": 0.09631547331809998, "learning_rate": 2.557010187989687e-06, "loss": 8.5028, "step": 183220 }, { "epoch": 0.9150290893655272, "grad_norm": 0.0935741513967514, "learning_rate": 2.5555082730481366e-06, "loss": 8.5188, "step": 183230 }, { "epoch": 0.9150790281904667, "grad_norm": 0.09073397517204285, "learning_rate": 2.554006358106586e-06, "loss": 8.5276, "step": 183240 }, { "epoch": 0.9151289670154061, "grad_norm": 0.09225539863109589, "learning_rate": 2.5525044431650353e-06, "loss": 8.5081, "step": 183250 }, { "epoch": 0.9151789058403456, "grad_norm": 0.09920675307512283, "learning_rate": 2.551002528223485e-06, "loss": 8.5123, "step": 183260 }, { "epoch": 0.915228844665285, "grad_norm": 0.09345440566539764, "learning_rate": 2.549500613281935e-06, "loss": 8.505, "step": 183270 }, { "epoch": 0.9152787834902245, "grad_norm": 0.09154894202947617, "learning_rate": 2.547998698340384e-06, "loss": 8.4907, "step": 183280 }, { "epoch": 0.9153287223151639, "grad_norm": 0.09122487902641296, "learning_rate": 2.5464967833988335e-06, "loss": 8.5205, "step": 183290 }, { "epoch": 0.9153786611401034, "grad_norm": 0.09112253040075302, "learning_rate": 2.544994868457283e-06, "loss": 8.4846, "step": 183300 }, { "epoch": 0.9154285999650428, "grad_norm": 0.08982587605714798, "learning_rate": 2.543492953515733e-06, "loss": 8.5097, "step": 183310 }, { "epoch": 0.9154785387899823, "grad_norm": 0.09354843199253082, "learning_rate": 2.5419910385741823e-06, "loss": 8.5127, "step": 183320 }, { "epoch": 0.9155284776149217, "grad_norm": 0.08977293968200684, "learning_rate": 2.5404891236326317e-06, "loss": 8.5254, "step": 183330 }, { "epoch": 0.9155784164398612, "grad_norm": 0.0911603569984436, "learning_rate": 2.538987208691081e-06, "loss": 8.5047, "step": 183340 }, { "epoch": 0.9156283552648006, "grad_norm": 0.09315786510705948, "learning_rate": 2.5374852937495307e-06, "loss": 8.4967, "step": 183350 }, { "epoch": 0.9156782940897401, "grad_norm": 0.09075506776571274, "learning_rate": 2.5359833788079805e-06, "loss": 8.4941, "step": 183360 }, { "epoch": 0.9157282329146795, "grad_norm": 0.0923544391989708, "learning_rate": 2.53448146386643e-06, "loss": 8.5156, "step": 183370 }, { "epoch": 0.915778171739619, "grad_norm": 0.09413059055805206, "learning_rate": 2.532979548924879e-06, "loss": 8.4979, "step": 183380 }, { "epoch": 0.9158281105645584, "grad_norm": 0.09276200085878372, "learning_rate": 2.531477633983329e-06, "loss": 8.5112, "step": 183390 }, { "epoch": 0.9158780493894979, "grad_norm": 0.09218557178974152, "learning_rate": 2.5299757190417782e-06, "loss": 8.5049, "step": 183400 }, { "epoch": 0.9159279882144373, "grad_norm": 0.09127632528543472, "learning_rate": 2.528473804100228e-06, "loss": 8.4994, "step": 183410 }, { "epoch": 0.9159779270393767, "grad_norm": 0.09124505519866943, "learning_rate": 2.5269718891586773e-06, "loss": 8.5037, "step": 183420 }, { "epoch": 0.9160278658643162, "grad_norm": 0.09244653582572937, "learning_rate": 2.525469974217127e-06, "loss": 8.5091, "step": 183430 }, { "epoch": 0.9160778046892557, "grad_norm": 0.09359874576330185, "learning_rate": 2.5239680592755764e-06, "loss": 8.5033, "step": 183440 }, { "epoch": 0.9161277435141951, "grad_norm": 0.08860906213521957, "learning_rate": 2.5224661443340257e-06, "loss": 8.5175, "step": 183450 }, { "epoch": 0.9161776823391345, "grad_norm": 0.08711325377225876, "learning_rate": 2.5209642293924755e-06, "loss": 8.4991, "step": 183460 }, { "epoch": 0.916227621164074, "grad_norm": 0.0916765034198761, "learning_rate": 2.5194623144509253e-06, "loss": 8.5133, "step": 183470 }, { "epoch": 0.9162775599890135, "grad_norm": 0.0957622155547142, "learning_rate": 2.5179603995093746e-06, "loss": 8.5013, "step": 183480 }, { "epoch": 0.9163274988139529, "grad_norm": 0.08793076127767563, "learning_rate": 2.516458484567824e-06, "loss": 8.5154, "step": 183490 }, { "epoch": 0.9163774376388923, "grad_norm": 0.08785158395767212, "learning_rate": 2.5149565696262732e-06, "loss": 8.5007, "step": 183500 }, { "epoch": 0.9164273764638318, "grad_norm": 0.09278276562690735, "learning_rate": 2.5134546546847234e-06, "loss": 8.5113, "step": 183510 }, { "epoch": 0.9164773152887713, "grad_norm": 0.08464596420526505, "learning_rate": 2.5119527397431728e-06, "loss": 8.5296, "step": 183520 }, { "epoch": 0.9165272541137107, "grad_norm": 0.08865651488304138, "learning_rate": 2.510450824801622e-06, "loss": 8.5137, "step": 183530 }, { "epoch": 0.9165771929386501, "grad_norm": 0.09250415861606598, "learning_rate": 2.5089489098600714e-06, "loss": 8.5173, "step": 183540 }, { "epoch": 0.9166271317635896, "grad_norm": 0.09271103143692017, "learning_rate": 2.507446994918521e-06, "loss": 8.4864, "step": 183550 }, { "epoch": 0.9166770705885291, "grad_norm": 0.09764368087053299, "learning_rate": 2.505945079976971e-06, "loss": 8.5003, "step": 183560 }, { "epoch": 0.9167270094134685, "grad_norm": 0.09379107505083084, "learning_rate": 2.5044431650354203e-06, "loss": 8.5047, "step": 183570 }, { "epoch": 0.9167769482384079, "grad_norm": 0.08978167921304703, "learning_rate": 2.5029412500938696e-06, "loss": 8.5087, "step": 183580 }, { "epoch": 0.9168268870633474, "grad_norm": 0.0897134467959404, "learning_rate": 2.5014393351523193e-06, "loss": 8.5065, "step": 183590 }, { "epoch": 0.9168768258882869, "grad_norm": 0.09094201773405075, "learning_rate": 2.4999374202107687e-06, "loss": 8.5085, "step": 183600 }, { "epoch": 0.9169267647132263, "grad_norm": 0.09478742629289627, "learning_rate": 2.4984355052692184e-06, "loss": 8.5052, "step": 183610 }, { "epoch": 0.9169767035381657, "grad_norm": 0.0915999785065651, "learning_rate": 2.4969335903276678e-06, "loss": 8.5097, "step": 183620 }, { "epoch": 0.9170266423631052, "grad_norm": 0.08944950252771378, "learning_rate": 2.4954316753861175e-06, "loss": 8.4869, "step": 183630 }, { "epoch": 0.9170765811880447, "grad_norm": 0.09154719859361649, "learning_rate": 2.493929760444567e-06, "loss": 8.5146, "step": 183640 }, { "epoch": 0.9171265200129841, "grad_norm": 0.0902072861790657, "learning_rate": 2.492427845503016e-06, "loss": 8.5102, "step": 183650 }, { "epoch": 0.9171764588379235, "grad_norm": 0.09381358325481415, "learning_rate": 2.490925930561466e-06, "loss": 8.5281, "step": 183660 }, { "epoch": 0.917226397662863, "grad_norm": 0.0945509746670723, "learning_rate": 2.4894240156199157e-06, "loss": 8.5014, "step": 183670 }, { "epoch": 0.9172763364878025, "grad_norm": 0.09148582816123962, "learning_rate": 2.487922100678365e-06, "loss": 8.51, "step": 183680 }, { "epoch": 0.9173262753127419, "grad_norm": 0.09244761615991592, "learning_rate": 2.4864201857368144e-06, "loss": 8.5024, "step": 183690 }, { "epoch": 0.9173762141376813, "grad_norm": 0.08857100456953049, "learning_rate": 2.484918270795264e-06, "loss": 8.5167, "step": 183700 }, { "epoch": 0.9174261529626208, "grad_norm": 0.09274089336395264, "learning_rate": 2.483416355853714e-06, "loss": 8.5078, "step": 183710 }, { "epoch": 0.9174760917875603, "grad_norm": 0.08995862305164337, "learning_rate": 2.481914440912163e-06, "loss": 8.5171, "step": 183720 }, { "epoch": 0.9175260306124997, "grad_norm": 0.08714859187602997, "learning_rate": 2.4804125259706125e-06, "loss": 8.4972, "step": 183730 }, { "epoch": 0.9175759694374391, "grad_norm": 0.09085182845592499, "learning_rate": 2.478910611029062e-06, "loss": 8.5136, "step": 183740 }, { "epoch": 0.9176259082623786, "grad_norm": 0.09306159615516663, "learning_rate": 2.477408696087512e-06, "loss": 8.5094, "step": 183750 }, { "epoch": 0.9176758470873181, "grad_norm": 0.08627147227525711, "learning_rate": 2.4759067811459614e-06, "loss": 8.5231, "step": 183760 }, { "epoch": 0.9177257859122575, "grad_norm": 0.09129956364631653, "learning_rate": 2.4744048662044107e-06, "loss": 8.4941, "step": 183770 }, { "epoch": 0.9177757247371969, "grad_norm": 0.09526669234037399, "learning_rate": 2.47290295126286e-06, "loss": 8.4966, "step": 183780 }, { "epoch": 0.9178256635621364, "grad_norm": 0.09122246503829956, "learning_rate": 2.4714010363213098e-06, "loss": 8.4972, "step": 183790 }, { "epoch": 0.9178756023870759, "grad_norm": 0.09195857495069504, "learning_rate": 2.4698991213797595e-06, "loss": 8.5057, "step": 183800 }, { "epoch": 0.9179255412120153, "grad_norm": 0.090829037129879, "learning_rate": 2.468397206438209e-06, "loss": 8.5169, "step": 183810 }, { "epoch": 0.9179754800369547, "grad_norm": 0.09058675169944763, "learning_rate": 2.466895291496658e-06, "loss": 8.511, "step": 183820 }, { "epoch": 0.9180254188618941, "grad_norm": 0.08892668038606644, "learning_rate": 2.465393376555108e-06, "loss": 8.5127, "step": 183830 }, { "epoch": 0.9180753576868337, "grad_norm": 0.09829755127429962, "learning_rate": 2.4638914616135573e-06, "loss": 8.4872, "step": 183840 }, { "epoch": 0.9181252965117731, "grad_norm": 0.08881474286317825, "learning_rate": 2.462389546672007e-06, "loss": 8.4938, "step": 183850 }, { "epoch": 0.9181752353367125, "grad_norm": 0.08773206174373627, "learning_rate": 2.4608876317304564e-06, "loss": 8.5175, "step": 183860 }, { "epoch": 0.918225174161652, "grad_norm": 0.08635301142930984, "learning_rate": 2.459385716788906e-06, "loss": 8.5228, "step": 183870 }, { "epoch": 0.9182751129865915, "grad_norm": 0.08536515384912491, "learning_rate": 2.4578838018473555e-06, "loss": 8.5215, "step": 183880 }, { "epoch": 0.9183250518115309, "grad_norm": 0.08720611780881882, "learning_rate": 2.456381886905805e-06, "loss": 8.5034, "step": 183890 }, { "epoch": 0.9183749906364703, "grad_norm": 0.09371429681777954, "learning_rate": 2.4548799719642545e-06, "loss": 8.5029, "step": 183900 }, { "epoch": 0.9184249294614097, "grad_norm": 0.08567806333303452, "learning_rate": 2.4533780570227043e-06, "loss": 8.5001, "step": 183910 }, { "epoch": 0.9184748682863493, "grad_norm": 0.09422826766967773, "learning_rate": 2.4518761420811536e-06, "loss": 8.5079, "step": 183920 }, { "epoch": 0.9185248071112887, "grad_norm": 0.0892164334654808, "learning_rate": 2.450374227139603e-06, "loss": 8.4915, "step": 183930 }, { "epoch": 0.9185747459362281, "grad_norm": 0.09322153776884079, "learning_rate": 2.4488723121980523e-06, "loss": 8.5038, "step": 183940 }, { "epoch": 0.9186246847611675, "grad_norm": 0.0924592837691307, "learning_rate": 2.4473703972565025e-06, "loss": 8.5004, "step": 183950 }, { "epoch": 0.918674623586107, "grad_norm": 0.0903005376458168, "learning_rate": 2.445868482314952e-06, "loss": 8.5133, "step": 183960 }, { "epoch": 0.9187245624110465, "grad_norm": 0.09269312024116516, "learning_rate": 2.444366567373401e-06, "loss": 8.5266, "step": 183970 }, { "epoch": 0.9187745012359859, "grad_norm": 0.0908818319439888, "learning_rate": 2.4428646524318505e-06, "loss": 8.5057, "step": 183980 }, { "epoch": 0.9188244400609253, "grad_norm": 0.09337764978408813, "learning_rate": 2.4413627374903002e-06, "loss": 8.5112, "step": 183990 }, { "epoch": 0.9188743788858648, "grad_norm": 0.09528010338544846, "learning_rate": 2.43986082254875e-06, "loss": 8.503, "step": 184000 }, { "epoch": 0.9189243177108043, "grad_norm": 0.09190403670072556, "learning_rate": 2.4383589076071993e-06, "loss": 8.5116, "step": 184010 }, { "epoch": 0.9189742565357437, "grad_norm": 0.09150946140289307, "learning_rate": 2.4368569926656486e-06, "loss": 8.5047, "step": 184020 }, { "epoch": 0.9190241953606831, "grad_norm": 0.09026480466127396, "learning_rate": 2.4353550777240984e-06, "loss": 8.5045, "step": 184030 }, { "epoch": 0.9190741341856226, "grad_norm": 0.08819565176963806, "learning_rate": 2.4338531627825477e-06, "loss": 8.5203, "step": 184040 }, { "epoch": 0.9191240730105621, "grad_norm": 0.09232760965824127, "learning_rate": 2.4323512478409975e-06, "loss": 8.5237, "step": 184050 }, { "epoch": 0.9191740118355015, "grad_norm": 0.08861426264047623, "learning_rate": 2.430849332899447e-06, "loss": 8.4923, "step": 184060 }, { "epoch": 0.9192239506604409, "grad_norm": 0.09452666342258453, "learning_rate": 2.4293474179578966e-06, "loss": 8.513, "step": 184070 }, { "epoch": 0.9192738894853804, "grad_norm": 0.09179414063692093, "learning_rate": 2.427845503016346e-06, "loss": 8.4931, "step": 184080 }, { "epoch": 0.9193238283103199, "grad_norm": 0.09043210744857788, "learning_rate": 2.4263435880747952e-06, "loss": 8.5092, "step": 184090 }, { "epoch": 0.9193737671352593, "grad_norm": 0.09003657847642899, "learning_rate": 2.424841673133245e-06, "loss": 8.517, "step": 184100 }, { "epoch": 0.9194237059601987, "grad_norm": 0.09282027930021286, "learning_rate": 2.4233397581916947e-06, "loss": 8.5116, "step": 184110 }, { "epoch": 0.9194736447851382, "grad_norm": 0.08989115059375763, "learning_rate": 2.421837843250144e-06, "loss": 8.4954, "step": 184120 }, { "epoch": 0.9195235836100777, "grad_norm": 0.09200627356767654, "learning_rate": 2.4203359283085934e-06, "loss": 8.5153, "step": 184130 }, { "epoch": 0.9195735224350171, "grad_norm": 0.09356432408094406, "learning_rate": 2.4188340133670427e-06, "loss": 8.5144, "step": 184140 }, { "epoch": 0.9196234612599565, "grad_norm": 0.08702446520328522, "learning_rate": 2.417332098425493e-06, "loss": 8.4921, "step": 184150 }, { "epoch": 0.919673400084896, "grad_norm": 0.09164425730705261, "learning_rate": 2.4158301834839422e-06, "loss": 8.5054, "step": 184160 }, { "epoch": 0.9197233389098355, "grad_norm": 0.09433116018772125, "learning_rate": 2.4143282685423916e-06, "loss": 8.5058, "step": 184170 }, { "epoch": 0.9197732777347749, "grad_norm": 0.09576484560966492, "learning_rate": 2.412826353600841e-06, "loss": 8.5018, "step": 184180 }, { "epoch": 0.9198232165597143, "grad_norm": 0.09483747184276581, "learning_rate": 2.4113244386592907e-06, "loss": 8.5188, "step": 184190 }, { "epoch": 0.9198731553846538, "grad_norm": 0.09752367436885834, "learning_rate": 2.4098225237177404e-06, "loss": 8.5201, "step": 184200 }, { "epoch": 0.9199230942095933, "grad_norm": 0.0914745032787323, "learning_rate": 2.4083206087761897e-06, "loss": 8.5139, "step": 184210 }, { "epoch": 0.9199730330345327, "grad_norm": 0.08465293794870377, "learning_rate": 2.406818693834639e-06, "loss": 8.5177, "step": 184220 }, { "epoch": 0.9200229718594721, "grad_norm": 0.09186521172523499, "learning_rate": 2.4053167788930884e-06, "loss": 8.512, "step": 184230 }, { "epoch": 0.9200729106844115, "grad_norm": 0.09527092427015305, "learning_rate": 2.403814863951538e-06, "loss": 8.5174, "step": 184240 }, { "epoch": 0.9201228495093511, "grad_norm": 0.08546338975429535, "learning_rate": 2.402312949009988e-06, "loss": 8.5272, "step": 184250 }, { "epoch": 0.9201727883342905, "grad_norm": 0.09446820616722107, "learning_rate": 2.4008110340684373e-06, "loss": 8.5021, "step": 184260 }, { "epoch": 0.9202227271592299, "grad_norm": 0.09014095366001129, "learning_rate": 2.3993091191268866e-06, "loss": 8.5166, "step": 184270 }, { "epoch": 0.9202726659841693, "grad_norm": 0.09346702694892883, "learning_rate": 2.3978072041853363e-06, "loss": 8.5163, "step": 184280 }, { "epoch": 0.9203226048091089, "grad_norm": 0.09391672164201736, "learning_rate": 2.396305289243786e-06, "loss": 8.5082, "step": 184290 }, { "epoch": 0.9203725436340483, "grad_norm": 0.09350437670946121, "learning_rate": 2.3948033743022354e-06, "loss": 8.5018, "step": 184300 }, { "epoch": 0.9204224824589877, "grad_norm": 0.09567178785800934, "learning_rate": 2.3933014593606848e-06, "loss": 8.5052, "step": 184310 }, { "epoch": 0.9204724212839271, "grad_norm": 0.09135613590478897, "learning_rate": 2.3917995444191345e-06, "loss": 8.5153, "step": 184320 }, { "epoch": 0.9205223601088667, "grad_norm": 0.08975464105606079, "learning_rate": 2.390297629477584e-06, "loss": 8.5067, "step": 184330 }, { "epoch": 0.9205722989338061, "grad_norm": 0.0906573086977005, "learning_rate": 2.3887957145360336e-06, "loss": 8.5185, "step": 184340 }, { "epoch": 0.9206222377587455, "grad_norm": 0.09258557856082916, "learning_rate": 2.387293799594483e-06, "loss": 8.5008, "step": 184350 }, { "epoch": 0.9206721765836849, "grad_norm": 0.09056249260902405, "learning_rate": 2.3857918846529327e-06, "loss": 8.5061, "step": 184360 }, { "epoch": 0.9207221154086245, "grad_norm": 0.09652728587388992, "learning_rate": 2.384289969711382e-06, "loss": 8.4943, "step": 184370 }, { "epoch": 0.9207720542335639, "grad_norm": 0.08950930088758469, "learning_rate": 2.3827880547698313e-06, "loss": 8.5062, "step": 184380 }, { "epoch": 0.9208219930585033, "grad_norm": 0.09195245802402496, "learning_rate": 2.381286139828281e-06, "loss": 8.4987, "step": 184390 }, { "epoch": 0.9208719318834427, "grad_norm": 0.08976045995950699, "learning_rate": 2.379784224886731e-06, "loss": 8.5078, "step": 184400 }, { "epoch": 0.9209218707083823, "grad_norm": 0.08620437234640121, "learning_rate": 2.37828230994518e-06, "loss": 8.5356, "step": 184410 }, { "epoch": 0.9209718095333217, "grad_norm": 0.08748499304056168, "learning_rate": 2.3767803950036295e-06, "loss": 8.5128, "step": 184420 }, { "epoch": 0.9210217483582611, "grad_norm": 0.08978407084941864, "learning_rate": 2.375278480062079e-06, "loss": 8.508, "step": 184430 }, { "epoch": 0.9210716871832005, "grad_norm": 0.09201885014772415, "learning_rate": 2.373776565120529e-06, "loss": 8.4949, "step": 184440 }, { "epoch": 0.9211216260081401, "grad_norm": 0.09136571735143661, "learning_rate": 2.3722746501789784e-06, "loss": 8.5061, "step": 184450 }, { "epoch": 0.9211715648330795, "grad_norm": 0.09248650074005127, "learning_rate": 2.3707727352374277e-06, "loss": 8.4964, "step": 184460 }, { "epoch": 0.9212215036580189, "grad_norm": 0.08990224450826645, "learning_rate": 2.369270820295877e-06, "loss": 8.51, "step": 184470 }, { "epoch": 0.9212714424829583, "grad_norm": 0.09131959080696106, "learning_rate": 2.3677689053543268e-06, "loss": 8.5172, "step": 184480 }, { "epoch": 0.9213213813078979, "grad_norm": 0.09705401957035065, "learning_rate": 2.3662669904127765e-06, "loss": 8.5212, "step": 184490 }, { "epoch": 0.9213713201328373, "grad_norm": 0.09412173926830292, "learning_rate": 2.364765075471226e-06, "loss": 8.4953, "step": 184500 }, { "epoch": 0.9214212589577767, "grad_norm": 0.08390575647354126, "learning_rate": 2.363263160529675e-06, "loss": 8.5015, "step": 184510 }, { "epoch": 0.9214711977827161, "grad_norm": 0.09075505286455154, "learning_rate": 2.361761245588125e-06, "loss": 8.488, "step": 184520 }, { "epoch": 0.9215211366076557, "grad_norm": 0.08866789937019348, "learning_rate": 2.3602593306465743e-06, "loss": 8.5028, "step": 184530 }, { "epoch": 0.9215710754325951, "grad_norm": 0.09287255257368088, "learning_rate": 2.358757415705024e-06, "loss": 8.5005, "step": 184540 }, { "epoch": 0.9216210142575345, "grad_norm": 0.08919494599103928, "learning_rate": 2.3572555007634734e-06, "loss": 8.5096, "step": 184550 }, { "epoch": 0.9216709530824739, "grad_norm": 0.09100577980279922, "learning_rate": 2.355753585821923e-06, "loss": 8.4997, "step": 184560 }, { "epoch": 0.9217208919074135, "grad_norm": 0.09456155449151993, "learning_rate": 2.3542516708803725e-06, "loss": 8.4924, "step": 184570 }, { "epoch": 0.9217708307323529, "grad_norm": 0.09156771004199982, "learning_rate": 2.3527497559388218e-06, "loss": 8.5267, "step": 184580 }, { "epoch": 0.9218207695572923, "grad_norm": 0.08734113723039627, "learning_rate": 2.3512478409972715e-06, "loss": 8.4983, "step": 184590 }, { "epoch": 0.9218707083822317, "grad_norm": 0.09091001003980637, "learning_rate": 2.3497459260557213e-06, "loss": 8.518, "step": 184600 }, { "epoch": 0.9219206472071713, "grad_norm": 0.08664854615926743, "learning_rate": 2.3482440111141706e-06, "loss": 8.5167, "step": 184610 }, { "epoch": 0.9219705860321107, "grad_norm": 0.08995333313941956, "learning_rate": 2.34674209617262e-06, "loss": 8.4968, "step": 184620 }, { "epoch": 0.9220205248570501, "grad_norm": 0.09020540118217468, "learning_rate": 2.3452401812310693e-06, "loss": 8.4935, "step": 184630 }, { "epoch": 0.9220704636819895, "grad_norm": 0.08525863289833069, "learning_rate": 2.3437382662895195e-06, "loss": 8.512, "step": 184640 }, { "epoch": 0.9221204025069291, "grad_norm": 0.09016373753547668, "learning_rate": 2.342236351347969e-06, "loss": 8.4992, "step": 184650 }, { "epoch": 0.9221703413318685, "grad_norm": 0.09463771432638168, "learning_rate": 2.340734436406418e-06, "loss": 8.4842, "step": 184660 }, { "epoch": 0.9222202801568079, "grad_norm": 0.08823353797197342, "learning_rate": 2.3392325214648675e-06, "loss": 8.4996, "step": 184670 }, { "epoch": 0.9222702189817473, "grad_norm": 0.09164861589670181, "learning_rate": 2.3377306065233172e-06, "loss": 8.5169, "step": 184680 }, { "epoch": 0.9223201578066869, "grad_norm": 0.092039093375206, "learning_rate": 2.336228691581767e-06, "loss": 8.5056, "step": 184690 }, { "epoch": 0.9223700966316263, "grad_norm": 0.09081801027059555, "learning_rate": 2.3347267766402163e-06, "loss": 8.4922, "step": 184700 }, { "epoch": 0.9224200354565657, "grad_norm": 0.09290160983800888, "learning_rate": 2.3332248616986656e-06, "loss": 8.5182, "step": 184710 }, { "epoch": 0.9224699742815051, "grad_norm": 0.08207748085260391, "learning_rate": 2.3317229467571154e-06, "loss": 8.5093, "step": 184720 }, { "epoch": 0.9225199131064447, "grad_norm": 0.09553519636392593, "learning_rate": 2.3302210318155647e-06, "loss": 8.5143, "step": 184730 }, { "epoch": 0.9225698519313841, "grad_norm": 0.09908508509397507, "learning_rate": 2.3287191168740145e-06, "loss": 8.5059, "step": 184740 }, { "epoch": 0.9226197907563235, "grad_norm": 0.0906306803226471, "learning_rate": 2.327217201932464e-06, "loss": 8.5063, "step": 184750 }, { "epoch": 0.9226697295812629, "grad_norm": 0.09021761268377304, "learning_rate": 2.3257152869909136e-06, "loss": 8.51, "step": 184760 }, { "epoch": 0.9227196684062025, "grad_norm": 0.09584904462099075, "learning_rate": 2.324213372049363e-06, "loss": 8.4976, "step": 184770 }, { "epoch": 0.9227696072311419, "grad_norm": 0.08946247398853302, "learning_rate": 2.3227114571078126e-06, "loss": 8.5217, "step": 184780 }, { "epoch": 0.9228195460560813, "grad_norm": 0.09100887924432755, "learning_rate": 2.321209542166262e-06, "loss": 8.511, "step": 184790 }, { "epoch": 0.9228694848810207, "grad_norm": 0.08670665323734283, "learning_rate": 2.3197076272247117e-06, "loss": 8.5134, "step": 184800 }, { "epoch": 0.9229194237059603, "grad_norm": 0.09500810503959656, "learning_rate": 2.318205712283161e-06, "loss": 8.4904, "step": 184810 }, { "epoch": 0.9229693625308997, "grad_norm": 0.09684941917657852, "learning_rate": 2.3167037973416104e-06, "loss": 8.5025, "step": 184820 }, { "epoch": 0.9230193013558391, "grad_norm": 0.08873332291841507, "learning_rate": 2.31520188240006e-06, "loss": 8.4923, "step": 184830 }, { "epoch": 0.9230692401807785, "grad_norm": 0.0964055061340332, "learning_rate": 2.31369996745851e-06, "loss": 8.5126, "step": 184840 }, { "epoch": 0.923119179005718, "grad_norm": 0.09410259127616882, "learning_rate": 2.3121980525169592e-06, "loss": 8.5124, "step": 184850 }, { "epoch": 0.9231691178306575, "grad_norm": 0.09654883295297623, "learning_rate": 2.3106961375754086e-06, "loss": 8.5073, "step": 184860 }, { "epoch": 0.9232190566555969, "grad_norm": 0.08777954429388046, "learning_rate": 2.309194222633858e-06, "loss": 8.5067, "step": 184870 }, { "epoch": 0.9232689954805363, "grad_norm": 0.09537537395954132, "learning_rate": 2.307692307692308e-06, "loss": 8.4888, "step": 184880 }, { "epoch": 0.9233189343054758, "grad_norm": 0.08902040123939514, "learning_rate": 2.3061903927507574e-06, "loss": 8.5042, "step": 184890 }, { "epoch": 0.9233688731304153, "grad_norm": 0.09366223216056824, "learning_rate": 2.3046884778092067e-06, "loss": 8.5255, "step": 184900 }, { "epoch": 0.9234188119553547, "grad_norm": 0.08827183395624161, "learning_rate": 2.303186562867656e-06, "loss": 8.5054, "step": 184910 }, { "epoch": 0.9234687507802941, "grad_norm": 0.09339195489883423, "learning_rate": 2.301684647926106e-06, "loss": 8.5159, "step": 184920 }, { "epoch": 0.9235186896052335, "grad_norm": 0.09930474311113358, "learning_rate": 2.3001827329845556e-06, "loss": 8.4914, "step": 184930 }, { "epoch": 0.9235686284301731, "grad_norm": 0.09311912208795547, "learning_rate": 2.298680818043005e-06, "loss": 8.513, "step": 184940 }, { "epoch": 0.9236185672551125, "grad_norm": 0.09054923057556152, "learning_rate": 2.2971789031014542e-06, "loss": 8.5079, "step": 184950 }, { "epoch": 0.9236685060800519, "grad_norm": 0.0914381593465805, "learning_rate": 2.295676988159904e-06, "loss": 8.5086, "step": 184960 }, { "epoch": 0.9237184449049913, "grad_norm": 0.09014654159545898, "learning_rate": 2.2941750732183533e-06, "loss": 8.5171, "step": 184970 }, { "epoch": 0.9237683837299309, "grad_norm": 0.0892617329955101, "learning_rate": 2.292673158276803e-06, "loss": 8.5193, "step": 184980 }, { "epoch": 0.9238183225548703, "grad_norm": 0.09248284995555878, "learning_rate": 2.2911712433352524e-06, "loss": 8.5127, "step": 184990 }, { "epoch": 0.9238682613798097, "grad_norm": 0.08743773400783539, "learning_rate": 2.289669328393702e-06, "loss": 8.523, "step": 185000 }, { "epoch": 0.9239182002047491, "grad_norm": 0.08780092000961304, "learning_rate": 2.2881674134521515e-06, "loss": 8.5022, "step": 185010 }, { "epoch": 0.9239681390296887, "grad_norm": 0.09048131853342056, "learning_rate": 2.286665498510601e-06, "loss": 8.5004, "step": 185020 }, { "epoch": 0.9240180778546281, "grad_norm": 0.089464470744133, "learning_rate": 2.2851635835690506e-06, "loss": 8.51, "step": 185030 }, { "epoch": 0.9240680166795675, "grad_norm": 0.09030899405479431, "learning_rate": 2.2836616686275003e-06, "loss": 8.5044, "step": 185040 }, { "epoch": 0.9241179555045069, "grad_norm": 0.08874811232089996, "learning_rate": 2.2821597536859497e-06, "loss": 8.5056, "step": 185050 }, { "epoch": 0.9241678943294465, "grad_norm": 0.08877728879451752, "learning_rate": 2.280657838744399e-06, "loss": 8.5067, "step": 185060 }, { "epoch": 0.9242178331543859, "grad_norm": 0.09442177414894104, "learning_rate": 2.2791559238028483e-06, "loss": 8.5198, "step": 185070 }, { "epoch": 0.9242677719793253, "grad_norm": 0.09836707264184952, "learning_rate": 2.2776540088612985e-06, "loss": 8.4915, "step": 185080 }, { "epoch": 0.9243177108042647, "grad_norm": 0.08862404525279999, "learning_rate": 2.276152093919748e-06, "loss": 8.4892, "step": 185090 }, { "epoch": 0.9243676496292043, "grad_norm": 0.09015829116106033, "learning_rate": 2.274650178978197e-06, "loss": 8.5203, "step": 185100 }, { "epoch": 0.9244175884541437, "grad_norm": 0.0880509465932846, "learning_rate": 2.2731482640366465e-06, "loss": 8.5141, "step": 185110 }, { "epoch": 0.9244675272790831, "grad_norm": 0.09141357243061066, "learning_rate": 2.2716463490950963e-06, "loss": 8.5056, "step": 185120 }, { "epoch": 0.9245174661040225, "grad_norm": 0.09560871124267578, "learning_rate": 2.270144434153546e-06, "loss": 8.4859, "step": 185130 }, { "epoch": 0.9245674049289621, "grad_norm": 0.09067047387361526, "learning_rate": 2.2686425192119953e-06, "loss": 8.5161, "step": 185140 }, { "epoch": 0.9246173437539015, "grad_norm": 0.08690574020147324, "learning_rate": 2.2671406042704447e-06, "loss": 8.5297, "step": 185150 }, { "epoch": 0.9246672825788409, "grad_norm": 0.09214247763156891, "learning_rate": 2.2656386893288944e-06, "loss": 8.5163, "step": 185160 }, { "epoch": 0.9247172214037803, "grad_norm": 0.08881056308746338, "learning_rate": 2.2641367743873438e-06, "loss": 8.5007, "step": 185170 }, { "epoch": 0.9247671602287199, "grad_norm": 0.09248036891222, "learning_rate": 2.2626348594457935e-06, "loss": 8.4997, "step": 185180 }, { "epoch": 0.9248170990536593, "grad_norm": 0.08973632007837296, "learning_rate": 2.261132944504243e-06, "loss": 8.5202, "step": 185190 }, { "epoch": 0.9248670378785987, "grad_norm": 0.09075845032930374, "learning_rate": 2.2596310295626926e-06, "loss": 8.4875, "step": 185200 }, { "epoch": 0.9249169767035381, "grad_norm": 0.09289133548736572, "learning_rate": 2.258129114621142e-06, "loss": 8.5093, "step": 185210 }, { "epoch": 0.9249669155284777, "grad_norm": 0.0958157405257225, "learning_rate": 2.2566271996795913e-06, "loss": 8.5047, "step": 185220 }, { "epoch": 0.9250168543534171, "grad_norm": 0.09263189136981964, "learning_rate": 2.255125284738041e-06, "loss": 8.5116, "step": 185230 }, { "epoch": 0.9250667931783565, "grad_norm": 0.08901172876358032, "learning_rate": 2.2536233697964908e-06, "loss": 8.4862, "step": 185240 }, { "epoch": 0.9251167320032959, "grad_norm": 0.09158726781606674, "learning_rate": 2.25212145485494e-06, "loss": 8.5025, "step": 185250 }, { "epoch": 0.9251666708282354, "grad_norm": 0.08551083505153656, "learning_rate": 2.2506195399133894e-06, "loss": 8.5062, "step": 185260 }, { "epoch": 0.9252166096531749, "grad_norm": 0.09838327020406723, "learning_rate": 2.2491176249718388e-06, "loss": 8.5046, "step": 185270 }, { "epoch": 0.9252665484781143, "grad_norm": 0.0905366763472557, "learning_rate": 2.247615710030289e-06, "loss": 8.5041, "step": 185280 }, { "epoch": 0.9253164873030537, "grad_norm": 0.09381816536188126, "learning_rate": 2.2461137950887383e-06, "loss": 8.5001, "step": 185290 }, { "epoch": 0.9253664261279932, "grad_norm": 0.09462891519069672, "learning_rate": 2.2446118801471876e-06, "loss": 8.505, "step": 185300 }, { "epoch": 0.9254163649529327, "grad_norm": 0.09308251738548279, "learning_rate": 2.243109965205637e-06, "loss": 8.5048, "step": 185310 }, { "epoch": 0.9254663037778721, "grad_norm": 0.09172924607992172, "learning_rate": 2.2416080502640867e-06, "loss": 8.5165, "step": 185320 }, { "epoch": 0.9255162426028115, "grad_norm": 0.09043785184621811, "learning_rate": 2.2401061353225365e-06, "loss": 8.5263, "step": 185330 }, { "epoch": 0.925566181427751, "grad_norm": 0.08776098489761353, "learning_rate": 2.2386042203809858e-06, "loss": 8.5158, "step": 185340 }, { "epoch": 0.9256161202526905, "grad_norm": 0.09275338798761368, "learning_rate": 2.237102305439435e-06, "loss": 8.5181, "step": 185350 }, { "epoch": 0.9256660590776299, "grad_norm": 0.09385108202695847, "learning_rate": 2.235600390497885e-06, "loss": 8.4931, "step": 185360 }, { "epoch": 0.9257159979025693, "grad_norm": 0.0922423005104065, "learning_rate": 2.2340984755563346e-06, "loss": 8.5035, "step": 185370 }, { "epoch": 0.9257659367275088, "grad_norm": 0.08742625266313553, "learning_rate": 2.232596560614784e-06, "loss": 8.5218, "step": 185380 }, { "epoch": 0.9258158755524483, "grad_norm": 0.08818572014570236, "learning_rate": 2.2310946456732333e-06, "loss": 8.4944, "step": 185390 }, { "epoch": 0.9258658143773877, "grad_norm": 0.0831325426697731, "learning_rate": 2.229592730731683e-06, "loss": 8.5027, "step": 185400 }, { "epoch": 0.9259157532023271, "grad_norm": 0.09564302861690521, "learning_rate": 2.2280908157901324e-06, "loss": 8.485, "step": 185410 }, { "epoch": 0.9259656920272666, "grad_norm": 0.08953244984149933, "learning_rate": 2.226588900848582e-06, "loss": 8.5, "step": 185420 }, { "epoch": 0.9260156308522061, "grad_norm": 0.09346967190504074, "learning_rate": 2.2250869859070315e-06, "loss": 8.5088, "step": 185430 }, { "epoch": 0.9260655696771455, "grad_norm": 0.09185745567083359, "learning_rate": 2.2235850709654812e-06, "loss": 8.5048, "step": 185440 }, { "epoch": 0.9261155085020849, "grad_norm": 0.08855016529560089, "learning_rate": 2.2220831560239305e-06, "loss": 8.5062, "step": 185450 }, { "epoch": 0.9261654473270244, "grad_norm": 0.09117954969406128, "learning_rate": 2.22058124108238e-06, "loss": 8.5, "step": 185460 }, { "epoch": 0.9262153861519639, "grad_norm": 0.09156261384487152, "learning_rate": 2.2190793261408296e-06, "loss": 8.5025, "step": 185470 }, { "epoch": 0.9262653249769033, "grad_norm": 0.09079194068908691, "learning_rate": 2.2175774111992794e-06, "loss": 8.5112, "step": 185480 }, { "epoch": 0.9263152638018427, "grad_norm": 0.09392797201871872, "learning_rate": 2.2160754962577287e-06, "loss": 8.5097, "step": 185490 }, { "epoch": 0.9263652026267822, "grad_norm": 0.09081744402647018, "learning_rate": 2.214573581316178e-06, "loss": 8.5035, "step": 185500 }, { "epoch": 0.9264151414517217, "grad_norm": 0.09045954048633575, "learning_rate": 2.2130716663746274e-06, "loss": 8.4953, "step": 185510 }, { "epoch": 0.9264650802766611, "grad_norm": 0.08934304118156433, "learning_rate": 2.2115697514330776e-06, "loss": 8.4913, "step": 185520 }, { "epoch": 0.9265150191016005, "grad_norm": 0.0947798490524292, "learning_rate": 2.210067836491527e-06, "loss": 8.4957, "step": 185530 }, { "epoch": 0.92656495792654, "grad_norm": 0.09003657102584839, "learning_rate": 2.2085659215499762e-06, "loss": 8.4949, "step": 185540 }, { "epoch": 0.9266148967514795, "grad_norm": 0.08870958536863327, "learning_rate": 2.2070640066084256e-06, "loss": 8.5088, "step": 185550 }, { "epoch": 0.9266648355764189, "grad_norm": 0.09150302410125732, "learning_rate": 2.2055620916668753e-06, "loss": 8.5054, "step": 185560 }, { "epoch": 0.9267147744013583, "grad_norm": 0.08964120596647263, "learning_rate": 2.204060176725325e-06, "loss": 8.5195, "step": 185570 }, { "epoch": 0.9267647132262978, "grad_norm": 0.0831717923283577, "learning_rate": 2.2025582617837744e-06, "loss": 8.5152, "step": 185580 }, { "epoch": 0.9268146520512373, "grad_norm": 0.09838960319757462, "learning_rate": 2.2010563468422237e-06, "loss": 8.4911, "step": 185590 }, { "epoch": 0.9268645908761767, "grad_norm": 0.08773133903741837, "learning_rate": 2.1995544319006735e-06, "loss": 8.5124, "step": 185600 }, { "epoch": 0.9269145297011161, "grad_norm": 0.09229285269975662, "learning_rate": 2.198052516959123e-06, "loss": 8.4904, "step": 185610 }, { "epoch": 0.9269644685260556, "grad_norm": 0.087303526699543, "learning_rate": 2.1965506020175726e-06, "loss": 8.5135, "step": 185620 }, { "epoch": 0.927014407350995, "grad_norm": 0.09083783626556396, "learning_rate": 2.195048687076022e-06, "loss": 8.4835, "step": 185630 }, { "epoch": 0.9270643461759345, "grad_norm": 0.09213201701641083, "learning_rate": 2.1935467721344717e-06, "loss": 8.5163, "step": 185640 }, { "epoch": 0.9271142850008739, "grad_norm": 0.09071563184261322, "learning_rate": 2.192044857192921e-06, "loss": 8.5098, "step": 185650 }, { "epoch": 0.9271642238258134, "grad_norm": 0.08385904878377914, "learning_rate": 2.1905429422513703e-06, "loss": 8.503, "step": 185660 }, { "epoch": 0.9272141626507528, "grad_norm": 0.09102963656187057, "learning_rate": 2.18904102730982e-06, "loss": 8.4991, "step": 185670 }, { "epoch": 0.9272641014756923, "grad_norm": 0.08781067281961441, "learning_rate": 2.18753911236827e-06, "loss": 8.5112, "step": 185680 }, { "epoch": 0.9273140403006317, "grad_norm": 0.09064462780952454, "learning_rate": 2.186037197426719e-06, "loss": 8.5103, "step": 185690 }, { "epoch": 0.9273639791255712, "grad_norm": 0.09395837038755417, "learning_rate": 2.1845352824851685e-06, "loss": 8.5011, "step": 185700 }, { "epoch": 0.9274139179505106, "grad_norm": 0.08788859099149704, "learning_rate": 2.183033367543618e-06, "loss": 8.4993, "step": 185710 }, { "epoch": 0.9274638567754501, "grad_norm": 0.09015844762325287, "learning_rate": 2.181531452602068e-06, "loss": 8.5091, "step": 185720 }, { "epoch": 0.9275137956003895, "grad_norm": 0.08770440518856049, "learning_rate": 2.1800295376605173e-06, "loss": 8.4965, "step": 185730 }, { "epoch": 0.927563734425329, "grad_norm": 0.09498754143714905, "learning_rate": 2.1785276227189667e-06, "loss": 8.4999, "step": 185740 }, { "epoch": 0.9276136732502684, "grad_norm": 0.09174376726150513, "learning_rate": 2.177025707777416e-06, "loss": 8.509, "step": 185750 }, { "epoch": 0.9276636120752079, "grad_norm": 0.08917669951915741, "learning_rate": 2.1755237928358658e-06, "loss": 8.5181, "step": 185760 }, { "epoch": 0.9277135509001473, "grad_norm": 0.0941305160522461, "learning_rate": 2.1740218778943155e-06, "loss": 8.5051, "step": 185770 }, { "epoch": 0.9277634897250868, "grad_norm": 0.09079430997371674, "learning_rate": 2.172519962952765e-06, "loss": 8.515, "step": 185780 }, { "epoch": 0.9278134285500262, "grad_norm": 0.09007134288549423, "learning_rate": 2.171018048011214e-06, "loss": 8.4849, "step": 185790 }, { "epoch": 0.9278633673749657, "grad_norm": 0.09367641806602478, "learning_rate": 2.169516133069664e-06, "loss": 8.5065, "step": 185800 }, { "epoch": 0.9279133061999051, "grad_norm": 0.08902136236429214, "learning_rate": 2.1680142181281133e-06, "loss": 8.5059, "step": 185810 }, { "epoch": 0.9279632450248446, "grad_norm": 0.08988285809755325, "learning_rate": 2.166512303186563e-06, "loss": 8.5104, "step": 185820 }, { "epoch": 0.928013183849784, "grad_norm": 0.09199918806552887, "learning_rate": 2.1650103882450123e-06, "loss": 8.5187, "step": 185830 }, { "epoch": 0.9280631226747235, "grad_norm": 0.08583081513643265, "learning_rate": 2.163508473303462e-06, "loss": 8.4948, "step": 185840 }, { "epoch": 0.9281130614996629, "grad_norm": 0.09111568331718445, "learning_rate": 2.1620065583619114e-06, "loss": 8.5025, "step": 185850 }, { "epoch": 0.9281630003246024, "grad_norm": 0.08850061148405075, "learning_rate": 2.160504643420361e-06, "loss": 8.5003, "step": 185860 }, { "epoch": 0.9282129391495418, "grad_norm": 0.09337007999420166, "learning_rate": 2.1590027284788105e-06, "loss": 8.5119, "step": 185870 }, { "epoch": 0.9282628779744813, "grad_norm": 0.08924791216850281, "learning_rate": 2.1575008135372603e-06, "loss": 8.5174, "step": 185880 }, { "epoch": 0.9283128167994207, "grad_norm": 0.09130074083805084, "learning_rate": 2.1559988985957096e-06, "loss": 8.5219, "step": 185890 }, { "epoch": 0.9283627556243602, "grad_norm": 0.0915064811706543, "learning_rate": 2.154496983654159e-06, "loss": 8.5056, "step": 185900 }, { "epoch": 0.9284126944492996, "grad_norm": 0.09288337826728821, "learning_rate": 2.1529950687126087e-06, "loss": 8.4999, "step": 185910 }, { "epoch": 0.9284626332742391, "grad_norm": 0.08676610141992569, "learning_rate": 2.1514931537710584e-06, "loss": 8.514, "step": 185920 }, { "epoch": 0.9285125720991785, "grad_norm": 0.08682654052972794, "learning_rate": 2.1499912388295078e-06, "loss": 8.5044, "step": 185930 }, { "epoch": 0.9285625109241179, "grad_norm": 0.0899660736322403, "learning_rate": 2.148489323887957e-06, "loss": 8.5023, "step": 185940 }, { "epoch": 0.9286124497490574, "grad_norm": 0.09490537643432617, "learning_rate": 2.1469874089464064e-06, "loss": 8.4954, "step": 185950 }, { "epoch": 0.9286623885739969, "grad_norm": 0.08999119699001312, "learning_rate": 2.1454854940048566e-06, "loss": 8.4927, "step": 185960 }, { "epoch": 0.9287123273989363, "grad_norm": 0.09169169515371323, "learning_rate": 2.143983579063306e-06, "loss": 8.5092, "step": 185970 }, { "epoch": 0.9287622662238757, "grad_norm": 0.09515541046857834, "learning_rate": 2.1424816641217553e-06, "loss": 8.4973, "step": 185980 }, { "epoch": 0.9288122050488152, "grad_norm": 0.09675678610801697, "learning_rate": 2.1409797491802046e-06, "loss": 8.49, "step": 185990 }, { "epoch": 0.9288621438737547, "grad_norm": 0.08823744207620621, "learning_rate": 2.1394778342386544e-06, "loss": 8.512, "step": 186000 }, { "epoch": 0.9289120826986941, "grad_norm": 0.09296853095293045, "learning_rate": 2.137975919297104e-06, "loss": 8.5053, "step": 186010 }, { "epoch": 0.9289620215236335, "grad_norm": 0.08902104943990707, "learning_rate": 2.1364740043555534e-06, "loss": 8.5037, "step": 186020 }, { "epoch": 0.929011960348573, "grad_norm": 0.094472736120224, "learning_rate": 2.1349720894140028e-06, "loss": 8.4983, "step": 186030 }, { "epoch": 0.9290618991735125, "grad_norm": 0.09332358092069626, "learning_rate": 2.1334701744724525e-06, "loss": 8.5144, "step": 186040 }, { "epoch": 0.9291118379984519, "grad_norm": 0.08705665916204453, "learning_rate": 2.131968259530902e-06, "loss": 8.5012, "step": 186050 }, { "epoch": 0.9291617768233913, "grad_norm": 0.08535271137952805, "learning_rate": 2.1304663445893516e-06, "loss": 8.5141, "step": 186060 }, { "epoch": 0.9292117156483308, "grad_norm": 0.09060922265052795, "learning_rate": 2.128964429647801e-06, "loss": 8.507, "step": 186070 }, { "epoch": 0.9292616544732702, "grad_norm": 0.09175438433885574, "learning_rate": 2.1274625147062507e-06, "loss": 8.5066, "step": 186080 }, { "epoch": 0.9293115932982097, "grad_norm": 0.08872448652982712, "learning_rate": 2.1259605997647e-06, "loss": 8.5203, "step": 186090 }, { "epoch": 0.9293615321231491, "grad_norm": 0.0916357934474945, "learning_rate": 2.1244586848231494e-06, "loss": 8.5022, "step": 186100 }, { "epoch": 0.9294114709480886, "grad_norm": 0.08553195744752884, "learning_rate": 2.122956769881599e-06, "loss": 8.5101, "step": 186110 }, { "epoch": 0.929461409773028, "grad_norm": 0.09251870959997177, "learning_rate": 2.121454854940049e-06, "loss": 8.5139, "step": 186120 }, { "epoch": 0.9295113485979675, "grad_norm": 0.0898064374923706, "learning_rate": 2.119952939998498e-06, "loss": 8.5184, "step": 186130 }, { "epoch": 0.9295612874229069, "grad_norm": 0.09209618717432022, "learning_rate": 2.1184510250569475e-06, "loss": 8.5068, "step": 186140 }, { "epoch": 0.9296112262478464, "grad_norm": 0.08947913348674774, "learning_rate": 2.116949110115397e-06, "loss": 8.512, "step": 186150 }, { "epoch": 0.9296611650727858, "grad_norm": 0.08615361899137497, "learning_rate": 2.115447195173847e-06, "loss": 8.5051, "step": 186160 }, { "epoch": 0.9297111038977253, "grad_norm": 0.08832980692386627, "learning_rate": 2.1139452802322964e-06, "loss": 8.5106, "step": 186170 }, { "epoch": 0.9297610427226647, "grad_norm": 0.09645520150661469, "learning_rate": 2.1124433652907457e-06, "loss": 8.4926, "step": 186180 }, { "epoch": 0.9298109815476042, "grad_norm": 0.09340712428092957, "learning_rate": 2.110941450349195e-06, "loss": 8.4987, "step": 186190 }, { "epoch": 0.9298609203725436, "grad_norm": 0.08868103474378586, "learning_rate": 2.109439535407645e-06, "loss": 8.4996, "step": 186200 }, { "epoch": 0.9299108591974831, "grad_norm": 0.09115692973136902, "learning_rate": 2.1079376204660946e-06, "loss": 8.5084, "step": 186210 }, { "epoch": 0.9299607980224225, "grad_norm": 0.0927361398935318, "learning_rate": 2.106435705524544e-06, "loss": 8.5019, "step": 186220 }, { "epoch": 0.930010736847362, "grad_norm": 0.08556483685970306, "learning_rate": 2.1049337905829932e-06, "loss": 8.5105, "step": 186230 }, { "epoch": 0.9300606756723014, "grad_norm": 0.09068150818347931, "learning_rate": 2.103431875641443e-06, "loss": 8.5143, "step": 186240 }, { "epoch": 0.9301106144972409, "grad_norm": 0.09075603634119034, "learning_rate": 2.1019299606998923e-06, "loss": 8.5138, "step": 186250 }, { "epoch": 0.9301605533221803, "grad_norm": 0.09026709944009781, "learning_rate": 2.100428045758342e-06, "loss": 8.5206, "step": 186260 }, { "epoch": 0.9302104921471198, "grad_norm": 0.09280247241258621, "learning_rate": 2.0989261308167914e-06, "loss": 8.5057, "step": 186270 }, { "epoch": 0.9302604309720592, "grad_norm": 0.08981787413358688, "learning_rate": 2.097424215875241e-06, "loss": 8.5119, "step": 186280 }, { "epoch": 0.9303103697969987, "grad_norm": 0.09122589975595474, "learning_rate": 2.0959223009336905e-06, "loss": 8.4926, "step": 186290 }, { "epoch": 0.9303603086219381, "grad_norm": 0.09164507687091827, "learning_rate": 2.09442038599214e-06, "loss": 8.5077, "step": 186300 }, { "epoch": 0.9304102474468776, "grad_norm": 0.089156374335289, "learning_rate": 2.0929184710505896e-06, "loss": 8.5004, "step": 186310 }, { "epoch": 0.930460186271817, "grad_norm": 0.08851976692676544, "learning_rate": 2.0914165561090393e-06, "loss": 8.5076, "step": 186320 }, { "epoch": 0.9305101250967565, "grad_norm": 0.08928805589675903, "learning_rate": 2.0899146411674886e-06, "loss": 8.5166, "step": 186330 }, { "epoch": 0.9305600639216959, "grad_norm": 0.09302154183387756, "learning_rate": 2.088412726225938e-06, "loss": 8.504, "step": 186340 }, { "epoch": 0.9306100027466354, "grad_norm": 0.09193503111600876, "learning_rate": 2.0869108112843873e-06, "loss": 8.5065, "step": 186350 }, { "epoch": 0.9306599415715748, "grad_norm": 0.09432563930749893, "learning_rate": 2.0854088963428375e-06, "loss": 8.5003, "step": 186360 }, { "epoch": 0.9307098803965143, "grad_norm": 0.0901406854391098, "learning_rate": 2.083906981401287e-06, "loss": 8.5008, "step": 186370 }, { "epoch": 0.9307598192214537, "grad_norm": 0.09133270382881165, "learning_rate": 2.082405066459736e-06, "loss": 8.4996, "step": 186380 }, { "epoch": 0.9308097580463932, "grad_norm": 0.08889655023813248, "learning_rate": 2.0809031515181855e-06, "loss": 8.5021, "step": 186390 }, { "epoch": 0.9308596968713326, "grad_norm": 0.08690587431192398, "learning_rate": 2.0794012365766352e-06, "loss": 8.4902, "step": 186400 }, { "epoch": 0.930909635696272, "grad_norm": 0.08836494386196136, "learning_rate": 2.077899321635085e-06, "loss": 8.5141, "step": 186410 }, { "epoch": 0.9309595745212115, "grad_norm": 0.09551975876092911, "learning_rate": 2.0763974066935343e-06, "loss": 8.4937, "step": 186420 }, { "epoch": 0.931009513346151, "grad_norm": 0.08524799346923828, "learning_rate": 2.0748954917519837e-06, "loss": 8.5133, "step": 186430 }, { "epoch": 0.9310594521710904, "grad_norm": 0.09045319259166718, "learning_rate": 2.0733935768104334e-06, "loss": 8.5013, "step": 186440 }, { "epoch": 0.9311093909960299, "grad_norm": 0.08845016360282898, "learning_rate": 2.071891661868883e-06, "loss": 8.5065, "step": 186450 }, { "epoch": 0.9311593298209693, "grad_norm": 0.08880577981472015, "learning_rate": 2.0703897469273325e-06, "loss": 8.4971, "step": 186460 }, { "epoch": 0.9312092686459088, "grad_norm": 0.08805317431688309, "learning_rate": 2.068887831985782e-06, "loss": 8.4976, "step": 186470 }, { "epoch": 0.9312592074708482, "grad_norm": 0.08898577839136124, "learning_rate": 2.0673859170442316e-06, "loss": 8.4908, "step": 186480 }, { "epoch": 0.9313091462957876, "grad_norm": 0.08940175920724869, "learning_rate": 2.065884002102681e-06, "loss": 8.5027, "step": 186490 }, { "epoch": 0.9313590851207271, "grad_norm": 0.08669328689575195, "learning_rate": 2.0643820871611307e-06, "loss": 8.5035, "step": 186500 }, { "epoch": 0.9314090239456666, "grad_norm": 0.08564577996730804, "learning_rate": 2.06288017221958e-06, "loss": 8.4989, "step": 186510 }, { "epoch": 0.931458962770606, "grad_norm": 0.09756655246019363, "learning_rate": 2.0613782572780298e-06, "loss": 8.5036, "step": 186520 }, { "epoch": 0.9315089015955454, "grad_norm": 0.08926945179700851, "learning_rate": 2.059876342336479e-06, "loss": 8.5037, "step": 186530 }, { "epoch": 0.9315588404204849, "grad_norm": 0.08959157764911652, "learning_rate": 2.0583744273949284e-06, "loss": 8.492, "step": 186540 }, { "epoch": 0.9316087792454244, "grad_norm": 0.09301578253507614, "learning_rate": 2.056872512453378e-06, "loss": 8.5004, "step": 186550 }, { "epoch": 0.9316587180703638, "grad_norm": 0.09195592999458313, "learning_rate": 2.055370597511828e-06, "loss": 8.5058, "step": 186560 }, { "epoch": 0.9317086568953032, "grad_norm": 0.08996932953596115, "learning_rate": 2.0538686825702773e-06, "loss": 8.504, "step": 186570 }, { "epoch": 0.9317585957202427, "grad_norm": 0.09332447499036789, "learning_rate": 2.0523667676287266e-06, "loss": 8.4953, "step": 186580 }, { "epoch": 0.9318085345451822, "grad_norm": 0.08750541508197784, "learning_rate": 2.050864852687176e-06, "loss": 8.5028, "step": 186590 }, { "epoch": 0.9318584733701216, "grad_norm": 0.08709759265184402, "learning_rate": 2.049362937745626e-06, "loss": 8.5079, "step": 186600 }, { "epoch": 0.931908412195061, "grad_norm": 0.09282954037189484, "learning_rate": 2.0478610228040754e-06, "loss": 8.5121, "step": 186610 }, { "epoch": 0.9319583510200005, "grad_norm": 0.08732730150222778, "learning_rate": 2.0463591078625248e-06, "loss": 8.5083, "step": 186620 }, { "epoch": 0.93200828984494, "grad_norm": 0.09398452192544937, "learning_rate": 2.044857192920974e-06, "loss": 8.5088, "step": 186630 }, { "epoch": 0.9320582286698794, "grad_norm": 0.09803569316864014, "learning_rate": 2.043355277979424e-06, "loss": 8.5001, "step": 186640 }, { "epoch": 0.9321081674948188, "grad_norm": 0.08622153103351593, "learning_rate": 2.0418533630378736e-06, "loss": 8.5101, "step": 186650 }, { "epoch": 0.9321581063197583, "grad_norm": 0.0959283858537674, "learning_rate": 2.040351448096323e-06, "loss": 8.5072, "step": 186660 }, { "epoch": 0.9322080451446978, "grad_norm": 0.08934350311756134, "learning_rate": 2.0388495331547723e-06, "loss": 8.4896, "step": 186670 }, { "epoch": 0.9322579839696372, "grad_norm": 0.09462158381938934, "learning_rate": 2.037347618213222e-06, "loss": 8.5083, "step": 186680 }, { "epoch": 0.9323079227945766, "grad_norm": 0.09019871056079865, "learning_rate": 2.0358457032716714e-06, "loss": 8.5093, "step": 186690 }, { "epoch": 0.9323578616195161, "grad_norm": 0.0971764400601387, "learning_rate": 2.034343788330121e-06, "loss": 8.5032, "step": 186700 }, { "epoch": 0.9324078004444556, "grad_norm": 0.09243464469909668, "learning_rate": 2.0328418733885704e-06, "loss": 8.5077, "step": 186710 }, { "epoch": 0.932457739269395, "grad_norm": 0.09595301002264023, "learning_rate": 2.03133995844702e-06, "loss": 8.4983, "step": 186720 }, { "epoch": 0.9325076780943344, "grad_norm": 0.09078612178564072, "learning_rate": 2.0298380435054695e-06, "loss": 8.5082, "step": 186730 }, { "epoch": 0.9325576169192739, "grad_norm": 0.09375286102294922, "learning_rate": 2.028336128563919e-06, "loss": 8.518, "step": 186740 }, { "epoch": 0.9326075557442134, "grad_norm": 0.08845958113670349, "learning_rate": 2.0268342136223686e-06, "loss": 8.5098, "step": 186750 }, { "epoch": 0.9326574945691528, "grad_norm": 0.08810067176818848, "learning_rate": 2.0253322986808184e-06, "loss": 8.4912, "step": 186760 }, { "epoch": 0.9327074333940922, "grad_norm": 0.08972468227148056, "learning_rate": 2.0238303837392677e-06, "loss": 8.52, "step": 186770 }, { "epoch": 0.9327573722190317, "grad_norm": 0.09303756058216095, "learning_rate": 2.022328468797717e-06, "loss": 8.5024, "step": 186780 }, { "epoch": 0.9328073110439712, "grad_norm": 0.09837578982114792, "learning_rate": 2.0208265538561664e-06, "loss": 8.5024, "step": 186790 }, { "epoch": 0.9328572498689106, "grad_norm": 0.08584380894899368, "learning_rate": 2.0193246389146165e-06, "loss": 8.5302, "step": 186800 }, { "epoch": 0.93290718869385, "grad_norm": 0.09214811772108078, "learning_rate": 2.017822723973066e-06, "loss": 8.5084, "step": 186810 }, { "epoch": 0.9329571275187895, "grad_norm": 0.08620260655879974, "learning_rate": 2.016320809031515e-06, "loss": 8.5275, "step": 186820 }, { "epoch": 0.933007066343729, "grad_norm": 0.09670999646186829, "learning_rate": 2.0148188940899645e-06, "loss": 8.493, "step": 186830 }, { "epoch": 0.9330570051686684, "grad_norm": 0.08751846104860306, "learning_rate": 2.0133169791484143e-06, "loss": 8.5126, "step": 186840 }, { "epoch": 0.9331069439936078, "grad_norm": 0.0873984843492508, "learning_rate": 2.011815064206864e-06, "loss": 8.5206, "step": 186850 }, { "epoch": 0.9331568828185473, "grad_norm": 0.0933077484369278, "learning_rate": 2.0103131492653134e-06, "loss": 8.5069, "step": 186860 }, { "epoch": 0.9332068216434868, "grad_norm": 0.09100789576768875, "learning_rate": 2.0088112343237627e-06, "loss": 8.5076, "step": 186870 }, { "epoch": 0.9332567604684262, "grad_norm": 0.09495274722576141, "learning_rate": 2.0073093193822125e-06, "loss": 8.5025, "step": 186880 }, { "epoch": 0.9333066992933656, "grad_norm": 0.08778039366006851, "learning_rate": 2.005807404440662e-06, "loss": 8.5001, "step": 186890 }, { "epoch": 0.933356638118305, "grad_norm": 0.09381724148988724, "learning_rate": 2.0043054894991115e-06, "loss": 8.5296, "step": 186900 }, { "epoch": 0.9334065769432445, "grad_norm": 0.08771809935569763, "learning_rate": 2.002803574557561e-06, "loss": 8.5012, "step": 186910 }, { "epoch": 0.933456515768184, "grad_norm": 0.08947025239467621, "learning_rate": 2.0013016596160106e-06, "loss": 8.5203, "step": 186920 }, { "epoch": 0.9335064545931234, "grad_norm": 0.09270720183849335, "learning_rate": 1.99979974467446e-06, "loss": 8.5097, "step": 186930 }, { "epoch": 0.9335563934180628, "grad_norm": 0.09039568901062012, "learning_rate": 1.9982978297329093e-06, "loss": 8.4951, "step": 186940 }, { "epoch": 0.9336063322430023, "grad_norm": 0.09502692520618439, "learning_rate": 1.996795914791359e-06, "loss": 8.4986, "step": 186950 }, { "epoch": 0.9336562710679418, "grad_norm": 0.0894678607583046, "learning_rate": 1.995293999849809e-06, "loss": 8.5048, "step": 186960 }, { "epoch": 0.9337062098928812, "grad_norm": 0.08719871193170547, "learning_rate": 1.993792084908258e-06, "loss": 8.5154, "step": 186970 }, { "epoch": 0.9337561487178206, "grad_norm": 0.09486103057861328, "learning_rate": 1.9922901699667075e-06, "loss": 8.523, "step": 186980 }, { "epoch": 0.9338060875427601, "grad_norm": 0.09258001297712326, "learning_rate": 1.9907882550251572e-06, "loss": 8.5054, "step": 186990 }, { "epoch": 0.9338560263676996, "grad_norm": 0.08995097130537033, "learning_rate": 1.989286340083607e-06, "loss": 8.4992, "step": 187000 }, { "epoch": 0.933905965192639, "grad_norm": 0.09509983658790588, "learning_rate": 1.9877844251420563e-06, "loss": 8.5136, "step": 187010 }, { "epoch": 0.9339559040175784, "grad_norm": 0.09256593883037567, "learning_rate": 1.9862825102005056e-06, "loss": 8.524, "step": 187020 }, { "epoch": 0.9340058428425179, "grad_norm": 0.09413289278745651, "learning_rate": 1.984780595258955e-06, "loss": 8.4867, "step": 187030 }, { "epoch": 0.9340557816674574, "grad_norm": 0.09102970361709595, "learning_rate": 1.983278680317405e-06, "loss": 8.4949, "step": 187040 }, { "epoch": 0.9341057204923968, "grad_norm": 0.09890950471162796, "learning_rate": 1.9817767653758545e-06, "loss": 8.506, "step": 187050 }, { "epoch": 0.9341556593173362, "grad_norm": 0.09198985993862152, "learning_rate": 1.980274850434304e-06, "loss": 8.5197, "step": 187060 }, { "epoch": 0.9342055981422757, "grad_norm": 0.09256508946418762, "learning_rate": 1.978772935492753e-06, "loss": 8.501, "step": 187070 }, { "epoch": 0.9342555369672152, "grad_norm": 0.08654509484767914, "learning_rate": 1.977271020551203e-06, "loss": 8.5061, "step": 187080 }, { "epoch": 0.9343054757921546, "grad_norm": 0.08977734297513962, "learning_rate": 1.9757691056096527e-06, "loss": 8.5108, "step": 187090 }, { "epoch": 0.934355414617094, "grad_norm": 0.08983021229505539, "learning_rate": 1.974267190668102e-06, "loss": 8.4969, "step": 187100 }, { "epoch": 0.9344053534420335, "grad_norm": 0.09085972607135773, "learning_rate": 1.9727652757265513e-06, "loss": 8.5194, "step": 187110 }, { "epoch": 0.934455292266973, "grad_norm": 0.08963587135076523, "learning_rate": 1.971263360785001e-06, "loss": 8.4978, "step": 187120 }, { "epoch": 0.9345052310919124, "grad_norm": 0.08823879063129425, "learning_rate": 1.9697614458434504e-06, "loss": 8.5022, "step": 187130 }, { "epoch": 0.9345551699168518, "grad_norm": 0.08996618539094925, "learning_rate": 1.9682595309019e-06, "loss": 8.5172, "step": 187140 }, { "epoch": 0.9346051087417913, "grad_norm": 0.08975294977426529, "learning_rate": 1.9667576159603495e-06, "loss": 8.4987, "step": 187150 }, { "epoch": 0.9346550475667308, "grad_norm": 0.08694805204868317, "learning_rate": 1.965255701018799e-06, "loss": 8.5241, "step": 187160 }, { "epoch": 0.9347049863916702, "grad_norm": 0.09013424068689346, "learning_rate": 1.9637537860772486e-06, "loss": 8.5266, "step": 187170 }, { "epoch": 0.9347549252166096, "grad_norm": 0.09890510141849518, "learning_rate": 1.962251871135698e-06, "loss": 8.514, "step": 187180 }, { "epoch": 0.934804864041549, "grad_norm": 0.08957573026418686, "learning_rate": 1.9607499561941477e-06, "loss": 8.5077, "step": 187190 }, { "epoch": 0.9348548028664886, "grad_norm": 0.09044113010168076, "learning_rate": 1.959248041252597e-06, "loss": 8.4931, "step": 187200 }, { "epoch": 0.934904741691428, "grad_norm": 0.08695122599601746, "learning_rate": 1.9577461263110467e-06, "loss": 8.4954, "step": 187210 }, { "epoch": 0.9349546805163674, "grad_norm": 0.09316051751375198, "learning_rate": 1.956244211369496e-06, "loss": 8.515, "step": 187220 }, { "epoch": 0.9350046193413069, "grad_norm": 0.0904255136847496, "learning_rate": 1.9547422964279454e-06, "loss": 8.4974, "step": 187230 }, { "epoch": 0.9350545581662464, "grad_norm": 0.09191304445266724, "learning_rate": 1.953240381486395e-06, "loss": 8.5016, "step": 187240 }, { "epoch": 0.9351044969911858, "grad_norm": 0.09369964152574539, "learning_rate": 1.951738466544845e-06, "loss": 8.5113, "step": 187250 }, { "epoch": 0.9351544358161252, "grad_norm": 0.08856088668107986, "learning_rate": 1.9502365516032942e-06, "loss": 8.4985, "step": 187260 }, { "epoch": 0.9352043746410647, "grad_norm": 0.08918076008558273, "learning_rate": 1.9487346366617436e-06, "loss": 8.4963, "step": 187270 }, { "epoch": 0.9352543134660042, "grad_norm": 0.08736125379800797, "learning_rate": 1.947232721720193e-06, "loss": 8.5068, "step": 187280 }, { "epoch": 0.9353042522909436, "grad_norm": 0.08733315765857697, "learning_rate": 1.945730806778643e-06, "loss": 8.5016, "step": 187290 }, { "epoch": 0.935354191115883, "grad_norm": 0.0980258360505104, "learning_rate": 1.9442288918370924e-06, "loss": 8.4994, "step": 187300 }, { "epoch": 0.9354041299408224, "grad_norm": 0.09128105640411377, "learning_rate": 1.9427269768955418e-06, "loss": 8.508, "step": 187310 }, { "epoch": 0.935454068765762, "grad_norm": 0.08948434889316559, "learning_rate": 1.941225061953991e-06, "loss": 8.4966, "step": 187320 }, { "epoch": 0.9355040075907014, "grad_norm": 0.09193699061870575, "learning_rate": 1.939723147012441e-06, "loss": 8.4927, "step": 187330 }, { "epoch": 0.9355539464156408, "grad_norm": 0.09148573130369186, "learning_rate": 1.9382212320708906e-06, "loss": 8.4928, "step": 187340 }, { "epoch": 0.9356038852405802, "grad_norm": 0.09217602759599686, "learning_rate": 1.93671931712934e-06, "loss": 8.5084, "step": 187350 }, { "epoch": 0.9356538240655198, "grad_norm": 0.09518623352050781, "learning_rate": 1.9352174021877893e-06, "loss": 8.5043, "step": 187360 }, { "epoch": 0.9357037628904592, "grad_norm": 0.08786571770906448, "learning_rate": 1.933715487246239e-06, "loss": 8.5051, "step": 187370 }, { "epoch": 0.9357537017153986, "grad_norm": 0.0917324647307396, "learning_rate": 1.9322135723046883e-06, "loss": 8.5073, "step": 187380 }, { "epoch": 0.935803640540338, "grad_norm": 0.08800705522298813, "learning_rate": 1.930711657363138e-06, "loss": 8.5151, "step": 187390 }, { "epoch": 0.9358535793652776, "grad_norm": 0.09400518983602524, "learning_rate": 1.9292097424215874e-06, "loss": 8.4974, "step": 187400 }, { "epoch": 0.935903518190217, "grad_norm": 0.08761896938085556, "learning_rate": 1.927707827480037e-06, "loss": 8.507, "step": 187410 }, { "epoch": 0.9359534570151564, "grad_norm": 0.09526966512203217, "learning_rate": 1.9262059125384865e-06, "loss": 8.494, "step": 187420 }, { "epoch": 0.9360033958400958, "grad_norm": 0.08607209473848343, "learning_rate": 1.924703997596936e-06, "loss": 8.5211, "step": 187430 }, { "epoch": 0.9360533346650354, "grad_norm": 0.0938340425491333, "learning_rate": 1.9232020826553856e-06, "loss": 8.5096, "step": 187440 }, { "epoch": 0.9361032734899748, "grad_norm": 0.0961814746260643, "learning_rate": 1.9217001677138354e-06, "loss": 8.5094, "step": 187450 }, { "epoch": 0.9361532123149142, "grad_norm": 0.09486842155456543, "learning_rate": 1.9201982527722847e-06, "loss": 8.5038, "step": 187460 }, { "epoch": 0.9362031511398536, "grad_norm": 0.09420774132013321, "learning_rate": 1.918696337830734e-06, "loss": 8.4886, "step": 187470 }, { "epoch": 0.9362530899647932, "grad_norm": 0.09382013976573944, "learning_rate": 1.9171944228891838e-06, "loss": 8.4905, "step": 187480 }, { "epoch": 0.9363030287897326, "grad_norm": 0.08685174584388733, "learning_rate": 1.9156925079476335e-06, "loss": 8.5125, "step": 187490 }, { "epoch": 0.936352967614672, "grad_norm": 0.09120272099971771, "learning_rate": 1.914190593006083e-06, "loss": 8.5024, "step": 187500 }, { "epoch": 0.9364029064396114, "grad_norm": 0.09364549070596695, "learning_rate": 1.912688678064532e-06, "loss": 8.5126, "step": 187510 }, { "epoch": 0.936452845264551, "grad_norm": 0.0939672514796257, "learning_rate": 1.9111867631229815e-06, "loss": 8.4961, "step": 187520 }, { "epoch": 0.9365027840894904, "grad_norm": 0.08717799186706543, "learning_rate": 1.9096848481814317e-06, "loss": 8.5275, "step": 187530 }, { "epoch": 0.9365527229144298, "grad_norm": 0.09963074326515198, "learning_rate": 1.908182933239881e-06, "loss": 8.5056, "step": 187540 }, { "epoch": 0.9366026617393692, "grad_norm": 0.08945607393980026, "learning_rate": 1.9066810182983304e-06, "loss": 8.4934, "step": 187550 }, { "epoch": 0.9366526005643088, "grad_norm": 0.093963123857975, "learning_rate": 1.9051791033567797e-06, "loss": 8.5153, "step": 187560 }, { "epoch": 0.9367025393892482, "grad_norm": 0.09400065243244171, "learning_rate": 1.9036771884152297e-06, "loss": 8.4959, "step": 187570 }, { "epoch": 0.9367524782141876, "grad_norm": 0.09188316017389297, "learning_rate": 1.902175273473679e-06, "loss": 8.5011, "step": 187580 }, { "epoch": 0.936802417039127, "grad_norm": 0.09157782793045044, "learning_rate": 1.9006733585321283e-06, "loss": 8.4999, "step": 187590 }, { "epoch": 0.9368523558640666, "grad_norm": 0.08901472389698029, "learning_rate": 1.8991714435905779e-06, "loss": 8.5101, "step": 187600 }, { "epoch": 0.936902294689006, "grad_norm": 0.08923201262950897, "learning_rate": 1.8976695286490276e-06, "loss": 8.4988, "step": 187610 }, { "epoch": 0.9369522335139454, "grad_norm": 0.09177745878696442, "learning_rate": 1.8961676137074772e-06, "loss": 8.5018, "step": 187620 }, { "epoch": 0.9370021723388848, "grad_norm": 0.09261074662208557, "learning_rate": 1.8946656987659265e-06, "loss": 8.5063, "step": 187630 }, { "epoch": 0.9370521111638244, "grad_norm": 0.09517964720726013, "learning_rate": 1.893163783824376e-06, "loss": 8.4984, "step": 187640 }, { "epoch": 0.9371020499887638, "grad_norm": 0.09262313693761826, "learning_rate": 1.8916618688828258e-06, "loss": 8.4988, "step": 187650 }, { "epoch": 0.9371519888137032, "grad_norm": 0.08536347001791, "learning_rate": 1.8901599539412751e-06, "loss": 8.5007, "step": 187660 }, { "epoch": 0.9372019276386426, "grad_norm": 0.08875380456447601, "learning_rate": 1.8886580389997247e-06, "loss": 8.5065, "step": 187670 }, { "epoch": 0.9372518664635822, "grad_norm": 0.08886587619781494, "learning_rate": 1.887156124058174e-06, "loss": 8.5045, "step": 187680 }, { "epoch": 0.9373018052885216, "grad_norm": 0.09413565695285797, "learning_rate": 1.885654209116624e-06, "loss": 8.5019, "step": 187690 }, { "epoch": 0.937351744113461, "grad_norm": 0.0883907675743103, "learning_rate": 1.8841522941750733e-06, "loss": 8.4836, "step": 187700 }, { "epoch": 0.9374016829384004, "grad_norm": 0.09106942266225815, "learning_rate": 1.8826503792335226e-06, "loss": 8.5187, "step": 187710 }, { "epoch": 0.93745162176334, "grad_norm": 0.08953803032636642, "learning_rate": 1.8811484642919722e-06, "loss": 8.505, "step": 187720 }, { "epoch": 0.9375015605882794, "grad_norm": 0.09178190678358078, "learning_rate": 1.879646549350422e-06, "loss": 8.5085, "step": 187730 }, { "epoch": 0.9375514994132188, "grad_norm": 0.09529706090688705, "learning_rate": 1.8781446344088715e-06, "loss": 8.502, "step": 187740 }, { "epoch": 0.9376014382381582, "grad_norm": 0.08537425100803375, "learning_rate": 1.8766427194673208e-06, "loss": 8.4914, "step": 187750 }, { "epoch": 0.9376513770630978, "grad_norm": 0.0965605229139328, "learning_rate": 1.8751408045257701e-06, "loss": 8.5017, "step": 187760 }, { "epoch": 0.9377013158880372, "grad_norm": 0.08984053879976273, "learning_rate": 1.8736388895842199e-06, "loss": 8.5113, "step": 187770 }, { "epoch": 0.9377512547129766, "grad_norm": 0.0879005640745163, "learning_rate": 1.8721369746426694e-06, "loss": 8.5056, "step": 187780 }, { "epoch": 0.937801193537916, "grad_norm": 0.09019943326711655, "learning_rate": 1.870635059701119e-06, "loss": 8.4996, "step": 187790 }, { "epoch": 0.9378511323628556, "grad_norm": 0.08988826721906662, "learning_rate": 1.8691331447595685e-06, "loss": 8.5003, "step": 187800 }, { "epoch": 0.937901071187795, "grad_norm": 0.0893273875117302, "learning_rate": 1.8676312298180178e-06, "loss": 8.5022, "step": 187810 }, { "epoch": 0.9379510100127344, "grad_norm": 0.09221018105745316, "learning_rate": 1.8661293148764676e-06, "loss": 8.5128, "step": 187820 }, { "epoch": 0.9380009488376738, "grad_norm": 0.09288901835680008, "learning_rate": 1.864627399934917e-06, "loss": 8.4876, "step": 187830 }, { "epoch": 0.9380508876626134, "grad_norm": 0.09186063706874847, "learning_rate": 1.8631254849933667e-06, "loss": 8.4982, "step": 187840 }, { "epoch": 0.9381008264875528, "grad_norm": 0.09561605751514435, "learning_rate": 1.861623570051816e-06, "loss": 8.507, "step": 187850 }, { "epoch": 0.9381507653124922, "grad_norm": 0.09141556173563004, "learning_rate": 1.8601216551102656e-06, "loss": 8.5184, "step": 187860 }, { "epoch": 0.9382007041374316, "grad_norm": 0.08741176128387451, "learning_rate": 1.858619740168715e-06, "loss": 8.5107, "step": 187870 }, { "epoch": 0.9382506429623712, "grad_norm": 0.0920015349984169, "learning_rate": 1.8571178252271646e-06, "loss": 8.5134, "step": 187880 }, { "epoch": 0.9383005817873106, "grad_norm": 0.09082834422588348, "learning_rate": 1.8556159102856142e-06, "loss": 8.5058, "step": 187890 }, { "epoch": 0.93835052061225, "grad_norm": 0.093767449259758, "learning_rate": 1.8541139953440637e-06, "loss": 8.5058, "step": 187900 }, { "epoch": 0.9384004594371894, "grad_norm": 0.08792780339717865, "learning_rate": 1.8526120804025133e-06, "loss": 8.5191, "step": 187910 }, { "epoch": 0.9384503982621288, "grad_norm": 0.08794878423213959, "learning_rate": 1.8511101654609628e-06, "loss": 8.5125, "step": 187920 }, { "epoch": 0.9385003370870684, "grad_norm": 0.08979849517345428, "learning_rate": 1.8496082505194122e-06, "loss": 8.5039, "step": 187930 }, { "epoch": 0.9385502759120078, "grad_norm": 0.08824494481086731, "learning_rate": 1.848106335577862e-06, "loss": 8.4959, "step": 187940 }, { "epoch": 0.9386002147369472, "grad_norm": 0.08635149151086807, "learning_rate": 1.8466044206363112e-06, "loss": 8.4952, "step": 187950 }, { "epoch": 0.9386501535618866, "grad_norm": 0.09149076044559479, "learning_rate": 1.845102505694761e-06, "loss": 8.4943, "step": 187960 }, { "epoch": 0.9387000923868262, "grad_norm": 0.0918533205986023, "learning_rate": 1.8436005907532103e-06, "loss": 8.5059, "step": 187970 }, { "epoch": 0.9387500312117656, "grad_norm": 0.09328784048557281, "learning_rate": 1.8420986758116599e-06, "loss": 8.494, "step": 187980 }, { "epoch": 0.938799970036705, "grad_norm": 0.087800532579422, "learning_rate": 1.8405967608701094e-06, "loss": 8.5055, "step": 187990 }, { "epoch": 0.9388499088616444, "grad_norm": 0.0951266810297966, "learning_rate": 1.839094845928559e-06, "loss": 8.4924, "step": 188000 }, { "epoch": 0.938899847686584, "grad_norm": 0.0940432995557785, "learning_rate": 1.8375929309870085e-06, "loss": 8.4983, "step": 188010 }, { "epoch": 0.9389497865115234, "grad_norm": 0.08336132764816284, "learning_rate": 1.836091016045458e-06, "loss": 8.5174, "step": 188020 }, { "epoch": 0.9389997253364628, "grad_norm": 0.09214475005865097, "learning_rate": 1.8345891011039074e-06, "loss": 8.5017, "step": 188030 }, { "epoch": 0.9390496641614022, "grad_norm": 0.0941125676035881, "learning_rate": 1.8330871861623571e-06, "loss": 8.5126, "step": 188040 }, { "epoch": 0.9390996029863418, "grad_norm": 0.08968133479356766, "learning_rate": 1.8315852712208065e-06, "loss": 8.5143, "step": 188050 }, { "epoch": 0.9391495418112812, "grad_norm": 0.08894607424736023, "learning_rate": 1.8300833562792562e-06, "loss": 8.5021, "step": 188060 }, { "epoch": 0.9391994806362206, "grad_norm": 0.09236035495996475, "learning_rate": 1.8285814413377055e-06, "loss": 8.4871, "step": 188070 }, { "epoch": 0.93924941946116, "grad_norm": 0.09041870385408401, "learning_rate": 1.827079526396155e-06, "loss": 8.4946, "step": 188080 }, { "epoch": 0.9392993582860996, "grad_norm": 0.08861184120178223, "learning_rate": 1.8255776114546046e-06, "loss": 8.485, "step": 188090 }, { "epoch": 0.939349297111039, "grad_norm": 0.09006591141223907, "learning_rate": 1.8240756965130542e-06, "loss": 8.5131, "step": 188100 }, { "epoch": 0.9393992359359784, "grad_norm": 0.09157989919185638, "learning_rate": 1.8225737815715037e-06, "loss": 8.5131, "step": 188110 }, { "epoch": 0.9394491747609178, "grad_norm": 0.09308748692274094, "learning_rate": 1.8210718666299533e-06, "loss": 8.4979, "step": 188120 }, { "epoch": 0.9394991135858574, "grad_norm": 0.10365467518568039, "learning_rate": 1.8195699516884026e-06, "loss": 8.4922, "step": 188130 }, { "epoch": 0.9395490524107968, "grad_norm": 0.08948034048080444, "learning_rate": 1.8180680367468523e-06, "loss": 8.4991, "step": 188140 }, { "epoch": 0.9395989912357362, "grad_norm": 0.08552392572164536, "learning_rate": 1.8165661218053017e-06, "loss": 8.525, "step": 188150 }, { "epoch": 0.9396489300606756, "grad_norm": 0.09680628776550293, "learning_rate": 1.8150642068637514e-06, "loss": 8.5061, "step": 188160 }, { "epoch": 0.9396988688856152, "grad_norm": 0.08866409212350845, "learning_rate": 1.8135622919222008e-06, "loss": 8.5054, "step": 188170 }, { "epoch": 0.9397488077105546, "grad_norm": 0.09396608918905258, "learning_rate": 1.8120603769806505e-06, "loss": 8.515, "step": 188180 }, { "epoch": 0.939798746535494, "grad_norm": 0.08692758530378342, "learning_rate": 1.8105584620390998e-06, "loss": 8.5197, "step": 188190 }, { "epoch": 0.9398486853604334, "grad_norm": 0.0874529555439949, "learning_rate": 1.8090565470975494e-06, "loss": 8.509, "step": 188200 }, { "epoch": 0.939898624185373, "grad_norm": 0.08824202418327332, "learning_rate": 1.807554632155999e-06, "loss": 8.5272, "step": 188210 }, { "epoch": 0.9399485630103124, "grad_norm": 0.0905284509062767, "learning_rate": 1.8060527172144485e-06, "loss": 8.5125, "step": 188220 }, { "epoch": 0.9399985018352518, "grad_norm": 0.0894288644194603, "learning_rate": 1.804550802272898e-06, "loss": 8.4988, "step": 188230 }, { "epoch": 0.9400484406601912, "grad_norm": 0.09066865593194962, "learning_rate": 1.8030488873313476e-06, "loss": 8.5031, "step": 188240 }, { "epoch": 0.9400983794851308, "grad_norm": 0.08707483112812042, "learning_rate": 1.801546972389797e-06, "loss": 8.49, "step": 188250 }, { "epoch": 0.9401483183100702, "grad_norm": 0.08790065348148346, "learning_rate": 1.8000450574482467e-06, "loss": 8.4957, "step": 188260 }, { "epoch": 0.9401982571350096, "grad_norm": 0.09469541907310486, "learning_rate": 1.798543142506696e-06, "loss": 8.5037, "step": 188270 }, { "epoch": 0.940248195959949, "grad_norm": 0.0937683954834938, "learning_rate": 1.7970412275651457e-06, "loss": 8.5359, "step": 188280 }, { "epoch": 0.9402981347848886, "grad_norm": 0.08924166113138199, "learning_rate": 1.795539312623595e-06, "loss": 8.5029, "step": 188290 }, { "epoch": 0.940348073609828, "grad_norm": 0.08660190552473068, "learning_rate": 1.7940373976820446e-06, "loss": 8.5128, "step": 188300 }, { "epoch": 0.9403980124347674, "grad_norm": 0.0949644148349762, "learning_rate": 1.7925354827404942e-06, "loss": 8.5099, "step": 188310 }, { "epoch": 0.9404479512597068, "grad_norm": 0.08653347194194794, "learning_rate": 1.7910335677989437e-06, "loss": 8.4979, "step": 188320 }, { "epoch": 0.9404978900846463, "grad_norm": 0.09062409400939941, "learning_rate": 1.7895316528573932e-06, "loss": 8.5025, "step": 188330 }, { "epoch": 0.9405478289095858, "grad_norm": 0.09127449989318848, "learning_rate": 1.7880297379158428e-06, "loss": 8.4861, "step": 188340 }, { "epoch": 0.9405977677345252, "grad_norm": 0.09160736948251724, "learning_rate": 1.7865278229742921e-06, "loss": 8.498, "step": 188350 }, { "epoch": 0.9406477065594646, "grad_norm": 0.08944227546453476, "learning_rate": 1.7850259080327419e-06, "loss": 8.4942, "step": 188360 }, { "epoch": 0.9406976453844041, "grad_norm": 0.08706548810005188, "learning_rate": 1.7835239930911912e-06, "loss": 8.5159, "step": 188370 }, { "epoch": 0.9407475842093436, "grad_norm": 0.08913147449493408, "learning_rate": 1.782022078149641e-06, "loss": 8.4955, "step": 188380 }, { "epoch": 0.940797523034283, "grad_norm": 0.09037254750728607, "learning_rate": 1.7805201632080903e-06, "loss": 8.5251, "step": 188390 }, { "epoch": 0.9408474618592224, "grad_norm": 0.09032527357339859, "learning_rate": 1.7790182482665398e-06, "loss": 8.522, "step": 188400 }, { "epoch": 0.9408974006841619, "grad_norm": 0.08700371533632278, "learning_rate": 1.7775163333249894e-06, "loss": 8.4951, "step": 188410 }, { "epoch": 0.9409473395091014, "grad_norm": 0.09280155599117279, "learning_rate": 1.776014418383439e-06, "loss": 8.5057, "step": 188420 }, { "epoch": 0.9409972783340408, "grad_norm": 0.09698103368282318, "learning_rate": 1.7745125034418885e-06, "loss": 8.4896, "step": 188430 }, { "epoch": 0.9410472171589802, "grad_norm": 0.09256529062986374, "learning_rate": 1.773010588500338e-06, "loss": 8.5083, "step": 188440 }, { "epoch": 0.9410971559839197, "grad_norm": 0.0863165482878685, "learning_rate": 1.7715086735587875e-06, "loss": 8.5024, "step": 188450 }, { "epoch": 0.9411470948088592, "grad_norm": 0.08784864097833633, "learning_rate": 1.770006758617237e-06, "loss": 8.5086, "step": 188460 }, { "epoch": 0.9411970336337986, "grad_norm": 0.09332052618265152, "learning_rate": 1.7685048436756864e-06, "loss": 8.5082, "step": 188470 }, { "epoch": 0.941246972458738, "grad_norm": 0.08981883525848389, "learning_rate": 1.7670029287341362e-06, "loss": 8.5098, "step": 188480 }, { "epoch": 0.9412969112836775, "grad_norm": 0.08930005878210068, "learning_rate": 1.7655010137925855e-06, "loss": 8.5293, "step": 188490 }, { "epoch": 0.941346850108617, "grad_norm": 0.08967043459415436, "learning_rate": 1.7639990988510353e-06, "loss": 8.5243, "step": 188500 }, { "epoch": 0.9413967889335564, "grad_norm": 0.09381302446126938, "learning_rate": 1.7624971839094846e-06, "loss": 8.4928, "step": 188510 }, { "epoch": 0.9414467277584958, "grad_norm": 0.0840909332036972, "learning_rate": 1.7609952689679341e-06, "loss": 8.5254, "step": 188520 }, { "epoch": 0.9414966665834353, "grad_norm": 0.09190535545349121, "learning_rate": 1.7594933540263837e-06, "loss": 8.5259, "step": 188530 }, { "epoch": 0.9415466054083748, "grad_norm": 0.0981719046831131, "learning_rate": 1.7579914390848332e-06, "loss": 8.5103, "step": 188540 }, { "epoch": 0.9415965442333142, "grad_norm": 0.08787670731544495, "learning_rate": 1.7564895241432828e-06, "loss": 8.5087, "step": 188550 }, { "epoch": 0.9416464830582536, "grad_norm": 0.09270340204238892, "learning_rate": 1.7549876092017323e-06, "loss": 8.5074, "step": 188560 }, { "epoch": 0.9416964218831931, "grad_norm": 0.09285027533769608, "learning_rate": 1.7534856942601816e-06, "loss": 8.5098, "step": 188570 }, { "epoch": 0.9417463607081326, "grad_norm": 0.0908166691660881, "learning_rate": 1.7519837793186314e-06, "loss": 8.4977, "step": 188580 }, { "epoch": 0.941796299533072, "grad_norm": 0.09321475028991699, "learning_rate": 1.7504818643770807e-06, "loss": 8.5135, "step": 188590 }, { "epoch": 0.9418462383580114, "grad_norm": 0.08778084069490433, "learning_rate": 1.7489799494355305e-06, "loss": 8.5213, "step": 188600 }, { "epoch": 0.9418961771829509, "grad_norm": 0.09464406967163086, "learning_rate": 1.7474780344939798e-06, "loss": 8.5029, "step": 188610 }, { "epoch": 0.9419461160078904, "grad_norm": 0.09429196268320084, "learning_rate": 1.7459761195524294e-06, "loss": 8.4987, "step": 188620 }, { "epoch": 0.9419960548328298, "grad_norm": 0.09396494925022125, "learning_rate": 1.744474204610879e-06, "loss": 8.5222, "step": 188630 }, { "epoch": 0.9420459936577692, "grad_norm": 0.08751998841762543, "learning_rate": 1.7429722896693284e-06, "loss": 8.4981, "step": 188640 }, { "epoch": 0.9420959324827087, "grad_norm": 0.08901175856590271, "learning_rate": 1.741470374727778e-06, "loss": 8.5163, "step": 188650 }, { "epoch": 0.9421458713076482, "grad_norm": 0.08800294995307922, "learning_rate": 1.7399684597862275e-06, "loss": 8.5103, "step": 188660 }, { "epoch": 0.9421958101325876, "grad_norm": 0.09194714576005936, "learning_rate": 1.7384665448446769e-06, "loss": 8.4861, "step": 188670 }, { "epoch": 0.942245748957527, "grad_norm": 0.10198995471000671, "learning_rate": 1.7369646299031266e-06, "loss": 8.511, "step": 188680 }, { "epoch": 0.9422956877824665, "grad_norm": 0.09428546577692032, "learning_rate": 1.735462714961576e-06, "loss": 8.5202, "step": 188690 }, { "epoch": 0.942345626607406, "grad_norm": 0.09078855067491531, "learning_rate": 1.7339608000200257e-06, "loss": 8.5044, "step": 188700 }, { "epoch": 0.9423955654323454, "grad_norm": 0.09226953983306885, "learning_rate": 1.732458885078475e-06, "loss": 8.5043, "step": 188710 }, { "epoch": 0.9424455042572848, "grad_norm": 0.09067227691411972, "learning_rate": 1.7309569701369248e-06, "loss": 8.5106, "step": 188720 }, { "epoch": 0.9424954430822243, "grad_norm": 0.09468891471624374, "learning_rate": 1.7294550551953741e-06, "loss": 8.4964, "step": 188730 }, { "epoch": 0.9425453819071637, "grad_norm": 0.0890989601612091, "learning_rate": 1.7279531402538237e-06, "loss": 8.5198, "step": 188740 }, { "epoch": 0.9425953207321032, "grad_norm": 0.09626948088407516, "learning_rate": 1.7264512253122732e-06, "loss": 8.5006, "step": 188750 }, { "epoch": 0.9426452595570426, "grad_norm": 0.08856556564569473, "learning_rate": 1.7249493103707227e-06, "loss": 8.487, "step": 188760 }, { "epoch": 0.9426951983819821, "grad_norm": 0.09216383099555969, "learning_rate": 1.7234473954291723e-06, "loss": 8.5164, "step": 188770 }, { "epoch": 0.9427451372069215, "grad_norm": 0.08988320082426071, "learning_rate": 1.7219454804876218e-06, "loss": 8.5159, "step": 188780 }, { "epoch": 0.942795076031861, "grad_norm": 0.09181145578622818, "learning_rate": 1.7204435655460712e-06, "loss": 8.4966, "step": 188790 }, { "epoch": 0.9428450148568004, "grad_norm": 0.08662037551403046, "learning_rate": 1.718941650604521e-06, "loss": 8.5168, "step": 188800 }, { "epoch": 0.9428949536817399, "grad_norm": 0.09044136106967926, "learning_rate": 1.7174397356629702e-06, "loss": 8.5264, "step": 188810 }, { "epoch": 0.9429448925066793, "grad_norm": 0.09236202389001846, "learning_rate": 1.71593782072142e-06, "loss": 8.4977, "step": 188820 }, { "epoch": 0.9429948313316188, "grad_norm": 0.08900753408670425, "learning_rate": 1.7144359057798693e-06, "loss": 8.5269, "step": 188830 }, { "epoch": 0.9430447701565582, "grad_norm": 0.09463819116353989, "learning_rate": 1.7129339908383189e-06, "loss": 8.5008, "step": 188840 }, { "epoch": 0.9430947089814977, "grad_norm": 0.09038529545068741, "learning_rate": 1.7114320758967684e-06, "loss": 8.5102, "step": 188850 }, { "epoch": 0.9431446478064371, "grad_norm": 0.0910005047917366, "learning_rate": 1.709930160955218e-06, "loss": 8.4995, "step": 188860 }, { "epoch": 0.9431945866313766, "grad_norm": 0.09129851311445236, "learning_rate": 1.7084282460136675e-06, "loss": 8.5172, "step": 188870 }, { "epoch": 0.943244525456316, "grad_norm": 0.09213831275701523, "learning_rate": 1.706926331072117e-06, "loss": 8.5083, "step": 188880 }, { "epoch": 0.9432944642812554, "grad_norm": 0.09839709848165512, "learning_rate": 1.7054244161305664e-06, "loss": 8.4996, "step": 188890 }, { "epoch": 0.9433444031061949, "grad_norm": 0.09194820374250412, "learning_rate": 1.7039225011890161e-06, "loss": 8.493, "step": 188900 }, { "epoch": 0.9433943419311344, "grad_norm": 0.0895412266254425, "learning_rate": 1.7024205862474655e-06, "loss": 8.5171, "step": 188910 }, { "epoch": 0.9434442807560738, "grad_norm": 0.08914782851934433, "learning_rate": 1.7009186713059152e-06, "loss": 8.4905, "step": 188920 }, { "epoch": 0.9434942195810132, "grad_norm": 0.09766605496406555, "learning_rate": 1.6994167563643646e-06, "loss": 8.5128, "step": 188930 }, { "epoch": 0.9435441584059527, "grad_norm": 0.0920957624912262, "learning_rate": 1.697914841422814e-06, "loss": 8.4913, "step": 188940 }, { "epoch": 0.9435940972308922, "grad_norm": 0.08751115202903748, "learning_rate": 1.6964129264812636e-06, "loss": 8.5104, "step": 188950 }, { "epoch": 0.9436440360558316, "grad_norm": 0.09014736860990524, "learning_rate": 1.6949110115397132e-06, "loss": 8.5043, "step": 188960 }, { "epoch": 0.943693974880771, "grad_norm": 0.08869920670986176, "learning_rate": 1.6934090965981627e-06, "loss": 8.4973, "step": 188970 }, { "epoch": 0.9437439137057105, "grad_norm": 0.088557668030262, "learning_rate": 1.6919071816566123e-06, "loss": 8.5082, "step": 188980 }, { "epoch": 0.94379385253065, "grad_norm": 0.08449137955904007, "learning_rate": 1.6904052667150618e-06, "loss": 8.5086, "step": 188990 }, { "epoch": 0.9438437913555894, "grad_norm": 0.08829719573259354, "learning_rate": 1.6889033517735114e-06, "loss": 8.5273, "step": 189000 }, { "epoch": 0.9438937301805288, "grad_norm": 0.09361531585454941, "learning_rate": 1.6874014368319607e-06, "loss": 8.4817, "step": 189010 }, { "epoch": 0.9439436690054683, "grad_norm": 0.09768420457839966, "learning_rate": 1.6858995218904104e-06, "loss": 8.5082, "step": 189020 }, { "epoch": 0.9439936078304078, "grad_norm": 0.0924157053232193, "learning_rate": 1.6843976069488598e-06, "loss": 8.5025, "step": 189030 }, { "epoch": 0.9440435466553472, "grad_norm": 0.09079400449991226, "learning_rate": 1.6828956920073095e-06, "loss": 8.5017, "step": 189040 }, { "epoch": 0.9440934854802866, "grad_norm": 0.09591791033744812, "learning_rate": 1.6813937770657589e-06, "loss": 8.5061, "step": 189050 }, { "epoch": 0.9441434243052261, "grad_norm": 0.0918201357126236, "learning_rate": 1.6798918621242084e-06, "loss": 8.515, "step": 189060 }, { "epoch": 0.9441933631301656, "grad_norm": 0.09003403782844543, "learning_rate": 1.678389947182658e-06, "loss": 8.5041, "step": 189070 }, { "epoch": 0.944243301955105, "grad_norm": 0.09714401513338089, "learning_rate": 1.6768880322411075e-06, "loss": 8.4791, "step": 189080 }, { "epoch": 0.9442932407800444, "grad_norm": 0.08682556450366974, "learning_rate": 1.675386117299557e-06, "loss": 8.5136, "step": 189090 }, { "epoch": 0.9443431796049839, "grad_norm": 0.08617331087589264, "learning_rate": 1.6738842023580066e-06, "loss": 8.5153, "step": 189100 }, { "epoch": 0.9443931184299234, "grad_norm": 0.09631218761205673, "learning_rate": 1.672382287416456e-06, "loss": 8.4938, "step": 189110 }, { "epoch": 0.9444430572548628, "grad_norm": 0.09221839159727097, "learning_rate": 1.6708803724749057e-06, "loss": 8.5093, "step": 189120 }, { "epoch": 0.9444929960798022, "grad_norm": 0.08441857248544693, "learning_rate": 1.669378457533355e-06, "loss": 8.5082, "step": 189130 }, { "epoch": 0.9445429349047417, "grad_norm": 0.0894455835223198, "learning_rate": 1.6678765425918047e-06, "loss": 8.4961, "step": 189140 }, { "epoch": 0.9445928737296811, "grad_norm": 0.08852816373109818, "learning_rate": 1.666374627650254e-06, "loss": 8.4978, "step": 189150 }, { "epoch": 0.9446428125546206, "grad_norm": 0.09122937172651291, "learning_rate": 1.6648727127087036e-06, "loss": 8.5011, "step": 189160 }, { "epoch": 0.94469275137956, "grad_norm": 0.09438908100128174, "learning_rate": 1.6633707977671532e-06, "loss": 8.5027, "step": 189170 }, { "epoch": 0.9447426902044995, "grad_norm": 0.08923160284757614, "learning_rate": 1.6618688828256027e-06, "loss": 8.4992, "step": 189180 }, { "epoch": 0.944792629029439, "grad_norm": 0.08867133408784866, "learning_rate": 1.6603669678840523e-06, "loss": 8.4922, "step": 189190 }, { "epoch": 0.9448425678543784, "grad_norm": 0.0948406532406807, "learning_rate": 1.6588650529425018e-06, "loss": 8.5108, "step": 189200 }, { "epoch": 0.9448925066793178, "grad_norm": 0.08932336419820786, "learning_rate": 1.6573631380009511e-06, "loss": 8.5091, "step": 189210 }, { "epoch": 0.9449424455042573, "grad_norm": 0.09022360295057297, "learning_rate": 1.6558612230594009e-06, "loss": 8.5191, "step": 189220 }, { "epoch": 0.9449923843291967, "grad_norm": 0.09157562255859375, "learning_rate": 1.6543593081178502e-06, "loss": 8.5045, "step": 189230 }, { "epoch": 0.9450423231541362, "grad_norm": 0.09014785289764404, "learning_rate": 1.6528573931763e-06, "loss": 8.5158, "step": 189240 }, { "epoch": 0.9450922619790756, "grad_norm": 0.09535112977027893, "learning_rate": 1.6513554782347493e-06, "loss": 8.5037, "step": 189250 }, { "epoch": 0.9451422008040151, "grad_norm": 0.09707490354776382, "learning_rate": 1.6498535632931988e-06, "loss": 8.5056, "step": 189260 }, { "epoch": 0.9451921396289545, "grad_norm": 0.09138431400060654, "learning_rate": 1.6483516483516484e-06, "loss": 8.4955, "step": 189270 }, { "epoch": 0.945242078453894, "grad_norm": 0.0848042219877243, "learning_rate": 1.646849733410098e-06, "loss": 8.5018, "step": 189280 }, { "epoch": 0.9452920172788334, "grad_norm": 0.08952652662992477, "learning_rate": 1.6453478184685475e-06, "loss": 8.5142, "step": 189290 }, { "epoch": 0.9453419561037729, "grad_norm": 0.09085434675216675, "learning_rate": 1.643845903526997e-06, "loss": 8.5238, "step": 189300 }, { "epoch": 0.9453918949287123, "grad_norm": 0.08714916557073593, "learning_rate": 1.6423439885854466e-06, "loss": 8.531, "step": 189310 }, { "epoch": 0.9454418337536518, "grad_norm": 0.09484901279211044, "learning_rate": 1.640842073643896e-06, "loss": 8.4926, "step": 189320 }, { "epoch": 0.9454917725785912, "grad_norm": 0.08953174948692322, "learning_rate": 1.6393401587023454e-06, "loss": 8.5142, "step": 189330 }, { "epoch": 0.9455417114035307, "grad_norm": 0.09061595797538757, "learning_rate": 1.6378382437607952e-06, "loss": 8.4869, "step": 189340 }, { "epoch": 0.9455916502284701, "grad_norm": 0.08741827309131622, "learning_rate": 1.6363363288192445e-06, "loss": 8.506, "step": 189350 }, { "epoch": 0.9456415890534096, "grad_norm": 0.0936361625790596, "learning_rate": 1.6348344138776943e-06, "loss": 8.4956, "step": 189360 }, { "epoch": 0.945691527878349, "grad_norm": 0.0899646058678627, "learning_rate": 1.6333324989361436e-06, "loss": 8.5156, "step": 189370 }, { "epoch": 0.9457414667032885, "grad_norm": 0.09246868640184402, "learning_rate": 1.6318305839945931e-06, "loss": 8.5201, "step": 189380 }, { "epoch": 0.9457914055282279, "grad_norm": 0.08746492117643356, "learning_rate": 1.6303286690530427e-06, "loss": 8.509, "step": 189390 }, { "epoch": 0.9458413443531674, "grad_norm": 0.09183592349290848, "learning_rate": 1.6288267541114922e-06, "loss": 8.5058, "step": 189400 }, { "epoch": 0.9458912831781068, "grad_norm": 0.088591568171978, "learning_rate": 1.6273248391699418e-06, "loss": 8.5103, "step": 189410 }, { "epoch": 0.9459412220030463, "grad_norm": 0.09180916845798492, "learning_rate": 1.6258229242283913e-06, "loss": 8.505, "step": 189420 }, { "epoch": 0.9459911608279857, "grad_norm": 0.0878295823931694, "learning_rate": 1.6243210092868406e-06, "loss": 8.4924, "step": 189430 }, { "epoch": 0.9460410996529252, "grad_norm": 0.09343598783016205, "learning_rate": 1.6228190943452904e-06, "loss": 8.4996, "step": 189440 }, { "epoch": 0.9460910384778646, "grad_norm": 0.08655836433172226, "learning_rate": 1.6213171794037397e-06, "loss": 8.4907, "step": 189450 }, { "epoch": 0.9461409773028041, "grad_norm": 0.09079290181398392, "learning_rate": 1.6198152644621895e-06, "loss": 8.5051, "step": 189460 }, { "epoch": 0.9461909161277435, "grad_norm": 0.09278516471385956, "learning_rate": 1.6183133495206388e-06, "loss": 8.5262, "step": 189470 }, { "epoch": 0.946240854952683, "grad_norm": 0.0923718810081482, "learning_rate": 1.6168114345790884e-06, "loss": 8.4926, "step": 189480 }, { "epoch": 0.9462907937776224, "grad_norm": 0.0890062153339386, "learning_rate": 1.615309519637538e-06, "loss": 8.5063, "step": 189490 }, { "epoch": 0.9463407326025619, "grad_norm": 0.0915982574224472, "learning_rate": 1.6138076046959875e-06, "loss": 8.5034, "step": 189500 }, { "epoch": 0.9463906714275013, "grad_norm": 0.0939352884888649, "learning_rate": 1.612305689754437e-06, "loss": 8.4948, "step": 189510 }, { "epoch": 0.9464406102524408, "grad_norm": 0.09317021816968918, "learning_rate": 1.6108037748128865e-06, "loss": 8.5007, "step": 189520 }, { "epoch": 0.9464905490773802, "grad_norm": 0.08831048011779785, "learning_rate": 1.609301859871336e-06, "loss": 8.4932, "step": 189530 }, { "epoch": 0.9465404879023197, "grad_norm": 0.1017037183046341, "learning_rate": 1.6077999449297856e-06, "loss": 8.5018, "step": 189540 }, { "epoch": 0.9465904267272591, "grad_norm": 0.09273435175418854, "learning_rate": 1.606298029988235e-06, "loss": 8.4929, "step": 189550 }, { "epoch": 0.9466403655521985, "grad_norm": 0.09123826771974564, "learning_rate": 1.6047961150466847e-06, "loss": 8.4936, "step": 189560 }, { "epoch": 0.946690304377138, "grad_norm": 0.09415683150291443, "learning_rate": 1.603294200105134e-06, "loss": 8.4812, "step": 189570 }, { "epoch": 0.9467402432020775, "grad_norm": 0.08812573552131653, "learning_rate": 1.6017922851635838e-06, "loss": 8.5189, "step": 189580 }, { "epoch": 0.9467901820270169, "grad_norm": 0.0949978157877922, "learning_rate": 1.6002903702220331e-06, "loss": 8.4971, "step": 189590 }, { "epoch": 0.9468401208519563, "grad_norm": 0.09173561632633209, "learning_rate": 1.5987884552804827e-06, "loss": 8.4943, "step": 189600 }, { "epoch": 0.9468900596768958, "grad_norm": 0.09235606342554092, "learning_rate": 1.5972865403389322e-06, "loss": 8.4975, "step": 189610 }, { "epoch": 0.9469399985018353, "grad_norm": 0.09276280552148819, "learning_rate": 1.5957846253973818e-06, "loss": 8.5019, "step": 189620 }, { "epoch": 0.9469899373267747, "grad_norm": 0.09055425226688385, "learning_rate": 1.5942827104558313e-06, "loss": 8.5128, "step": 189630 }, { "epoch": 0.9470398761517141, "grad_norm": 0.09334809333086014, "learning_rate": 1.5927807955142808e-06, "loss": 8.4991, "step": 189640 }, { "epoch": 0.9470898149766536, "grad_norm": 0.09053409099578857, "learning_rate": 1.5912788805727302e-06, "loss": 8.4977, "step": 189650 }, { "epoch": 0.9471397538015931, "grad_norm": 0.09045706689357758, "learning_rate": 1.58977696563118e-06, "loss": 8.4876, "step": 189660 }, { "epoch": 0.9471896926265325, "grad_norm": 0.0899735763669014, "learning_rate": 1.5882750506896293e-06, "loss": 8.5024, "step": 189670 }, { "epoch": 0.9472396314514719, "grad_norm": 0.09022071212530136, "learning_rate": 1.586773135748079e-06, "loss": 8.5059, "step": 189680 }, { "epoch": 0.9472895702764114, "grad_norm": 0.08928265422582626, "learning_rate": 1.5852712208065283e-06, "loss": 8.5079, "step": 189690 }, { "epoch": 0.9473395091013509, "grad_norm": 0.08643801510334015, "learning_rate": 1.5837693058649779e-06, "loss": 8.4948, "step": 189700 }, { "epoch": 0.9473894479262903, "grad_norm": 0.09584169834852219, "learning_rate": 1.5822673909234274e-06, "loss": 8.4735, "step": 189710 }, { "epoch": 0.9474393867512297, "grad_norm": 0.08683201670646667, "learning_rate": 1.580765475981877e-06, "loss": 8.4989, "step": 189720 }, { "epoch": 0.9474893255761692, "grad_norm": 0.08900432288646698, "learning_rate": 1.5792635610403265e-06, "loss": 8.5159, "step": 189730 }, { "epoch": 0.9475392644011087, "grad_norm": 0.0945209488272667, "learning_rate": 1.577761646098776e-06, "loss": 8.4977, "step": 189740 }, { "epoch": 0.9475892032260481, "grad_norm": 0.09035822004079819, "learning_rate": 1.5762597311572254e-06, "loss": 8.4902, "step": 189750 }, { "epoch": 0.9476391420509875, "grad_norm": 0.09136451780796051, "learning_rate": 1.5747578162156751e-06, "loss": 8.5167, "step": 189760 }, { "epoch": 0.947689080875927, "grad_norm": 0.09294602274894714, "learning_rate": 1.5732559012741245e-06, "loss": 8.4919, "step": 189770 }, { "epoch": 0.9477390197008665, "grad_norm": 0.08745280653238297, "learning_rate": 1.5717539863325742e-06, "loss": 8.5084, "step": 189780 }, { "epoch": 0.9477889585258059, "grad_norm": 0.08939157426357269, "learning_rate": 1.5702520713910236e-06, "loss": 8.5243, "step": 189790 }, { "epoch": 0.9478388973507453, "grad_norm": 0.09142594784498215, "learning_rate": 1.5687501564494731e-06, "loss": 8.5088, "step": 189800 }, { "epoch": 0.9478888361756848, "grad_norm": 0.08955658972263336, "learning_rate": 1.5672482415079227e-06, "loss": 8.5227, "step": 189810 }, { "epoch": 0.9479387750006243, "grad_norm": 0.08954078704118729, "learning_rate": 1.5657463265663722e-06, "loss": 8.5083, "step": 189820 }, { "epoch": 0.9479887138255637, "grad_norm": 0.09221227467060089, "learning_rate": 1.5642444116248217e-06, "loss": 8.5135, "step": 189830 }, { "epoch": 0.9480386526505031, "grad_norm": 0.09133052825927734, "learning_rate": 1.5627424966832713e-06, "loss": 8.5026, "step": 189840 }, { "epoch": 0.9480885914754426, "grad_norm": 0.0908413901925087, "learning_rate": 1.5612405817417208e-06, "loss": 8.5082, "step": 189850 }, { "epoch": 0.9481385303003821, "grad_norm": 0.09032785892486572, "learning_rate": 1.5597386668001704e-06, "loss": 8.5031, "step": 189860 }, { "epoch": 0.9481884691253215, "grad_norm": 0.09097219258546829, "learning_rate": 1.5582367518586197e-06, "loss": 8.5025, "step": 189870 }, { "epoch": 0.9482384079502609, "grad_norm": 0.08840753883123398, "learning_rate": 1.5567348369170695e-06, "loss": 8.4975, "step": 189880 }, { "epoch": 0.9482883467752004, "grad_norm": 0.08880984038114548, "learning_rate": 1.5552329219755188e-06, "loss": 8.5018, "step": 189890 }, { "epoch": 0.9483382856001398, "grad_norm": 0.08809378743171692, "learning_rate": 1.5537310070339685e-06, "loss": 8.5057, "step": 189900 }, { "epoch": 0.9483882244250793, "grad_norm": 0.09054802358150482, "learning_rate": 1.5522290920924179e-06, "loss": 8.4954, "step": 189910 }, { "epoch": 0.9484381632500187, "grad_norm": 0.08676068484783173, "learning_rate": 1.5507271771508674e-06, "loss": 8.4857, "step": 189920 }, { "epoch": 0.9484881020749582, "grad_norm": 0.0902305543422699, "learning_rate": 1.549225262209317e-06, "loss": 8.4918, "step": 189930 }, { "epoch": 0.9485380408998976, "grad_norm": 0.08811389654874802, "learning_rate": 1.5477233472677665e-06, "loss": 8.5045, "step": 189940 }, { "epoch": 0.9485879797248371, "grad_norm": 0.08572709560394287, "learning_rate": 1.546221432326216e-06, "loss": 8.5071, "step": 189950 }, { "epoch": 0.9486379185497765, "grad_norm": 0.09473098814487457, "learning_rate": 1.5447195173846656e-06, "loss": 8.5074, "step": 189960 }, { "epoch": 0.948687857374716, "grad_norm": 0.08549437671899796, "learning_rate": 1.543217602443115e-06, "loss": 8.5064, "step": 189970 }, { "epoch": 0.9487377961996554, "grad_norm": 0.08814796805381775, "learning_rate": 1.5417156875015647e-06, "loss": 8.5284, "step": 189980 }, { "epoch": 0.9487877350245949, "grad_norm": 0.0959198847413063, "learning_rate": 1.540213772560014e-06, "loss": 8.501, "step": 189990 }, { "epoch": 0.9488376738495343, "grad_norm": 0.08826687932014465, "learning_rate": 1.5387118576184638e-06, "loss": 8.5092, "step": 190000 }, { "epoch": 0.9488876126744737, "grad_norm": 0.09331593662500381, "learning_rate": 1.537209942676913e-06, "loss": 8.4996, "step": 190010 }, { "epoch": 0.9489375514994132, "grad_norm": 0.09332361817359924, "learning_rate": 1.5357080277353626e-06, "loss": 8.5019, "step": 190020 }, { "epoch": 0.9489874903243527, "grad_norm": 0.09323418140411377, "learning_rate": 1.5342061127938122e-06, "loss": 8.5008, "step": 190030 }, { "epoch": 0.9490374291492921, "grad_norm": 0.09189082682132721, "learning_rate": 1.5327041978522615e-06, "loss": 8.5162, "step": 190040 }, { "epoch": 0.9490873679742315, "grad_norm": 0.0889974981546402, "learning_rate": 1.5312022829107113e-06, "loss": 8.4987, "step": 190050 }, { "epoch": 0.949137306799171, "grad_norm": 0.08503816276788712, "learning_rate": 1.5297003679691606e-06, "loss": 8.5133, "step": 190060 }, { "epoch": 0.9491872456241105, "grad_norm": 0.09503103792667389, "learning_rate": 1.5281984530276103e-06, "loss": 8.4963, "step": 190070 }, { "epoch": 0.9492371844490499, "grad_norm": 0.08845657110214233, "learning_rate": 1.5266965380860597e-06, "loss": 8.4878, "step": 190080 }, { "epoch": 0.9492871232739893, "grad_norm": 0.08706570416688919, "learning_rate": 1.5251946231445092e-06, "loss": 8.5065, "step": 190090 }, { "epoch": 0.9493370620989288, "grad_norm": 0.08456262946128845, "learning_rate": 1.5236927082029588e-06, "loss": 8.502, "step": 190100 }, { "epoch": 0.9493870009238683, "grad_norm": 0.09131083637475967, "learning_rate": 1.5221907932614083e-06, "loss": 8.4949, "step": 190110 }, { "epoch": 0.9494369397488077, "grad_norm": 0.09142878651618958, "learning_rate": 1.5206888783198579e-06, "loss": 8.5019, "step": 190120 }, { "epoch": 0.9494868785737471, "grad_norm": 0.08769478648900986, "learning_rate": 1.5191869633783074e-06, "loss": 8.5095, "step": 190130 }, { "epoch": 0.9495368173986866, "grad_norm": 0.09371474385261536, "learning_rate": 1.5176850484367567e-06, "loss": 8.5074, "step": 190140 }, { "epoch": 0.9495867562236261, "grad_norm": 0.08654850721359253, "learning_rate": 1.5161831334952065e-06, "loss": 8.5099, "step": 190150 }, { "epoch": 0.9496366950485655, "grad_norm": 0.09408658742904663, "learning_rate": 1.5146812185536558e-06, "loss": 8.4846, "step": 190160 }, { "epoch": 0.9496866338735049, "grad_norm": 0.0925004854798317, "learning_rate": 1.5131793036121056e-06, "loss": 8.4927, "step": 190170 }, { "epoch": 0.9497365726984444, "grad_norm": 0.09245412796735764, "learning_rate": 1.511677388670555e-06, "loss": 8.5093, "step": 190180 }, { "epoch": 0.9497865115233839, "grad_norm": 0.0886409729719162, "learning_rate": 1.5101754737290044e-06, "loss": 8.5086, "step": 190190 }, { "epoch": 0.9498364503483233, "grad_norm": 0.08819697797298431, "learning_rate": 1.508673558787454e-06, "loss": 8.4979, "step": 190200 }, { "epoch": 0.9498863891732627, "grad_norm": 0.09439955651760101, "learning_rate": 1.5071716438459035e-06, "loss": 8.5077, "step": 190210 }, { "epoch": 0.9499363279982022, "grad_norm": 0.09136414527893066, "learning_rate": 1.505669728904353e-06, "loss": 8.5202, "step": 190220 }, { "epoch": 0.9499862668231417, "grad_norm": 0.09268520772457123, "learning_rate": 1.5041678139628026e-06, "loss": 8.498, "step": 190230 }, { "epoch": 0.9500362056480811, "grad_norm": 0.08827424794435501, "learning_rate": 1.502665899021252e-06, "loss": 8.5254, "step": 190240 }, { "epoch": 0.9500861444730205, "grad_norm": 0.09765148162841797, "learning_rate": 1.5011639840797017e-06, "loss": 8.4877, "step": 190250 }, { "epoch": 0.95013608329796, "grad_norm": 0.09497714787721634, "learning_rate": 1.499662069138151e-06, "loss": 8.495, "step": 190260 }, { "epoch": 0.9501860221228995, "grad_norm": 0.091663658618927, "learning_rate": 1.4981601541966008e-06, "loss": 8.5161, "step": 190270 }, { "epoch": 0.9502359609478389, "grad_norm": 0.09366423636674881, "learning_rate": 1.4966582392550501e-06, "loss": 8.4835, "step": 190280 }, { "epoch": 0.9502858997727783, "grad_norm": 0.09287559986114502, "learning_rate": 1.4951563243134997e-06, "loss": 8.4897, "step": 190290 }, { "epoch": 0.9503358385977178, "grad_norm": 0.09357742220163345, "learning_rate": 1.4936544093719492e-06, "loss": 8.4976, "step": 190300 }, { "epoch": 0.9503857774226573, "grad_norm": 0.09103827178478241, "learning_rate": 1.4921524944303987e-06, "loss": 8.4842, "step": 190310 }, { "epoch": 0.9504357162475967, "grad_norm": 0.08826170116662979, "learning_rate": 1.4906505794888483e-06, "loss": 8.5147, "step": 190320 }, { "epoch": 0.9504856550725361, "grad_norm": 0.08988435566425323, "learning_rate": 1.4891486645472978e-06, "loss": 8.4985, "step": 190330 }, { "epoch": 0.9505355938974756, "grad_norm": 0.09322842955589294, "learning_rate": 1.4876467496057474e-06, "loss": 8.5058, "step": 190340 }, { "epoch": 0.9505855327224151, "grad_norm": 0.09746944904327393, "learning_rate": 1.486144834664197e-06, "loss": 8.4953, "step": 190350 }, { "epoch": 0.9506354715473545, "grad_norm": 0.08793298900127411, "learning_rate": 1.4846429197226463e-06, "loss": 8.495, "step": 190360 }, { "epoch": 0.9506854103722939, "grad_norm": 0.08952205628156662, "learning_rate": 1.483141004781096e-06, "loss": 8.5206, "step": 190370 }, { "epoch": 0.9507353491972333, "grad_norm": 0.09018826484680176, "learning_rate": 1.4816390898395453e-06, "loss": 8.4989, "step": 190380 }, { "epoch": 0.9507852880221729, "grad_norm": 0.08909120410680771, "learning_rate": 1.480137174897995e-06, "loss": 8.5011, "step": 190390 }, { "epoch": 0.9508352268471123, "grad_norm": 0.0869319885969162, "learning_rate": 1.4786352599564444e-06, "loss": 8.4915, "step": 190400 }, { "epoch": 0.9508851656720517, "grad_norm": 0.0899181067943573, "learning_rate": 1.477133345014894e-06, "loss": 8.4897, "step": 190410 }, { "epoch": 0.9509351044969911, "grad_norm": 0.0868888720870018, "learning_rate": 1.4756314300733435e-06, "loss": 8.5152, "step": 190420 }, { "epoch": 0.9509850433219307, "grad_norm": 0.09073241055011749, "learning_rate": 1.474129515131793e-06, "loss": 8.5101, "step": 190430 }, { "epoch": 0.9510349821468701, "grad_norm": 0.08949577063322067, "learning_rate": 1.4726276001902426e-06, "loss": 8.4997, "step": 190440 }, { "epoch": 0.9510849209718095, "grad_norm": 0.09318594634532928, "learning_rate": 1.4711256852486921e-06, "loss": 8.4928, "step": 190450 }, { "epoch": 0.9511348597967489, "grad_norm": 0.08922556787729263, "learning_rate": 1.4696237703071415e-06, "loss": 8.5018, "step": 190460 }, { "epoch": 0.9511847986216885, "grad_norm": 0.08539392054080963, "learning_rate": 1.4681218553655912e-06, "loss": 8.4915, "step": 190470 }, { "epoch": 0.9512347374466279, "grad_norm": 0.09017814695835114, "learning_rate": 1.4666199404240406e-06, "loss": 8.4938, "step": 190480 }, { "epoch": 0.9512846762715673, "grad_norm": 0.08659885823726654, "learning_rate": 1.4651180254824903e-06, "loss": 8.487, "step": 190490 }, { "epoch": 0.9513346150965067, "grad_norm": 0.09293199330568314, "learning_rate": 1.4636161105409396e-06, "loss": 8.5041, "step": 190500 }, { "epoch": 0.9513845539214463, "grad_norm": 0.08663637191057205, "learning_rate": 1.4621141955993892e-06, "loss": 8.49, "step": 190510 }, { "epoch": 0.9514344927463857, "grad_norm": 0.09226275235414505, "learning_rate": 1.4606122806578387e-06, "loss": 8.4864, "step": 190520 }, { "epoch": 0.9514844315713251, "grad_norm": 0.09186282753944397, "learning_rate": 1.4591103657162883e-06, "loss": 8.5054, "step": 190530 }, { "epoch": 0.9515343703962645, "grad_norm": 0.09182189404964447, "learning_rate": 1.4576084507747378e-06, "loss": 8.5114, "step": 190540 }, { "epoch": 0.9515843092212041, "grad_norm": 0.09429624676704407, "learning_rate": 1.4561065358331874e-06, "loss": 8.5163, "step": 190550 }, { "epoch": 0.9516342480461435, "grad_norm": 0.09003430604934692, "learning_rate": 1.4546046208916367e-06, "loss": 8.4997, "step": 190560 }, { "epoch": 0.9516841868710829, "grad_norm": 0.0899423211812973, "learning_rate": 1.4531027059500864e-06, "loss": 8.5114, "step": 190570 }, { "epoch": 0.9517341256960223, "grad_norm": 0.0908636823296547, "learning_rate": 1.4516007910085358e-06, "loss": 8.5043, "step": 190580 }, { "epoch": 0.9517840645209619, "grad_norm": 0.095972940325737, "learning_rate": 1.4500988760669855e-06, "loss": 8.5164, "step": 190590 }, { "epoch": 0.9518340033459013, "grad_norm": 0.09233302623033524, "learning_rate": 1.4485969611254349e-06, "loss": 8.5112, "step": 190600 }, { "epoch": 0.9518839421708407, "grad_norm": 0.0877123698592186, "learning_rate": 1.4470950461838844e-06, "loss": 8.5003, "step": 190610 }, { "epoch": 0.9519338809957801, "grad_norm": 0.08805521577596664, "learning_rate": 1.445593131242334e-06, "loss": 8.5093, "step": 190620 }, { "epoch": 0.9519838198207197, "grad_norm": 0.09961655735969543, "learning_rate": 1.4440912163007835e-06, "loss": 8.4953, "step": 190630 }, { "epoch": 0.9520337586456591, "grad_norm": 0.09043627232313156, "learning_rate": 1.442589301359233e-06, "loss": 8.4995, "step": 190640 }, { "epoch": 0.9520836974705985, "grad_norm": 0.08915705978870392, "learning_rate": 1.4410873864176826e-06, "loss": 8.502, "step": 190650 }, { "epoch": 0.9521336362955379, "grad_norm": 0.08573364466428757, "learning_rate": 1.4395854714761321e-06, "loss": 8.5038, "step": 190660 }, { "epoch": 0.9521835751204775, "grad_norm": 0.08786933869123459, "learning_rate": 1.4380835565345817e-06, "loss": 8.5091, "step": 190670 }, { "epoch": 0.9522335139454169, "grad_norm": 0.08725972473621368, "learning_rate": 1.436581641593031e-06, "loss": 8.4832, "step": 190680 }, { "epoch": 0.9522834527703563, "grad_norm": 0.08919131010770798, "learning_rate": 1.4350797266514807e-06, "loss": 8.4989, "step": 190690 }, { "epoch": 0.9523333915952957, "grad_norm": 0.08816230297088623, "learning_rate": 1.43357781170993e-06, "loss": 8.4939, "step": 190700 }, { "epoch": 0.9523833304202353, "grad_norm": 0.08805028349161148, "learning_rate": 1.4320758967683798e-06, "loss": 8.5108, "step": 190710 }, { "epoch": 0.9524332692451747, "grad_norm": 0.08876121044158936, "learning_rate": 1.4305739818268292e-06, "loss": 8.5145, "step": 190720 }, { "epoch": 0.9524832080701141, "grad_norm": 0.09014929085969925, "learning_rate": 1.4290720668852787e-06, "loss": 8.5085, "step": 190730 }, { "epoch": 0.9525331468950535, "grad_norm": 0.09153331071138382, "learning_rate": 1.4275701519437283e-06, "loss": 8.4938, "step": 190740 }, { "epoch": 0.9525830857199931, "grad_norm": 0.09297018498182297, "learning_rate": 1.4260682370021778e-06, "loss": 8.5057, "step": 190750 }, { "epoch": 0.9526330245449325, "grad_norm": 0.09277532249689102, "learning_rate": 1.4245663220606273e-06, "loss": 8.5036, "step": 190760 }, { "epoch": 0.9526829633698719, "grad_norm": 0.08956871181726456, "learning_rate": 1.4230644071190769e-06, "loss": 8.4772, "step": 190770 }, { "epoch": 0.9527329021948113, "grad_norm": 0.09383154660463333, "learning_rate": 1.4215624921775262e-06, "loss": 8.5021, "step": 190780 }, { "epoch": 0.9527828410197509, "grad_norm": 0.08647419512271881, "learning_rate": 1.420060577235976e-06, "loss": 8.4969, "step": 190790 }, { "epoch": 0.9528327798446903, "grad_norm": 0.08593643456697464, "learning_rate": 1.4185586622944253e-06, "loss": 8.496, "step": 190800 }, { "epoch": 0.9528827186696297, "grad_norm": 0.09408606588840485, "learning_rate": 1.417056747352875e-06, "loss": 8.4957, "step": 190810 }, { "epoch": 0.9529326574945691, "grad_norm": 0.08725640922784805, "learning_rate": 1.4155548324113244e-06, "loss": 8.5259, "step": 190820 }, { "epoch": 0.9529825963195087, "grad_norm": 0.09070757776498795, "learning_rate": 1.414052917469774e-06, "loss": 8.4818, "step": 190830 }, { "epoch": 0.9530325351444481, "grad_norm": 0.08983830362558365, "learning_rate": 1.4125510025282235e-06, "loss": 8.4856, "step": 190840 }, { "epoch": 0.9530824739693875, "grad_norm": 0.08678752928972244, "learning_rate": 1.411049087586673e-06, "loss": 8.513, "step": 190850 }, { "epoch": 0.9531324127943269, "grad_norm": 0.09017140418291092, "learning_rate": 1.4095471726451226e-06, "loss": 8.518, "step": 190860 }, { "epoch": 0.9531823516192663, "grad_norm": 0.09126543253660202, "learning_rate": 1.408045257703572e-06, "loss": 8.508, "step": 190870 }, { "epoch": 0.9532322904442059, "grad_norm": 0.09493157267570496, "learning_rate": 1.4065433427620216e-06, "loss": 8.5058, "step": 190880 }, { "epoch": 0.9532822292691453, "grad_norm": 0.09498272091150284, "learning_rate": 1.4050414278204712e-06, "loss": 8.5069, "step": 190890 }, { "epoch": 0.9533321680940847, "grad_norm": 0.08452911674976349, "learning_rate": 1.4035395128789205e-06, "loss": 8.5176, "step": 190900 }, { "epoch": 0.9533821069190241, "grad_norm": 0.08910375088453293, "learning_rate": 1.4020375979373703e-06, "loss": 8.5208, "step": 190910 }, { "epoch": 0.9534320457439637, "grad_norm": 0.10094549506902695, "learning_rate": 1.4005356829958196e-06, "loss": 8.5057, "step": 190920 }, { "epoch": 0.9534819845689031, "grad_norm": 0.08806487917900085, "learning_rate": 1.3990337680542694e-06, "loss": 8.4974, "step": 190930 }, { "epoch": 0.9535319233938425, "grad_norm": 0.09561792016029358, "learning_rate": 1.3975318531127187e-06, "loss": 8.4931, "step": 190940 }, { "epoch": 0.9535818622187819, "grad_norm": 0.08623037487268448, "learning_rate": 1.3960299381711682e-06, "loss": 8.496, "step": 190950 }, { "epoch": 0.9536318010437215, "grad_norm": 0.08998066931962967, "learning_rate": 1.3945280232296178e-06, "loss": 8.5056, "step": 190960 }, { "epoch": 0.9536817398686609, "grad_norm": 0.10207744687795639, "learning_rate": 1.3930261082880673e-06, "loss": 8.4875, "step": 190970 }, { "epoch": 0.9537316786936003, "grad_norm": 0.09375947713851929, "learning_rate": 1.3915241933465169e-06, "loss": 8.4791, "step": 190980 }, { "epoch": 0.9537816175185397, "grad_norm": 0.09087695181369781, "learning_rate": 1.3900222784049664e-06, "loss": 8.4961, "step": 190990 }, { "epoch": 0.9538315563434793, "grad_norm": 0.09509552270174026, "learning_rate": 1.3885203634634157e-06, "loss": 8.4978, "step": 191000 }, { "epoch": 0.9538814951684187, "grad_norm": 0.08731069415807724, "learning_rate": 1.3870184485218655e-06, "loss": 8.5142, "step": 191010 }, { "epoch": 0.9539314339933581, "grad_norm": 0.09380168467760086, "learning_rate": 1.3855165335803148e-06, "loss": 8.4932, "step": 191020 }, { "epoch": 0.9539813728182975, "grad_norm": 0.09554290771484375, "learning_rate": 1.3840146186387646e-06, "loss": 8.4965, "step": 191030 }, { "epoch": 0.9540313116432371, "grad_norm": 0.09323696792125702, "learning_rate": 1.382512703697214e-06, "loss": 8.509, "step": 191040 }, { "epoch": 0.9540812504681765, "grad_norm": 0.09462062269449234, "learning_rate": 1.3810107887556635e-06, "loss": 8.5129, "step": 191050 }, { "epoch": 0.9541311892931159, "grad_norm": 0.08865117281675339, "learning_rate": 1.379508873814113e-06, "loss": 8.5054, "step": 191060 }, { "epoch": 0.9541811281180553, "grad_norm": 0.09803714603185654, "learning_rate": 1.3780069588725625e-06, "loss": 8.5022, "step": 191070 }, { "epoch": 0.9542310669429949, "grad_norm": 0.09523754566907883, "learning_rate": 1.376505043931012e-06, "loss": 8.4978, "step": 191080 }, { "epoch": 0.9542810057679343, "grad_norm": 0.09519781172275543, "learning_rate": 1.3750031289894616e-06, "loss": 8.4971, "step": 191090 }, { "epoch": 0.9543309445928737, "grad_norm": 0.09077487140893936, "learning_rate": 1.373501214047911e-06, "loss": 8.5253, "step": 191100 }, { "epoch": 0.9543808834178131, "grad_norm": 0.08766099810600281, "learning_rate": 1.3719992991063607e-06, "loss": 8.4914, "step": 191110 }, { "epoch": 0.9544308222427527, "grad_norm": 0.08919545263051987, "learning_rate": 1.37049738416481e-06, "loss": 8.4901, "step": 191120 }, { "epoch": 0.9544807610676921, "grad_norm": 0.091376394033432, "learning_rate": 1.3689954692232598e-06, "loss": 8.4782, "step": 191130 }, { "epoch": 0.9545306998926315, "grad_norm": 0.09148900955915451, "learning_rate": 1.3674935542817091e-06, "loss": 8.5037, "step": 191140 }, { "epoch": 0.9545806387175709, "grad_norm": 0.08681777864694595, "learning_rate": 1.3659916393401587e-06, "loss": 8.4946, "step": 191150 }, { "epoch": 0.9546305775425105, "grad_norm": 0.09286382794380188, "learning_rate": 1.3644897243986082e-06, "loss": 8.4937, "step": 191160 }, { "epoch": 0.9546805163674499, "grad_norm": 0.08869923651218414, "learning_rate": 1.3629878094570578e-06, "loss": 8.5241, "step": 191170 }, { "epoch": 0.9547304551923893, "grad_norm": 0.09262396395206451, "learning_rate": 1.3614858945155073e-06, "loss": 8.4772, "step": 191180 }, { "epoch": 0.9547803940173287, "grad_norm": 0.08976420015096664, "learning_rate": 1.3599839795739568e-06, "loss": 8.5162, "step": 191190 }, { "epoch": 0.9548303328422683, "grad_norm": 0.08680476248264313, "learning_rate": 1.3584820646324064e-06, "loss": 8.5119, "step": 191200 }, { "epoch": 0.9548802716672077, "grad_norm": 0.09427861869335175, "learning_rate": 1.356980149690856e-06, "loss": 8.5139, "step": 191210 }, { "epoch": 0.9549302104921471, "grad_norm": 0.08783675730228424, "learning_rate": 1.3554782347493053e-06, "loss": 8.5041, "step": 191220 }, { "epoch": 0.9549801493170865, "grad_norm": 0.09209182858467102, "learning_rate": 1.353976319807755e-06, "loss": 8.5103, "step": 191230 }, { "epoch": 0.9550300881420261, "grad_norm": 0.08795662224292755, "learning_rate": 1.3524744048662043e-06, "loss": 8.5108, "step": 191240 }, { "epoch": 0.9550800269669655, "grad_norm": 0.09797421097755432, "learning_rate": 1.350972489924654e-06, "loss": 8.4915, "step": 191250 }, { "epoch": 0.9551299657919049, "grad_norm": 0.0975293442606926, "learning_rate": 1.3494705749831034e-06, "loss": 8.5011, "step": 191260 }, { "epoch": 0.9551799046168443, "grad_norm": 0.09412509948015213, "learning_rate": 1.347968660041553e-06, "loss": 8.5011, "step": 191270 }, { "epoch": 0.9552298434417839, "grad_norm": 0.09029638022184372, "learning_rate": 1.3464667451000025e-06, "loss": 8.5052, "step": 191280 }, { "epoch": 0.9552797822667233, "grad_norm": 0.09370943903923035, "learning_rate": 1.344964830158452e-06, "loss": 8.4932, "step": 191290 }, { "epoch": 0.9553297210916627, "grad_norm": 0.09150079637765884, "learning_rate": 1.3434629152169016e-06, "loss": 8.5044, "step": 191300 }, { "epoch": 0.9553796599166021, "grad_norm": 0.09412145614624023, "learning_rate": 1.3419610002753511e-06, "loss": 8.5145, "step": 191310 }, { "epoch": 0.9554295987415417, "grad_norm": 0.09421419352293015, "learning_rate": 1.3404590853338005e-06, "loss": 8.5175, "step": 191320 }, { "epoch": 0.9554795375664811, "grad_norm": 0.08627758920192719, "learning_rate": 1.3389571703922502e-06, "loss": 8.497, "step": 191330 }, { "epoch": 0.9555294763914205, "grad_norm": 0.0896778404712677, "learning_rate": 1.3374552554506996e-06, "loss": 8.4944, "step": 191340 }, { "epoch": 0.9555794152163599, "grad_norm": 0.09137307852506638, "learning_rate": 1.3359533405091493e-06, "loss": 8.4975, "step": 191350 }, { "epoch": 0.9556293540412995, "grad_norm": 0.0910387709736824, "learning_rate": 1.3344514255675987e-06, "loss": 8.5027, "step": 191360 }, { "epoch": 0.9556792928662389, "grad_norm": 0.09388083219528198, "learning_rate": 1.3329495106260482e-06, "loss": 8.5204, "step": 191370 }, { "epoch": 0.9557292316911783, "grad_norm": 0.0911535695195198, "learning_rate": 1.3314475956844977e-06, "loss": 8.5079, "step": 191380 }, { "epoch": 0.9557791705161177, "grad_norm": 0.0940667912364006, "learning_rate": 1.3299456807429473e-06, "loss": 8.5054, "step": 191390 }, { "epoch": 0.9558291093410572, "grad_norm": 0.08681704849004745, "learning_rate": 1.3284437658013968e-06, "loss": 8.4992, "step": 191400 }, { "epoch": 0.9558790481659967, "grad_norm": 0.09067001193761826, "learning_rate": 1.3269418508598464e-06, "loss": 8.5076, "step": 191410 }, { "epoch": 0.9559289869909361, "grad_norm": 0.08941010385751724, "learning_rate": 1.325439935918296e-06, "loss": 8.4981, "step": 191420 }, { "epoch": 0.9559789258158755, "grad_norm": 0.08782663941383362, "learning_rate": 1.3239380209767455e-06, "loss": 8.4801, "step": 191430 }, { "epoch": 0.956028864640815, "grad_norm": 0.0957464948296547, "learning_rate": 1.3224361060351948e-06, "loss": 8.4795, "step": 191440 }, { "epoch": 0.9560788034657545, "grad_norm": 0.0979733094573021, "learning_rate": 1.3209341910936445e-06, "loss": 8.4909, "step": 191450 }, { "epoch": 0.9561287422906939, "grad_norm": 0.09294435381889343, "learning_rate": 1.3194322761520939e-06, "loss": 8.4827, "step": 191460 }, { "epoch": 0.9561786811156333, "grad_norm": 0.09183111786842346, "learning_rate": 1.3179303612105436e-06, "loss": 8.4947, "step": 191470 }, { "epoch": 0.9562286199405728, "grad_norm": 0.0917709469795227, "learning_rate": 1.316428446268993e-06, "loss": 8.5117, "step": 191480 }, { "epoch": 0.9562785587655123, "grad_norm": 0.0903320387005806, "learning_rate": 1.3149265313274425e-06, "loss": 8.4863, "step": 191490 }, { "epoch": 0.9563284975904517, "grad_norm": 0.09910201281309128, "learning_rate": 1.313424616385892e-06, "loss": 8.519, "step": 191500 }, { "epoch": 0.9563784364153911, "grad_norm": 0.08993766456842422, "learning_rate": 1.3119227014443416e-06, "loss": 8.5283, "step": 191510 }, { "epoch": 0.9564283752403306, "grad_norm": 0.09398555755615234, "learning_rate": 1.3104207865027911e-06, "loss": 8.4969, "step": 191520 }, { "epoch": 0.9564783140652701, "grad_norm": 0.08856987208127975, "learning_rate": 1.3089188715612407e-06, "loss": 8.5304, "step": 191530 }, { "epoch": 0.9565282528902095, "grad_norm": 0.09370505064725876, "learning_rate": 1.30741695661969e-06, "loss": 8.5073, "step": 191540 }, { "epoch": 0.9565781917151489, "grad_norm": 0.08654750138521194, "learning_rate": 1.3059150416781398e-06, "loss": 8.502, "step": 191550 }, { "epoch": 0.9566281305400884, "grad_norm": 0.08909036964178085, "learning_rate": 1.304413126736589e-06, "loss": 8.5031, "step": 191560 }, { "epoch": 0.9566780693650279, "grad_norm": 0.09467563778162003, "learning_rate": 1.3029112117950388e-06, "loss": 8.4999, "step": 191570 }, { "epoch": 0.9567280081899673, "grad_norm": 0.08794749528169632, "learning_rate": 1.3014092968534882e-06, "loss": 8.5046, "step": 191580 }, { "epoch": 0.9567779470149067, "grad_norm": 0.08709059655666351, "learning_rate": 1.2999073819119377e-06, "loss": 8.4894, "step": 191590 }, { "epoch": 0.9568278858398462, "grad_norm": 0.09558872878551483, "learning_rate": 1.2984054669703873e-06, "loss": 8.5192, "step": 191600 }, { "epoch": 0.9568778246647857, "grad_norm": 0.09214767813682556, "learning_rate": 1.2969035520288368e-06, "loss": 8.5062, "step": 191610 }, { "epoch": 0.9569277634897251, "grad_norm": 0.08921287953853607, "learning_rate": 1.2954016370872863e-06, "loss": 8.5002, "step": 191620 }, { "epoch": 0.9569777023146645, "grad_norm": 0.09433959424495697, "learning_rate": 1.2938997221457359e-06, "loss": 8.497, "step": 191630 }, { "epoch": 0.957027641139604, "grad_norm": 0.08520886301994324, "learning_rate": 1.2923978072041852e-06, "loss": 8.4983, "step": 191640 }, { "epoch": 0.9570775799645435, "grad_norm": 0.0877714678645134, "learning_rate": 1.290895892262635e-06, "loss": 8.513, "step": 191650 }, { "epoch": 0.9571275187894829, "grad_norm": 0.09116245806217194, "learning_rate": 1.2893939773210843e-06, "loss": 8.4974, "step": 191660 }, { "epoch": 0.9571774576144223, "grad_norm": 0.08422242105007172, "learning_rate": 1.287892062379534e-06, "loss": 8.5094, "step": 191670 }, { "epoch": 0.9572273964393618, "grad_norm": 0.08759858459234238, "learning_rate": 1.2863901474379834e-06, "loss": 8.4916, "step": 191680 }, { "epoch": 0.9572773352643013, "grad_norm": 0.08717787265777588, "learning_rate": 1.284888232496433e-06, "loss": 8.5063, "step": 191690 }, { "epoch": 0.9573272740892407, "grad_norm": 0.09190387278795242, "learning_rate": 1.2833863175548825e-06, "loss": 8.4875, "step": 191700 }, { "epoch": 0.9573772129141801, "grad_norm": 0.09158718585968018, "learning_rate": 1.281884402613332e-06, "loss": 8.5172, "step": 191710 }, { "epoch": 0.9574271517391196, "grad_norm": 0.09621883928775787, "learning_rate": 1.2803824876717816e-06, "loss": 8.5133, "step": 191720 }, { "epoch": 0.957477090564059, "grad_norm": 0.08670492470264435, "learning_rate": 1.2788805727302311e-06, "loss": 8.5062, "step": 191730 }, { "epoch": 0.9575270293889985, "grad_norm": 0.08832064270973206, "learning_rate": 1.2773786577886807e-06, "loss": 8.4969, "step": 191740 }, { "epoch": 0.9575769682139379, "grad_norm": 0.08412336558103561, "learning_rate": 1.2758767428471302e-06, "loss": 8.5298, "step": 191750 }, { "epoch": 0.9576269070388774, "grad_norm": 0.09177365899085999, "learning_rate": 1.2743748279055795e-06, "loss": 8.494, "step": 191760 }, { "epoch": 0.9576768458638169, "grad_norm": 0.08740728348493576, "learning_rate": 1.2728729129640293e-06, "loss": 8.4938, "step": 191770 }, { "epoch": 0.9577267846887563, "grad_norm": 0.08786563575267792, "learning_rate": 1.2713709980224786e-06, "loss": 8.5155, "step": 191780 }, { "epoch": 0.9577767235136957, "grad_norm": 0.09397321939468384, "learning_rate": 1.2698690830809284e-06, "loss": 8.5062, "step": 191790 }, { "epoch": 0.9578266623386352, "grad_norm": 0.08770589530467987, "learning_rate": 1.2683671681393777e-06, "loss": 8.485, "step": 191800 }, { "epoch": 0.9578766011635746, "grad_norm": 0.08947274088859558, "learning_rate": 1.2668652531978272e-06, "loss": 8.4993, "step": 191810 }, { "epoch": 0.9579265399885141, "grad_norm": 0.0907110795378685, "learning_rate": 1.2653633382562768e-06, "loss": 8.5106, "step": 191820 }, { "epoch": 0.9579764788134535, "grad_norm": 0.08664959669113159, "learning_rate": 1.2638614233147263e-06, "loss": 8.4997, "step": 191830 }, { "epoch": 0.9580264176383929, "grad_norm": 0.09159454703330994, "learning_rate": 1.2623595083731759e-06, "loss": 8.5125, "step": 191840 }, { "epoch": 0.9580763564633324, "grad_norm": 0.09214504808187485, "learning_rate": 1.2608575934316254e-06, "loss": 8.4985, "step": 191850 }, { "epoch": 0.9581262952882719, "grad_norm": 0.09041109681129456, "learning_rate": 1.2593556784900747e-06, "loss": 8.4857, "step": 191860 }, { "epoch": 0.9581762341132113, "grad_norm": 0.09253381192684174, "learning_rate": 1.2578537635485245e-06, "loss": 8.4968, "step": 191870 }, { "epoch": 0.9582261729381507, "grad_norm": 0.08627009391784668, "learning_rate": 1.2563518486069738e-06, "loss": 8.4892, "step": 191880 }, { "epoch": 0.9582761117630902, "grad_norm": 0.08717307448387146, "learning_rate": 1.2548499336654236e-06, "loss": 8.4982, "step": 191890 }, { "epoch": 0.9583260505880297, "grad_norm": 0.08845217525959015, "learning_rate": 1.253348018723873e-06, "loss": 8.4926, "step": 191900 }, { "epoch": 0.9583759894129691, "grad_norm": 0.09016247093677521, "learning_rate": 1.2518461037823225e-06, "loss": 8.4968, "step": 191910 }, { "epoch": 0.9584259282379085, "grad_norm": 0.08623816072940826, "learning_rate": 1.250344188840772e-06, "loss": 8.4969, "step": 191920 }, { "epoch": 0.958475867062848, "grad_norm": 0.0954393595457077, "learning_rate": 1.2488422738992216e-06, "loss": 8.517, "step": 191930 }, { "epoch": 0.9585258058877875, "grad_norm": 0.09252654761075974, "learning_rate": 1.247340358957671e-06, "loss": 8.4971, "step": 191940 }, { "epoch": 0.9585757447127269, "grad_norm": 0.08835329860448837, "learning_rate": 1.2458384440161206e-06, "loss": 8.5138, "step": 191950 }, { "epoch": 0.9586256835376663, "grad_norm": 0.08202789723873138, "learning_rate": 1.24433652907457e-06, "loss": 8.4846, "step": 191960 }, { "epoch": 0.9586756223626058, "grad_norm": 0.09334851056337357, "learning_rate": 1.2428346141330197e-06, "loss": 8.5079, "step": 191970 }, { "epoch": 0.9587255611875453, "grad_norm": 0.08949562907218933, "learning_rate": 1.241332699191469e-06, "loss": 8.4951, "step": 191980 }, { "epoch": 0.9587755000124847, "grad_norm": 0.09301643818616867, "learning_rate": 1.2398307842499188e-06, "loss": 8.5035, "step": 191990 }, { "epoch": 0.9588254388374241, "grad_norm": 0.08926796913146973, "learning_rate": 1.2383288693083681e-06, "loss": 8.5123, "step": 192000 }, { "epoch": 0.9588753776623636, "grad_norm": 0.09037186205387115, "learning_rate": 1.236826954366818e-06, "loss": 8.5027, "step": 192010 }, { "epoch": 0.9589253164873031, "grad_norm": 0.09102381020784378, "learning_rate": 1.2353250394252672e-06, "loss": 8.5107, "step": 192020 }, { "epoch": 0.9589752553122425, "grad_norm": 0.08897027373313904, "learning_rate": 1.2338231244837168e-06, "loss": 8.5083, "step": 192030 }, { "epoch": 0.9590251941371819, "grad_norm": 0.08504099398851395, "learning_rate": 1.2323212095421663e-06, "loss": 8.5118, "step": 192040 }, { "epoch": 0.9590751329621214, "grad_norm": 0.09035465121269226, "learning_rate": 1.2308192946006159e-06, "loss": 8.502, "step": 192050 }, { "epoch": 0.9591250717870609, "grad_norm": 0.0969761461019516, "learning_rate": 1.2293173796590654e-06, "loss": 8.5028, "step": 192060 }, { "epoch": 0.9591750106120003, "grad_norm": 0.0866863876581192, "learning_rate": 1.227815464717515e-06, "loss": 8.5155, "step": 192070 }, { "epoch": 0.9592249494369397, "grad_norm": 0.08988199383020401, "learning_rate": 1.2263135497759643e-06, "loss": 8.4989, "step": 192080 }, { "epoch": 0.9592748882618792, "grad_norm": 0.09326018393039703, "learning_rate": 1.224811634834414e-06, "loss": 8.5031, "step": 192090 }, { "epoch": 0.9593248270868187, "grad_norm": 0.0929257795214653, "learning_rate": 1.2233097198928634e-06, "loss": 8.4906, "step": 192100 }, { "epoch": 0.9593747659117581, "grad_norm": 0.09300316870212555, "learning_rate": 1.2218078049513131e-06, "loss": 8.5025, "step": 192110 }, { "epoch": 0.9594247047366975, "grad_norm": 0.09423361718654633, "learning_rate": 1.2203058900097624e-06, "loss": 8.5001, "step": 192120 }, { "epoch": 0.959474643561637, "grad_norm": 0.09319435060024261, "learning_rate": 1.218803975068212e-06, "loss": 8.4935, "step": 192130 }, { "epoch": 0.9595245823865765, "grad_norm": 0.09030423313379288, "learning_rate": 1.2173020601266615e-06, "loss": 8.4956, "step": 192140 }, { "epoch": 0.9595745212115159, "grad_norm": 0.08913615345954895, "learning_rate": 1.215800145185111e-06, "loss": 8.5366, "step": 192150 }, { "epoch": 0.9596244600364553, "grad_norm": 0.09200779348611832, "learning_rate": 1.2142982302435606e-06, "loss": 8.5005, "step": 192160 }, { "epoch": 0.9596743988613948, "grad_norm": 0.08741243183612823, "learning_rate": 1.2127963153020102e-06, "loss": 8.4892, "step": 192170 }, { "epoch": 0.9597243376863343, "grad_norm": 0.09448469430208206, "learning_rate": 1.2112944003604595e-06, "loss": 8.4926, "step": 192180 }, { "epoch": 0.9597742765112737, "grad_norm": 0.0888211578130722, "learning_rate": 1.2097924854189092e-06, "loss": 8.4874, "step": 192190 }, { "epoch": 0.9598242153362131, "grad_norm": 0.0948527380824089, "learning_rate": 1.2082905704773586e-06, "loss": 8.5015, "step": 192200 }, { "epoch": 0.9598741541611526, "grad_norm": 0.09242790937423706, "learning_rate": 1.2067886555358083e-06, "loss": 8.4939, "step": 192210 }, { "epoch": 0.959924092986092, "grad_norm": 0.09047332406044006, "learning_rate": 1.2052867405942577e-06, "loss": 8.4966, "step": 192220 }, { "epoch": 0.9599740318110315, "grad_norm": 0.09905733168125153, "learning_rate": 1.2037848256527072e-06, "loss": 8.5018, "step": 192230 }, { "epoch": 0.9600239706359709, "grad_norm": 0.09126092493534088, "learning_rate": 1.2022829107111568e-06, "loss": 8.5119, "step": 192240 }, { "epoch": 0.9600739094609104, "grad_norm": 0.09281742572784424, "learning_rate": 1.2007809957696063e-06, "loss": 8.5087, "step": 192250 }, { "epoch": 0.9601238482858498, "grad_norm": 0.09464964270591736, "learning_rate": 1.1992790808280558e-06, "loss": 8.505, "step": 192260 }, { "epoch": 0.9601737871107893, "grad_norm": 0.09487392008304596, "learning_rate": 1.1977771658865054e-06, "loss": 8.4981, "step": 192270 }, { "epoch": 0.9602237259357287, "grad_norm": 0.09476413577795029, "learning_rate": 1.196275250944955e-06, "loss": 8.4889, "step": 192280 }, { "epoch": 0.9602736647606682, "grad_norm": 0.09175974875688553, "learning_rate": 1.1947733360034045e-06, "loss": 8.4984, "step": 192290 }, { "epoch": 0.9603236035856076, "grad_norm": 0.09201910346746445, "learning_rate": 1.1932714210618538e-06, "loss": 8.4981, "step": 192300 }, { "epoch": 0.9603735424105471, "grad_norm": 0.0915134996175766, "learning_rate": 1.1917695061203036e-06, "loss": 8.5039, "step": 192310 }, { "epoch": 0.9604234812354865, "grad_norm": 0.08767704665660858, "learning_rate": 1.1902675911787529e-06, "loss": 8.5014, "step": 192320 }, { "epoch": 0.960473420060426, "grad_norm": 0.0916207805275917, "learning_rate": 1.1887656762372026e-06, "loss": 8.5048, "step": 192330 }, { "epoch": 0.9605233588853654, "grad_norm": 0.09325848519802094, "learning_rate": 1.187263761295652e-06, "loss": 8.5005, "step": 192340 }, { "epoch": 0.9605732977103049, "grad_norm": 0.08897262811660767, "learning_rate": 1.1857618463541015e-06, "loss": 8.5037, "step": 192350 }, { "epoch": 0.9606232365352443, "grad_norm": 0.08885764330625534, "learning_rate": 1.184259931412551e-06, "loss": 8.5089, "step": 192360 }, { "epoch": 0.9606731753601838, "grad_norm": 0.085177481174469, "learning_rate": 1.1827580164710006e-06, "loss": 8.5113, "step": 192370 }, { "epoch": 0.9607231141851232, "grad_norm": 0.08419841527938843, "learning_rate": 1.1812561015294501e-06, "loss": 8.48, "step": 192380 }, { "epoch": 0.9607730530100627, "grad_norm": 0.08771277964115143, "learning_rate": 1.1797541865878997e-06, "loss": 8.5046, "step": 192390 }, { "epoch": 0.9608229918350021, "grad_norm": 0.09421413391828537, "learning_rate": 1.178252271646349e-06, "loss": 8.479, "step": 192400 }, { "epoch": 0.9608729306599416, "grad_norm": 0.09432334452867508, "learning_rate": 1.1767503567047988e-06, "loss": 8.4848, "step": 192410 }, { "epoch": 0.960922869484881, "grad_norm": 0.09068858623504639, "learning_rate": 1.175248441763248e-06, "loss": 8.5012, "step": 192420 }, { "epoch": 0.9609728083098205, "grad_norm": 0.09556064754724503, "learning_rate": 1.1737465268216979e-06, "loss": 8.5111, "step": 192430 }, { "epoch": 0.9610227471347599, "grad_norm": 0.08801019191741943, "learning_rate": 1.1722446118801472e-06, "loss": 8.4905, "step": 192440 }, { "epoch": 0.9610726859596994, "grad_norm": 0.09224274009466171, "learning_rate": 1.1707426969385967e-06, "loss": 8.4729, "step": 192450 }, { "epoch": 0.9611226247846388, "grad_norm": 0.08931384235620499, "learning_rate": 1.1692407819970463e-06, "loss": 8.5073, "step": 192460 }, { "epoch": 0.9611725636095783, "grad_norm": 0.09076393395662308, "learning_rate": 1.1677388670554958e-06, "loss": 8.4992, "step": 192470 }, { "epoch": 0.9612225024345177, "grad_norm": 0.08941461890935898, "learning_rate": 1.1662369521139454e-06, "loss": 8.5019, "step": 192480 }, { "epoch": 0.9612724412594572, "grad_norm": 0.08615435659885406, "learning_rate": 1.164735037172395e-06, "loss": 8.4891, "step": 192490 }, { "epoch": 0.9613223800843966, "grad_norm": 0.08833363652229309, "learning_rate": 1.1632331222308442e-06, "loss": 8.5133, "step": 192500 }, { "epoch": 0.961372318909336, "grad_norm": 0.09384980797767639, "learning_rate": 1.161731207289294e-06, "loss": 8.5135, "step": 192510 }, { "epoch": 0.9614222577342755, "grad_norm": 0.09676310420036316, "learning_rate": 1.1602292923477433e-06, "loss": 8.4996, "step": 192520 }, { "epoch": 0.961472196559215, "grad_norm": 0.09096337109804153, "learning_rate": 1.158727377406193e-06, "loss": 8.4903, "step": 192530 }, { "epoch": 0.9615221353841544, "grad_norm": 0.09308335185050964, "learning_rate": 1.1572254624646424e-06, "loss": 8.5021, "step": 192540 }, { "epoch": 0.9615720742090939, "grad_norm": 0.0906360074877739, "learning_rate": 1.1557235475230922e-06, "loss": 8.5127, "step": 192550 }, { "epoch": 0.9616220130340333, "grad_norm": 0.09376540780067444, "learning_rate": 1.1542216325815415e-06, "loss": 8.5081, "step": 192560 }, { "epoch": 0.9616719518589728, "grad_norm": 0.09033221006393433, "learning_rate": 1.152719717639991e-06, "loss": 8.5068, "step": 192570 }, { "epoch": 0.9617218906839122, "grad_norm": 0.0966610535979271, "learning_rate": 1.1512178026984406e-06, "loss": 8.4919, "step": 192580 }, { "epoch": 0.9617718295088517, "grad_norm": 0.09105295687913895, "learning_rate": 1.1497158877568901e-06, "loss": 8.5013, "step": 192590 }, { "epoch": 0.9618217683337911, "grad_norm": 0.08788570016622543, "learning_rate": 1.1482139728153397e-06, "loss": 8.4736, "step": 192600 }, { "epoch": 0.9618717071587306, "grad_norm": 0.09203591197729111, "learning_rate": 1.1467120578737892e-06, "loss": 8.493, "step": 192610 }, { "epoch": 0.96192164598367, "grad_norm": 0.09675536304712296, "learning_rate": 1.1452101429322385e-06, "loss": 8.5179, "step": 192620 }, { "epoch": 0.9619715848086094, "grad_norm": 0.09318403899669647, "learning_rate": 1.1437082279906883e-06, "loss": 8.5007, "step": 192630 }, { "epoch": 0.9620215236335489, "grad_norm": 0.09018708020448685, "learning_rate": 1.1422063130491376e-06, "loss": 8.4895, "step": 192640 }, { "epoch": 0.9620714624584884, "grad_norm": 0.09459145367145538, "learning_rate": 1.1407043981075874e-06, "loss": 8.5098, "step": 192650 }, { "epoch": 0.9621214012834278, "grad_norm": 0.09730951488018036, "learning_rate": 1.1392024831660367e-06, "loss": 8.5015, "step": 192660 }, { "epoch": 0.9621713401083672, "grad_norm": 0.09757350385189056, "learning_rate": 1.1377005682244863e-06, "loss": 8.5212, "step": 192670 }, { "epoch": 0.9622212789333067, "grad_norm": 0.08909895271062851, "learning_rate": 1.1361986532829358e-06, "loss": 8.508, "step": 192680 }, { "epoch": 0.9622712177582462, "grad_norm": 0.09438234567642212, "learning_rate": 1.1346967383413853e-06, "loss": 8.4956, "step": 192690 }, { "epoch": 0.9623211565831856, "grad_norm": 0.08857671916484833, "learning_rate": 1.1331948233998349e-06, "loss": 8.5006, "step": 192700 }, { "epoch": 0.962371095408125, "grad_norm": 0.0914953425526619, "learning_rate": 1.1316929084582844e-06, "loss": 8.5024, "step": 192710 }, { "epoch": 0.9624210342330645, "grad_norm": 0.08889735490083694, "learning_rate": 1.1301909935167338e-06, "loss": 8.5009, "step": 192720 }, { "epoch": 0.962470973058004, "grad_norm": 0.09225352108478546, "learning_rate": 1.1286890785751835e-06, "loss": 8.5067, "step": 192730 }, { "epoch": 0.9625209118829434, "grad_norm": 0.09539781510829926, "learning_rate": 1.1271871636336328e-06, "loss": 8.5049, "step": 192740 }, { "epoch": 0.9625708507078828, "grad_norm": 0.0937805026769638, "learning_rate": 1.1256852486920826e-06, "loss": 8.5277, "step": 192750 }, { "epoch": 0.9626207895328223, "grad_norm": 0.08874644339084625, "learning_rate": 1.124183333750532e-06, "loss": 8.5063, "step": 192760 }, { "epoch": 0.9626707283577618, "grad_norm": 0.0926111564040184, "learning_rate": 1.1226814188089815e-06, "loss": 8.495, "step": 192770 }, { "epoch": 0.9627206671827012, "grad_norm": 0.09661145508289337, "learning_rate": 1.121179503867431e-06, "loss": 8.5039, "step": 192780 }, { "epoch": 0.9627706060076406, "grad_norm": 0.09471116960048676, "learning_rate": 1.1196775889258806e-06, "loss": 8.4877, "step": 192790 }, { "epoch": 0.9628205448325801, "grad_norm": 0.09448757022619247, "learning_rate": 1.11817567398433e-06, "loss": 8.4881, "step": 192800 }, { "epoch": 0.9628704836575196, "grad_norm": 0.09081356972455978, "learning_rate": 1.1166737590427796e-06, "loss": 8.5098, "step": 192810 }, { "epoch": 0.962920422482459, "grad_norm": 0.08594221621751785, "learning_rate": 1.1151718441012292e-06, "loss": 8.51, "step": 192820 }, { "epoch": 0.9629703613073984, "grad_norm": 0.08855900168418884, "learning_rate": 1.1136699291596787e-06, "loss": 8.5147, "step": 192830 }, { "epoch": 0.9630203001323379, "grad_norm": 0.09176114946603775, "learning_rate": 1.112168014218128e-06, "loss": 8.5023, "step": 192840 }, { "epoch": 0.9630702389572773, "grad_norm": 0.09412090480327606, "learning_rate": 1.1106660992765778e-06, "loss": 8.5108, "step": 192850 }, { "epoch": 0.9631201777822168, "grad_norm": 0.0874660536646843, "learning_rate": 1.1091641843350272e-06, "loss": 8.5087, "step": 192860 }, { "epoch": 0.9631701166071562, "grad_norm": 0.08917557448148727, "learning_rate": 1.107662269393477e-06, "loss": 8.502, "step": 192870 }, { "epoch": 0.9632200554320957, "grad_norm": 0.09307212382555008, "learning_rate": 1.1061603544519262e-06, "loss": 8.5, "step": 192880 }, { "epoch": 0.9632699942570351, "grad_norm": 0.0892016664147377, "learning_rate": 1.1046584395103758e-06, "loss": 8.4979, "step": 192890 }, { "epoch": 0.9633199330819746, "grad_norm": 0.08102846145629883, "learning_rate": 1.1031565245688253e-06, "loss": 8.511, "step": 192900 }, { "epoch": 0.963369871906914, "grad_norm": 0.09051341563463211, "learning_rate": 1.1016546096272749e-06, "loss": 8.5003, "step": 192910 }, { "epoch": 0.9634198107318535, "grad_norm": 0.09337877482175827, "learning_rate": 1.1001526946857244e-06, "loss": 8.5054, "step": 192920 }, { "epoch": 0.9634697495567929, "grad_norm": 0.0894298255443573, "learning_rate": 1.098650779744174e-06, "loss": 8.5, "step": 192930 }, { "epoch": 0.9635196883817324, "grad_norm": 0.09010350704193115, "learning_rate": 1.0971488648026233e-06, "loss": 8.5062, "step": 192940 }, { "epoch": 0.9635696272066718, "grad_norm": 0.08480501919984818, "learning_rate": 1.095646949861073e-06, "loss": 8.5046, "step": 192950 }, { "epoch": 0.9636195660316113, "grad_norm": 0.09088391810655594, "learning_rate": 1.0941450349195224e-06, "loss": 8.4948, "step": 192960 }, { "epoch": 0.9636695048565507, "grad_norm": 0.09037703275680542, "learning_rate": 1.092643119977972e-06, "loss": 8.4929, "step": 192970 }, { "epoch": 0.9637194436814902, "grad_norm": 0.08855674415826797, "learning_rate": 1.0911412050364215e-06, "loss": 8.508, "step": 192980 }, { "epoch": 0.9637693825064296, "grad_norm": 0.08638061583042145, "learning_rate": 1.0896392900948708e-06, "loss": 8.5144, "step": 192990 }, { "epoch": 0.963819321331369, "grad_norm": 0.09214214235544205, "learning_rate": 1.0881373751533205e-06, "loss": 8.4967, "step": 193000 }, { "epoch": 0.9638692601563085, "grad_norm": 0.09081536531448364, "learning_rate": 1.0866354602117699e-06, "loss": 8.5044, "step": 193010 }, { "epoch": 0.963919198981248, "grad_norm": 0.09031979739665985, "learning_rate": 1.0851335452702196e-06, "loss": 8.5013, "step": 193020 }, { "epoch": 0.9639691378061874, "grad_norm": 0.08983229100704193, "learning_rate": 1.083631630328669e-06, "loss": 8.4903, "step": 193030 }, { "epoch": 0.9640190766311268, "grad_norm": 0.08677353709936142, "learning_rate": 1.0821297153871185e-06, "loss": 8.5, "step": 193040 }, { "epoch": 0.9640690154560663, "grad_norm": 0.09196817129850388, "learning_rate": 1.080627800445568e-06, "loss": 8.508, "step": 193050 }, { "epoch": 0.9641189542810058, "grad_norm": 0.09112430363893509, "learning_rate": 1.0791258855040176e-06, "loss": 8.5005, "step": 193060 }, { "epoch": 0.9641688931059452, "grad_norm": 0.09055691957473755, "learning_rate": 1.0776239705624671e-06, "loss": 8.4864, "step": 193070 }, { "epoch": 0.9642188319308846, "grad_norm": 0.09631075710058212, "learning_rate": 1.0761220556209167e-06, "loss": 8.5011, "step": 193080 }, { "epoch": 0.9642687707558241, "grad_norm": 0.091032475233078, "learning_rate": 1.0746201406793662e-06, "loss": 8.4968, "step": 193090 }, { "epoch": 0.9643187095807636, "grad_norm": 0.09456492960453033, "learning_rate": 1.0731182257378158e-06, "loss": 8.5076, "step": 193100 }, { "epoch": 0.964368648405703, "grad_norm": 0.09016124159097672, "learning_rate": 1.071616310796265e-06, "loss": 8.5205, "step": 193110 }, { "epoch": 0.9644185872306424, "grad_norm": 0.09598606824874878, "learning_rate": 1.0701143958547148e-06, "loss": 8.483, "step": 193120 }, { "epoch": 0.9644685260555819, "grad_norm": 0.08811622112989426, "learning_rate": 1.0686124809131642e-06, "loss": 8.4907, "step": 193130 }, { "epoch": 0.9645184648805214, "grad_norm": 0.0883399173617363, "learning_rate": 1.067110565971614e-06, "loss": 8.5047, "step": 193140 }, { "epoch": 0.9645684037054608, "grad_norm": 0.0905977189540863, "learning_rate": 1.0656086510300633e-06, "loss": 8.4943, "step": 193150 }, { "epoch": 0.9646183425304002, "grad_norm": 0.09106041491031647, "learning_rate": 1.0641067360885128e-06, "loss": 8.5189, "step": 193160 }, { "epoch": 0.9646682813553397, "grad_norm": 0.08970261365175247, "learning_rate": 1.0626048211469624e-06, "loss": 8.5162, "step": 193170 }, { "epoch": 0.9647182201802792, "grad_norm": 0.08954989910125732, "learning_rate": 1.061102906205412e-06, "loss": 8.4932, "step": 193180 }, { "epoch": 0.9647681590052186, "grad_norm": 0.08976080268621445, "learning_rate": 1.0596009912638614e-06, "loss": 8.4927, "step": 193190 }, { "epoch": 0.964818097830158, "grad_norm": 0.09053555130958557, "learning_rate": 1.058099076322311e-06, "loss": 8.4998, "step": 193200 }, { "epoch": 0.9648680366550975, "grad_norm": 0.09599043428897858, "learning_rate": 1.0565971613807603e-06, "loss": 8.5194, "step": 193210 }, { "epoch": 0.964917975480037, "grad_norm": 0.09338483214378357, "learning_rate": 1.05509524643921e-06, "loss": 8.4981, "step": 193220 }, { "epoch": 0.9649679143049764, "grad_norm": 0.09092381596565247, "learning_rate": 1.0535933314976594e-06, "loss": 8.5404, "step": 193230 }, { "epoch": 0.9650178531299158, "grad_norm": 0.0899573415517807, "learning_rate": 1.0520914165561092e-06, "loss": 8.5163, "step": 193240 }, { "epoch": 0.9650677919548553, "grad_norm": 0.09256323426961899, "learning_rate": 1.0505895016145585e-06, "loss": 8.4776, "step": 193250 }, { "epoch": 0.9651177307797948, "grad_norm": 0.08706999570131302, "learning_rate": 1.049087586673008e-06, "loss": 8.487, "step": 193260 }, { "epoch": 0.9651676696047342, "grad_norm": 0.09370410442352295, "learning_rate": 1.0475856717314576e-06, "loss": 8.5015, "step": 193270 }, { "epoch": 0.9652176084296736, "grad_norm": 0.09068583697080612, "learning_rate": 1.0460837567899071e-06, "loss": 8.4835, "step": 193280 }, { "epoch": 0.9652675472546131, "grad_norm": 0.09026405960321426, "learning_rate": 1.0445818418483567e-06, "loss": 8.4975, "step": 193290 }, { "epoch": 0.9653174860795526, "grad_norm": 0.0878196433186531, "learning_rate": 1.0430799269068062e-06, "loss": 8.4881, "step": 193300 }, { "epoch": 0.965367424904492, "grad_norm": 0.08937402069568634, "learning_rate": 1.0415780119652555e-06, "loss": 8.497, "step": 193310 }, { "epoch": 0.9654173637294314, "grad_norm": 0.09177373349666595, "learning_rate": 1.0400760970237053e-06, "loss": 8.4949, "step": 193320 }, { "epoch": 0.9654673025543709, "grad_norm": 0.09359072893857956, "learning_rate": 1.0385741820821546e-06, "loss": 8.5039, "step": 193330 }, { "epoch": 0.9655172413793104, "grad_norm": 0.09130553156137466, "learning_rate": 1.0370722671406044e-06, "loss": 8.489, "step": 193340 }, { "epoch": 0.9655671802042498, "grad_norm": 0.09181410074234009, "learning_rate": 1.0355703521990537e-06, "loss": 8.5007, "step": 193350 }, { "epoch": 0.9656171190291892, "grad_norm": 0.09474316984415054, "learning_rate": 1.0340684372575035e-06, "loss": 8.5026, "step": 193360 }, { "epoch": 0.9656670578541287, "grad_norm": 0.09248656779527664, "learning_rate": 1.0325665223159528e-06, "loss": 8.5032, "step": 193370 }, { "epoch": 0.9657169966790682, "grad_norm": 0.09201966971158981, "learning_rate": 1.0310646073744023e-06, "loss": 8.5016, "step": 193380 }, { "epoch": 0.9657669355040076, "grad_norm": 0.09128998965024948, "learning_rate": 1.0295626924328519e-06, "loss": 8.4941, "step": 193390 }, { "epoch": 0.965816874328947, "grad_norm": 0.08449841290712357, "learning_rate": 1.0280607774913014e-06, "loss": 8.5381, "step": 193400 }, { "epoch": 0.9658668131538864, "grad_norm": 0.09496010094881058, "learning_rate": 1.026558862549751e-06, "loss": 8.5214, "step": 193410 }, { "epoch": 0.965916751978826, "grad_norm": 0.08874188363552094, "learning_rate": 1.0250569476082005e-06, "loss": 8.512, "step": 193420 }, { "epoch": 0.9659666908037654, "grad_norm": 0.08817078918218613, "learning_rate": 1.0235550326666498e-06, "loss": 8.4784, "step": 193430 }, { "epoch": 0.9660166296287048, "grad_norm": 0.09461791068315506, "learning_rate": 1.0220531177250996e-06, "loss": 8.4886, "step": 193440 }, { "epoch": 0.9660665684536442, "grad_norm": 0.09889914095401764, "learning_rate": 1.020551202783549e-06, "loss": 8.506, "step": 193450 }, { "epoch": 0.9661165072785838, "grad_norm": 0.09483031183481216, "learning_rate": 1.0190492878419987e-06, "loss": 8.5026, "step": 193460 }, { "epoch": 0.9661664461035232, "grad_norm": 0.09063508361577988, "learning_rate": 1.017547372900448e-06, "loss": 8.4843, "step": 193470 }, { "epoch": 0.9662163849284626, "grad_norm": 0.09315601736307144, "learning_rate": 1.0160454579588976e-06, "loss": 8.4879, "step": 193480 }, { "epoch": 0.966266323753402, "grad_norm": 0.09240519255399704, "learning_rate": 1.014543543017347e-06, "loss": 8.4912, "step": 193490 }, { "epoch": 0.9663162625783416, "grad_norm": 0.09266150742769241, "learning_rate": 1.0130416280757966e-06, "loss": 8.5122, "step": 193500 }, { "epoch": 0.966366201403281, "grad_norm": 0.08804892003536224, "learning_rate": 1.0115397131342462e-06, "loss": 8.498, "step": 193510 }, { "epoch": 0.9664161402282204, "grad_norm": 0.0914304181933403, "learning_rate": 1.0100377981926957e-06, "loss": 8.5013, "step": 193520 }, { "epoch": 0.9664660790531598, "grad_norm": 0.08695808053016663, "learning_rate": 1.008535883251145e-06, "loss": 8.5127, "step": 193530 }, { "epoch": 0.9665160178780994, "grad_norm": 0.09132501482963562, "learning_rate": 1.0070339683095948e-06, "loss": 8.4965, "step": 193540 }, { "epoch": 0.9665659567030388, "grad_norm": 0.08505229651927948, "learning_rate": 1.0055320533680441e-06, "loss": 8.5024, "step": 193550 }, { "epoch": 0.9666158955279782, "grad_norm": 0.09099217504262924, "learning_rate": 1.004030138426494e-06, "loss": 8.5031, "step": 193560 }, { "epoch": 0.9666658343529176, "grad_norm": 0.09543818980455399, "learning_rate": 1.0025282234849432e-06, "loss": 8.497, "step": 193570 }, { "epoch": 0.9667157731778572, "grad_norm": 0.09109151363372803, "learning_rate": 1.0010263085433928e-06, "loss": 8.4983, "step": 193580 }, { "epoch": 0.9667657120027966, "grad_norm": 0.09007328748703003, "learning_rate": 9.995243936018423e-07, "loss": 8.5035, "step": 193590 }, { "epoch": 0.966815650827736, "grad_norm": 0.09476472437381744, "learning_rate": 9.980224786602919e-07, "loss": 8.4901, "step": 193600 }, { "epoch": 0.9668655896526754, "grad_norm": 0.09251927584409714, "learning_rate": 9.965205637187414e-07, "loss": 8.5123, "step": 193610 }, { "epoch": 0.966915528477615, "grad_norm": 0.08989694714546204, "learning_rate": 9.95018648777191e-07, "loss": 8.5028, "step": 193620 }, { "epoch": 0.9669654673025544, "grad_norm": 0.09202085435390472, "learning_rate": 9.935167338356405e-07, "loss": 8.5208, "step": 193630 }, { "epoch": 0.9670154061274938, "grad_norm": 0.09178955852985382, "learning_rate": 9.9201481889409e-07, "loss": 8.5038, "step": 193640 }, { "epoch": 0.9670653449524332, "grad_norm": 0.08983652293682098, "learning_rate": 9.905129039525394e-07, "loss": 8.4953, "step": 193650 }, { "epoch": 0.9671152837773728, "grad_norm": 0.08969952166080475, "learning_rate": 9.890109890109891e-07, "loss": 8.5043, "step": 193660 }, { "epoch": 0.9671652226023122, "grad_norm": 0.09150168299674988, "learning_rate": 9.875090740694384e-07, "loss": 8.5058, "step": 193670 }, { "epoch": 0.9672151614272516, "grad_norm": 0.09063497930765152, "learning_rate": 9.860071591278882e-07, "loss": 8.5009, "step": 193680 }, { "epoch": 0.967265100252191, "grad_norm": 0.08984601497650146, "learning_rate": 9.845052441863375e-07, "loss": 8.5082, "step": 193690 }, { "epoch": 0.9673150390771306, "grad_norm": 0.09148889034986496, "learning_rate": 9.83003329244787e-07, "loss": 8.5017, "step": 193700 }, { "epoch": 0.96736497790207, "grad_norm": 0.09478826075792313, "learning_rate": 9.815014143032366e-07, "loss": 8.4963, "step": 193710 }, { "epoch": 0.9674149167270094, "grad_norm": 0.0906151756644249, "learning_rate": 9.799994993616862e-07, "loss": 8.4947, "step": 193720 }, { "epoch": 0.9674648555519488, "grad_norm": 0.0921965166926384, "learning_rate": 9.784975844201357e-07, "loss": 8.5126, "step": 193730 }, { "epoch": 0.9675147943768884, "grad_norm": 0.09627021849155426, "learning_rate": 9.769956694785852e-07, "loss": 8.4987, "step": 193740 }, { "epoch": 0.9675647332018278, "grad_norm": 0.0912567749619484, "learning_rate": 9.754937545370346e-07, "loss": 8.5012, "step": 193750 }, { "epoch": 0.9676146720267672, "grad_norm": 0.09098992496728897, "learning_rate": 9.739918395954843e-07, "loss": 8.4986, "step": 193760 }, { "epoch": 0.9676646108517066, "grad_norm": 0.09001246094703674, "learning_rate": 9.724899246539337e-07, "loss": 8.4776, "step": 193770 }, { "epoch": 0.9677145496766462, "grad_norm": 0.09463299065828323, "learning_rate": 9.709880097123834e-07, "loss": 8.4844, "step": 193780 }, { "epoch": 0.9677644885015856, "grad_norm": 0.09130155295133591, "learning_rate": 9.694860947708328e-07, "loss": 8.4935, "step": 193790 }, { "epoch": 0.967814427326525, "grad_norm": 0.09506887197494507, "learning_rate": 9.679841798292823e-07, "loss": 8.5098, "step": 193800 }, { "epoch": 0.9678643661514644, "grad_norm": 0.0893334150314331, "learning_rate": 9.664822648877318e-07, "loss": 8.4957, "step": 193810 }, { "epoch": 0.9679143049764038, "grad_norm": 0.08439191430807114, "learning_rate": 9.649803499461814e-07, "loss": 8.5174, "step": 193820 }, { "epoch": 0.9679642438013434, "grad_norm": 0.0962853729724884, "learning_rate": 9.63478435004631e-07, "loss": 8.5103, "step": 193830 }, { "epoch": 0.9680141826262828, "grad_norm": 0.09273120760917664, "learning_rate": 9.619765200630805e-07, "loss": 8.4903, "step": 193840 }, { "epoch": 0.9680641214512222, "grad_norm": 0.08646199852228165, "learning_rate": 9.604746051215298e-07, "loss": 8.4982, "step": 193850 }, { "epoch": 0.9681140602761616, "grad_norm": 0.08911348879337311, "learning_rate": 9.589726901799796e-07, "loss": 8.4981, "step": 193860 }, { "epoch": 0.9681639991011012, "grad_norm": 0.09012620151042938, "learning_rate": 9.574707752384289e-07, "loss": 8.4926, "step": 193870 }, { "epoch": 0.9682139379260406, "grad_norm": 0.08890789747238159, "learning_rate": 9.559688602968786e-07, "loss": 8.4956, "step": 193880 }, { "epoch": 0.96826387675098, "grad_norm": 0.0956655889749527, "learning_rate": 9.54466945355328e-07, "loss": 8.5063, "step": 193890 }, { "epoch": 0.9683138155759194, "grad_norm": 0.0944046676158905, "learning_rate": 9.529650304137776e-07, "loss": 8.4913, "step": 193900 }, { "epoch": 0.968363754400859, "grad_norm": 0.09098576009273529, "learning_rate": 9.514631154722271e-07, "loss": 8.4847, "step": 193910 }, { "epoch": 0.9684136932257984, "grad_norm": 0.09032325446605682, "learning_rate": 9.499612005306767e-07, "loss": 8.5017, "step": 193920 }, { "epoch": 0.9684636320507378, "grad_norm": 0.08898947387933731, "learning_rate": 9.484592855891261e-07, "loss": 8.4929, "step": 193930 }, { "epoch": 0.9685135708756772, "grad_norm": 0.087337426841259, "learning_rate": 9.469573706475757e-07, "loss": 8.5065, "step": 193940 }, { "epoch": 0.9685635097006168, "grad_norm": 0.089985191822052, "learning_rate": 9.454554557060251e-07, "loss": 8.4867, "step": 193950 }, { "epoch": 0.9686134485255562, "grad_norm": 0.09129071980714798, "learning_rate": 9.439535407644748e-07, "loss": 8.5096, "step": 193960 }, { "epoch": 0.9686633873504956, "grad_norm": 0.08964653313159943, "learning_rate": 9.424516258229242e-07, "loss": 8.5179, "step": 193970 }, { "epoch": 0.968713326175435, "grad_norm": 0.09389611333608627, "learning_rate": 9.409497108813739e-07, "loss": 8.4891, "step": 193980 }, { "epoch": 0.9687632650003746, "grad_norm": 0.0910143330693245, "learning_rate": 9.394477959398232e-07, "loss": 8.5042, "step": 193990 }, { "epoch": 0.968813203825314, "grad_norm": 0.09279486536979675, "learning_rate": 9.379458809982728e-07, "loss": 8.5063, "step": 194000 }, { "epoch": 0.9688631426502534, "grad_norm": 0.09352979063987732, "learning_rate": 9.364439660567224e-07, "loss": 8.5034, "step": 194010 }, { "epoch": 0.9689130814751928, "grad_norm": 0.09566289931535721, "learning_rate": 9.349420511151719e-07, "loss": 8.4996, "step": 194020 }, { "epoch": 0.9689630203001324, "grad_norm": 0.09447862952947617, "learning_rate": 9.334401361736215e-07, "loss": 8.4978, "step": 194030 }, { "epoch": 0.9690129591250718, "grad_norm": 0.09513899683952332, "learning_rate": 9.319382212320709e-07, "loss": 8.5037, "step": 194040 }, { "epoch": 0.9690628979500112, "grad_norm": 0.09640028327703476, "learning_rate": 9.304363062905204e-07, "loss": 8.5095, "step": 194050 }, { "epoch": 0.9691128367749506, "grad_norm": 0.09304415434598923, "learning_rate": 9.2893439134897e-07, "loss": 8.4915, "step": 194060 }, { "epoch": 0.9691627755998902, "grad_norm": 0.08626173436641693, "learning_rate": 9.274324764074195e-07, "loss": 8.5022, "step": 194070 }, { "epoch": 0.9692127144248296, "grad_norm": 0.08531943708658218, "learning_rate": 9.259305614658691e-07, "loss": 8.5094, "step": 194080 }, { "epoch": 0.969262653249769, "grad_norm": 0.08645452558994293, "learning_rate": 9.244286465243186e-07, "loss": 8.4834, "step": 194090 }, { "epoch": 0.9693125920747084, "grad_norm": 0.08829811960458755, "learning_rate": 9.229267315827681e-07, "loss": 8.4885, "step": 194100 }, { "epoch": 0.969362530899648, "grad_norm": 0.08838041871786118, "learning_rate": 9.214248166412176e-07, "loss": 8.4871, "step": 194110 }, { "epoch": 0.9694124697245874, "grad_norm": 0.10451626777648926, "learning_rate": 9.199229016996671e-07, "loss": 8.5013, "step": 194120 }, { "epoch": 0.9694624085495268, "grad_norm": 0.0896938219666481, "learning_rate": 9.184209867581167e-07, "loss": 8.4912, "step": 194130 }, { "epoch": 0.9695123473744662, "grad_norm": 0.09267926216125488, "learning_rate": 9.169190718165662e-07, "loss": 8.5015, "step": 194140 }, { "epoch": 0.9695622861994058, "grad_norm": 0.09529542177915573, "learning_rate": 9.154171568750157e-07, "loss": 8.5052, "step": 194150 }, { "epoch": 0.9696122250243452, "grad_norm": 0.09126924723386765, "learning_rate": 9.139152419334652e-07, "loss": 8.4985, "step": 194160 }, { "epoch": 0.9696621638492846, "grad_norm": 0.09080453962087631, "learning_rate": 9.124133269919148e-07, "loss": 8.4891, "step": 194170 }, { "epoch": 0.969712102674224, "grad_norm": 0.09079496562480927, "learning_rate": 9.109114120503643e-07, "loss": 8.4894, "step": 194180 }, { "epoch": 0.9697620414991636, "grad_norm": 0.09417524933815002, "learning_rate": 9.094094971088138e-07, "loss": 8.5045, "step": 194190 }, { "epoch": 0.969811980324103, "grad_norm": 0.09117228537797928, "learning_rate": 9.079075821672633e-07, "loss": 8.5076, "step": 194200 }, { "epoch": 0.9698619191490424, "grad_norm": 0.09321728348731995, "learning_rate": 9.064056672257128e-07, "loss": 8.4965, "step": 194210 }, { "epoch": 0.9699118579739818, "grad_norm": 0.08753789961338043, "learning_rate": 9.049037522841624e-07, "loss": 8.4978, "step": 194220 }, { "epoch": 0.9699617967989214, "grad_norm": 0.09341299533843994, "learning_rate": 9.034018373426119e-07, "loss": 8.4969, "step": 194230 }, { "epoch": 0.9700117356238608, "grad_norm": 0.09108027070760727, "learning_rate": 9.018999224010614e-07, "loss": 8.4958, "step": 194240 }, { "epoch": 0.9700616744488002, "grad_norm": 0.09051679074764252, "learning_rate": 9.00398007459511e-07, "loss": 8.5203, "step": 194250 }, { "epoch": 0.9701116132737396, "grad_norm": 0.0874452218413353, "learning_rate": 8.988960925179604e-07, "loss": 8.4992, "step": 194260 }, { "epoch": 0.9701615520986792, "grad_norm": 0.09082052111625671, "learning_rate": 8.9739417757641e-07, "loss": 8.5051, "step": 194270 }, { "epoch": 0.9702114909236186, "grad_norm": 0.08856025338172913, "learning_rate": 8.958922626348595e-07, "loss": 8.5033, "step": 194280 }, { "epoch": 0.970261429748558, "grad_norm": 0.09108460694551468, "learning_rate": 8.943903476933091e-07, "loss": 8.5058, "step": 194290 }, { "epoch": 0.9703113685734974, "grad_norm": 0.09241397678852081, "learning_rate": 8.928884327517586e-07, "loss": 8.5089, "step": 194300 }, { "epoch": 0.970361307398437, "grad_norm": 0.099297434091568, "learning_rate": 8.91386517810208e-07, "loss": 8.4976, "step": 194310 }, { "epoch": 0.9704112462233764, "grad_norm": 0.08700823783874512, "learning_rate": 8.898846028686576e-07, "loss": 8.4843, "step": 194320 }, { "epoch": 0.9704611850483158, "grad_norm": 0.09079594165086746, "learning_rate": 8.883826879271071e-07, "loss": 8.5041, "step": 194330 }, { "epoch": 0.9705111238732552, "grad_norm": 0.08681687712669373, "learning_rate": 8.868807729855567e-07, "loss": 8.4892, "step": 194340 }, { "epoch": 0.9705610626981948, "grad_norm": 0.09553930163383484, "learning_rate": 8.853788580440062e-07, "loss": 8.5019, "step": 194350 }, { "epoch": 0.9706110015231342, "grad_norm": 0.08355355262756348, "learning_rate": 8.838769431024558e-07, "loss": 8.5108, "step": 194360 }, { "epoch": 0.9706609403480736, "grad_norm": 0.0913529172539711, "learning_rate": 8.823750281609052e-07, "loss": 8.5125, "step": 194370 }, { "epoch": 0.970710879173013, "grad_norm": 0.09661257266998291, "learning_rate": 8.808731132193547e-07, "loss": 8.5045, "step": 194380 }, { "epoch": 0.9707608179979526, "grad_norm": 0.09083771705627441, "learning_rate": 8.793711982778043e-07, "loss": 8.4961, "step": 194390 }, { "epoch": 0.970810756822892, "grad_norm": 0.08930204063653946, "learning_rate": 8.778692833362538e-07, "loss": 8.509, "step": 194400 }, { "epoch": 0.9708606956478314, "grad_norm": 0.08711707592010498, "learning_rate": 8.763673683947034e-07, "loss": 8.5015, "step": 194410 }, { "epoch": 0.9709106344727708, "grad_norm": 0.09438509494066238, "learning_rate": 8.748654534531527e-07, "loss": 8.4954, "step": 194420 }, { "epoch": 0.9709605732977103, "grad_norm": 0.09597020596265793, "learning_rate": 8.733635385116022e-07, "loss": 8.4873, "step": 194430 }, { "epoch": 0.9710105121226498, "grad_norm": 0.09720340371131897, "learning_rate": 8.718616235700518e-07, "loss": 8.5018, "step": 194440 }, { "epoch": 0.9710604509475892, "grad_norm": 0.09392903745174408, "learning_rate": 8.703597086285013e-07, "loss": 8.498, "step": 194450 }, { "epoch": 0.9711103897725286, "grad_norm": 0.08631488680839539, "learning_rate": 8.688577936869509e-07, "loss": 8.5162, "step": 194460 }, { "epoch": 0.9711603285974681, "grad_norm": 0.0900026261806488, "learning_rate": 8.673558787454003e-07, "loss": 8.4875, "step": 194470 }, { "epoch": 0.9712102674224076, "grad_norm": 0.09366903454065323, "learning_rate": 8.658539638038498e-07, "loss": 8.4926, "step": 194480 }, { "epoch": 0.971260206247347, "grad_norm": 0.08986707031726837, "learning_rate": 8.643520488622994e-07, "loss": 8.5027, "step": 194490 }, { "epoch": 0.9713101450722864, "grad_norm": 0.08996418118476868, "learning_rate": 8.628501339207489e-07, "loss": 8.4983, "step": 194500 }, { "epoch": 0.971360083897226, "grad_norm": 0.09488117694854736, "learning_rate": 8.613482189791985e-07, "loss": 8.4869, "step": 194510 }, { "epoch": 0.9714100227221654, "grad_norm": 0.0917017012834549, "learning_rate": 8.59846304037648e-07, "loss": 8.5092, "step": 194520 }, { "epoch": 0.9714599615471048, "grad_norm": 0.0919492170214653, "learning_rate": 8.583443890960975e-07, "loss": 8.4992, "step": 194530 }, { "epoch": 0.9715099003720442, "grad_norm": 0.08751703053712845, "learning_rate": 8.56842474154547e-07, "loss": 8.5088, "step": 194540 }, { "epoch": 0.9715598391969837, "grad_norm": 0.09287269413471222, "learning_rate": 8.553405592129965e-07, "loss": 8.496, "step": 194550 }, { "epoch": 0.9716097780219232, "grad_norm": 0.08905121684074402, "learning_rate": 8.538386442714461e-07, "loss": 8.5047, "step": 194560 }, { "epoch": 0.9716597168468626, "grad_norm": 0.0883074626326561, "learning_rate": 8.523367293298956e-07, "loss": 8.4918, "step": 194570 }, { "epoch": 0.971709655671802, "grad_norm": 0.08838310837745667, "learning_rate": 8.508348143883451e-07, "loss": 8.4834, "step": 194580 }, { "epoch": 0.9717595944967415, "grad_norm": 0.08658456802368164, "learning_rate": 8.493328994467946e-07, "loss": 8.4993, "step": 194590 }, { "epoch": 0.971809533321681, "grad_norm": 0.08917684108018875, "learning_rate": 8.478309845052442e-07, "loss": 8.5031, "step": 194600 }, { "epoch": 0.9718594721466204, "grad_norm": 0.09120479226112366, "learning_rate": 8.463290695636937e-07, "loss": 8.495, "step": 194610 }, { "epoch": 0.9719094109715598, "grad_norm": 0.09076827019453049, "learning_rate": 8.448271546221432e-07, "loss": 8.5126, "step": 194620 }, { "epoch": 0.9719593497964993, "grad_norm": 0.09772755205631256, "learning_rate": 8.433252396805928e-07, "loss": 8.4906, "step": 194630 }, { "epoch": 0.9720092886214388, "grad_norm": 0.08785592764616013, "learning_rate": 8.418233247390422e-07, "loss": 8.5035, "step": 194640 }, { "epoch": 0.9720592274463782, "grad_norm": 0.09105689078569412, "learning_rate": 8.403214097974918e-07, "loss": 8.4977, "step": 194650 }, { "epoch": 0.9721091662713176, "grad_norm": 0.09146515280008316, "learning_rate": 8.388194948559413e-07, "loss": 8.5028, "step": 194660 }, { "epoch": 0.9721591050962571, "grad_norm": 0.09533209353685379, "learning_rate": 8.373175799143908e-07, "loss": 8.4955, "step": 194670 }, { "epoch": 0.9722090439211966, "grad_norm": 0.08939139544963837, "learning_rate": 8.358156649728404e-07, "loss": 8.4918, "step": 194680 }, { "epoch": 0.972258982746136, "grad_norm": 0.0908391997218132, "learning_rate": 8.343137500312898e-07, "loss": 8.5214, "step": 194690 }, { "epoch": 0.9723089215710754, "grad_norm": 0.08793427795171738, "learning_rate": 8.328118350897394e-07, "loss": 8.5005, "step": 194700 }, { "epoch": 0.9723588603960149, "grad_norm": 0.09640876948833466, "learning_rate": 8.313099201481889e-07, "loss": 8.5052, "step": 194710 }, { "epoch": 0.9724087992209544, "grad_norm": 0.0828898623585701, "learning_rate": 8.298080052066385e-07, "loss": 8.4938, "step": 194720 }, { "epoch": 0.9724587380458938, "grad_norm": 0.09573552757501602, "learning_rate": 8.28306090265088e-07, "loss": 8.5073, "step": 194730 }, { "epoch": 0.9725086768708332, "grad_norm": 0.09013304859399796, "learning_rate": 8.268041753235374e-07, "loss": 8.4836, "step": 194740 }, { "epoch": 0.9725586156957727, "grad_norm": 0.08847355097532272, "learning_rate": 8.25302260381987e-07, "loss": 8.4966, "step": 194750 }, { "epoch": 0.9726085545207122, "grad_norm": 0.0923420786857605, "learning_rate": 8.238003454404365e-07, "loss": 8.4948, "step": 194760 }, { "epoch": 0.9726584933456516, "grad_norm": 0.09254893660545349, "learning_rate": 8.222984304988861e-07, "loss": 8.5117, "step": 194770 }, { "epoch": 0.972708432170591, "grad_norm": 0.09156614542007446, "learning_rate": 8.207965155573356e-07, "loss": 8.4967, "step": 194780 }, { "epoch": 0.9727583709955305, "grad_norm": 0.09188225865364075, "learning_rate": 8.192946006157852e-07, "loss": 8.4973, "step": 194790 }, { "epoch": 0.97280830982047, "grad_norm": 0.09390117973089218, "learning_rate": 8.177926856742346e-07, "loss": 8.4866, "step": 194800 }, { "epoch": 0.9728582486454094, "grad_norm": 0.09112615883350372, "learning_rate": 8.162907707326841e-07, "loss": 8.5038, "step": 194810 }, { "epoch": 0.9729081874703488, "grad_norm": 0.08958408236503601, "learning_rate": 8.147888557911337e-07, "loss": 8.5039, "step": 194820 }, { "epoch": 0.9729581262952882, "grad_norm": 0.09056022018194199, "learning_rate": 8.132869408495832e-07, "loss": 8.5089, "step": 194830 }, { "epoch": 0.9730080651202277, "grad_norm": 0.0902940034866333, "learning_rate": 8.117850259080328e-07, "loss": 8.4841, "step": 194840 }, { "epoch": 0.9730580039451672, "grad_norm": 0.08953052759170532, "learning_rate": 8.102831109664822e-07, "loss": 8.5099, "step": 194850 }, { "epoch": 0.9731079427701066, "grad_norm": 0.09145776927471161, "learning_rate": 8.087811960249317e-07, "loss": 8.4842, "step": 194860 }, { "epoch": 0.973157881595046, "grad_norm": 0.09231266379356384, "learning_rate": 8.072792810833813e-07, "loss": 8.4961, "step": 194870 }, { "epoch": 0.9732078204199855, "grad_norm": 0.09086192399263382, "learning_rate": 8.057773661418308e-07, "loss": 8.5026, "step": 194880 }, { "epoch": 0.973257759244925, "grad_norm": 0.0860838070511818, "learning_rate": 8.042754512002804e-07, "loss": 8.4974, "step": 194890 }, { "epoch": 0.9733076980698644, "grad_norm": 0.09107068926095963, "learning_rate": 8.027735362587299e-07, "loss": 8.509, "step": 194900 }, { "epoch": 0.9733576368948038, "grad_norm": 0.09526333957910538, "learning_rate": 8.012716213171794e-07, "loss": 8.4865, "step": 194910 }, { "epoch": 0.9734075757197433, "grad_norm": 0.09245917946100235, "learning_rate": 7.997697063756289e-07, "loss": 8.5036, "step": 194920 }, { "epoch": 0.9734575145446828, "grad_norm": 0.09295765310525894, "learning_rate": 7.982677914340784e-07, "loss": 8.4988, "step": 194930 }, { "epoch": 0.9735074533696222, "grad_norm": 0.08823594450950623, "learning_rate": 7.96765876492528e-07, "loss": 8.4804, "step": 194940 }, { "epoch": 0.9735573921945616, "grad_norm": 0.08996216952800751, "learning_rate": 7.952639615509775e-07, "loss": 8.5019, "step": 194950 }, { "epoch": 0.9736073310195011, "grad_norm": 0.08816206455230713, "learning_rate": 7.93762046609427e-07, "loss": 8.5015, "step": 194960 }, { "epoch": 0.9736572698444406, "grad_norm": 0.09345098584890366, "learning_rate": 7.922601316678765e-07, "loss": 8.4941, "step": 194970 }, { "epoch": 0.97370720866938, "grad_norm": 0.08701256662607193, "learning_rate": 7.90758216726326e-07, "loss": 8.4983, "step": 194980 }, { "epoch": 0.9737571474943194, "grad_norm": 0.08897973597049713, "learning_rate": 7.892563017847756e-07, "loss": 8.5118, "step": 194990 }, { "epoch": 0.9738070863192589, "grad_norm": 0.08834607154130936, "learning_rate": 7.877543868432251e-07, "loss": 8.5255, "step": 195000 }, { "epoch": 0.9738570251441984, "grad_norm": 0.09231365472078323, "learning_rate": 7.862524719016746e-07, "loss": 8.5062, "step": 195010 }, { "epoch": 0.9739069639691378, "grad_norm": 0.089638851583004, "learning_rate": 7.847505569601241e-07, "loss": 8.4869, "step": 195020 }, { "epoch": 0.9739569027940772, "grad_norm": 0.09642373770475388, "learning_rate": 7.832486420185737e-07, "loss": 8.5094, "step": 195030 }, { "epoch": 0.9740068416190167, "grad_norm": 0.09365835040807724, "learning_rate": 7.817467270770232e-07, "loss": 8.5179, "step": 195040 }, { "epoch": 0.9740567804439562, "grad_norm": 0.09444354474544525, "learning_rate": 7.802448121354727e-07, "loss": 8.5114, "step": 195050 }, { "epoch": 0.9741067192688956, "grad_norm": 0.09574456512928009, "learning_rate": 7.787428971939223e-07, "loss": 8.4882, "step": 195060 }, { "epoch": 0.974156658093835, "grad_norm": 0.09401273727416992, "learning_rate": 7.772409822523717e-07, "loss": 8.4983, "step": 195070 }, { "epoch": 0.9742065969187745, "grad_norm": 0.08458282053470612, "learning_rate": 7.757390673108213e-07, "loss": 8.4861, "step": 195080 }, { "epoch": 0.974256535743714, "grad_norm": 0.09678689390420914, "learning_rate": 7.742371523692708e-07, "loss": 8.4971, "step": 195090 }, { "epoch": 0.9743064745686534, "grad_norm": 0.08890896290540695, "learning_rate": 7.727352374277204e-07, "loss": 8.4881, "step": 195100 }, { "epoch": 0.9743564133935928, "grad_norm": 0.0906500592827797, "learning_rate": 7.712333224861699e-07, "loss": 8.4991, "step": 195110 }, { "epoch": 0.9744063522185323, "grad_norm": 0.09543705731630325, "learning_rate": 7.697314075446193e-07, "loss": 8.5054, "step": 195120 }, { "epoch": 0.9744562910434718, "grad_norm": 0.0905163511633873, "learning_rate": 7.682294926030689e-07, "loss": 8.4979, "step": 195130 }, { "epoch": 0.9745062298684112, "grad_norm": 0.09297658503055573, "learning_rate": 7.667275776615184e-07, "loss": 8.4858, "step": 195140 }, { "epoch": 0.9745561686933506, "grad_norm": 0.08763201534748077, "learning_rate": 7.65225662719968e-07, "loss": 8.4979, "step": 195150 }, { "epoch": 0.9746061075182901, "grad_norm": 0.0941351130604744, "learning_rate": 7.637237477784175e-07, "loss": 8.5004, "step": 195160 }, { "epoch": 0.9746560463432296, "grad_norm": 0.09358199685811996, "learning_rate": 7.62221832836867e-07, "loss": 8.4789, "step": 195170 }, { "epoch": 0.974705985168169, "grad_norm": 0.09558181464672089, "learning_rate": 7.607199178953165e-07, "loss": 8.5046, "step": 195180 }, { "epoch": 0.9747559239931084, "grad_norm": 0.08955127000808716, "learning_rate": 7.59218002953766e-07, "loss": 8.5107, "step": 195190 }, { "epoch": 0.9748058628180479, "grad_norm": 0.08842689543962479, "learning_rate": 7.577160880122156e-07, "loss": 8.4977, "step": 195200 }, { "epoch": 0.9748558016429874, "grad_norm": 0.08837328106164932, "learning_rate": 7.562141730706651e-07, "loss": 8.4808, "step": 195210 }, { "epoch": 0.9749057404679268, "grad_norm": 0.09056874364614487, "learning_rate": 7.547122581291147e-07, "loss": 8.5065, "step": 195220 }, { "epoch": 0.9749556792928662, "grad_norm": 0.0923481434583664, "learning_rate": 7.532103431875641e-07, "loss": 8.4953, "step": 195230 }, { "epoch": 0.9750056181178057, "grad_norm": 0.09200780093669891, "learning_rate": 7.517084282460136e-07, "loss": 8.5131, "step": 195240 }, { "epoch": 0.9750555569427451, "grad_norm": 0.09253337979316711, "learning_rate": 7.502065133044632e-07, "loss": 8.503, "step": 195250 }, { "epoch": 0.9751054957676846, "grad_norm": 0.09560063481330872, "learning_rate": 7.487045983629127e-07, "loss": 8.4932, "step": 195260 }, { "epoch": 0.975155434592624, "grad_norm": 0.09416811168193817, "learning_rate": 7.472026834213623e-07, "loss": 8.5141, "step": 195270 }, { "epoch": 0.9752053734175635, "grad_norm": 0.08733347803354263, "learning_rate": 7.457007684798117e-07, "loss": 8.5092, "step": 195280 }, { "epoch": 0.975255312242503, "grad_norm": 0.08784154802560806, "learning_rate": 7.441988535382612e-07, "loss": 8.5029, "step": 195290 }, { "epoch": 0.9753052510674424, "grad_norm": 0.09478521347045898, "learning_rate": 7.426969385967108e-07, "loss": 8.5035, "step": 195300 }, { "epoch": 0.9753551898923818, "grad_norm": 0.09163037687540054, "learning_rate": 7.411950236551603e-07, "loss": 8.4888, "step": 195310 }, { "epoch": 0.9754051287173213, "grad_norm": 0.10056457668542862, "learning_rate": 7.396931087136099e-07, "loss": 8.5046, "step": 195320 }, { "epoch": 0.9754550675422607, "grad_norm": 0.09030482172966003, "learning_rate": 7.381911937720594e-07, "loss": 8.505, "step": 195330 }, { "epoch": 0.9755050063672002, "grad_norm": 0.089601531624794, "learning_rate": 7.366892788305089e-07, "loss": 8.4949, "step": 195340 }, { "epoch": 0.9755549451921396, "grad_norm": 0.09470442682504654, "learning_rate": 7.351873638889584e-07, "loss": 8.4826, "step": 195350 }, { "epoch": 0.9756048840170791, "grad_norm": 0.0969916582107544, "learning_rate": 7.336854489474079e-07, "loss": 8.4949, "step": 195360 }, { "epoch": 0.9756548228420185, "grad_norm": 0.09550881385803223, "learning_rate": 7.321835340058575e-07, "loss": 8.4897, "step": 195370 }, { "epoch": 0.975704761666958, "grad_norm": 0.08882039040327072, "learning_rate": 7.30681619064307e-07, "loss": 8.4957, "step": 195380 }, { "epoch": 0.9757547004918974, "grad_norm": 0.09256576001644135, "learning_rate": 7.291797041227565e-07, "loss": 8.5088, "step": 195390 }, { "epoch": 0.9758046393168369, "grad_norm": 0.09013333171606064, "learning_rate": 7.27677789181206e-07, "loss": 8.5056, "step": 195400 }, { "epoch": 0.9758545781417763, "grad_norm": 0.09338297694921494, "learning_rate": 7.261758742396556e-07, "loss": 8.4914, "step": 195410 }, { "epoch": 0.9759045169667158, "grad_norm": 0.08930335938930511, "learning_rate": 7.246739592981051e-07, "loss": 8.4948, "step": 195420 }, { "epoch": 0.9759544557916552, "grad_norm": 0.09636734426021576, "learning_rate": 7.231720443565546e-07, "loss": 8.4691, "step": 195430 }, { "epoch": 0.9760043946165947, "grad_norm": 0.09596901386976242, "learning_rate": 7.216701294150042e-07, "loss": 8.5013, "step": 195440 }, { "epoch": 0.9760543334415341, "grad_norm": 0.08806633204221725, "learning_rate": 7.201682144734536e-07, "loss": 8.4973, "step": 195450 }, { "epoch": 0.9761042722664736, "grad_norm": 0.08893059194087982, "learning_rate": 7.186662995319032e-07, "loss": 8.5316, "step": 195460 }, { "epoch": 0.976154211091413, "grad_norm": 0.09048335999250412, "learning_rate": 7.171643845903527e-07, "loss": 8.504, "step": 195470 }, { "epoch": 0.9762041499163525, "grad_norm": 0.09120376408100128, "learning_rate": 7.156624696488023e-07, "loss": 8.4939, "step": 195480 }, { "epoch": 0.9762540887412919, "grad_norm": 0.09385009855031967, "learning_rate": 7.141605547072518e-07, "loss": 8.4997, "step": 195490 }, { "epoch": 0.9763040275662314, "grad_norm": 0.09150715172290802, "learning_rate": 7.126586397657012e-07, "loss": 8.4917, "step": 195500 }, { "epoch": 0.9763539663911708, "grad_norm": 0.08368659019470215, "learning_rate": 7.111567248241508e-07, "loss": 8.5044, "step": 195510 }, { "epoch": 0.9764039052161103, "grad_norm": 0.09331393986940384, "learning_rate": 7.096548098826003e-07, "loss": 8.5056, "step": 195520 }, { "epoch": 0.9764538440410497, "grad_norm": 0.08423703908920288, "learning_rate": 7.081528949410499e-07, "loss": 8.4948, "step": 195530 }, { "epoch": 0.9765037828659892, "grad_norm": 0.090935617685318, "learning_rate": 7.066509799994994e-07, "loss": 8.5093, "step": 195540 }, { "epoch": 0.9765537216909286, "grad_norm": 0.089198037981987, "learning_rate": 7.051490650579488e-07, "loss": 8.5092, "step": 195550 }, { "epoch": 0.9766036605158681, "grad_norm": 0.09370332956314087, "learning_rate": 7.036471501163984e-07, "loss": 8.4945, "step": 195560 }, { "epoch": 0.9766535993408075, "grad_norm": 0.09572462737560272, "learning_rate": 7.021452351748479e-07, "loss": 8.4806, "step": 195570 }, { "epoch": 0.976703538165747, "grad_norm": 0.09619718790054321, "learning_rate": 7.006433202332975e-07, "loss": 8.4985, "step": 195580 }, { "epoch": 0.9767534769906864, "grad_norm": 0.08778340369462967, "learning_rate": 6.99141405291747e-07, "loss": 8.5015, "step": 195590 }, { "epoch": 0.9768034158156259, "grad_norm": 0.09372592717409134, "learning_rate": 6.976394903501966e-07, "loss": 8.4923, "step": 195600 }, { "epoch": 0.9768533546405653, "grad_norm": 0.08879277855157852, "learning_rate": 6.96137575408646e-07, "loss": 8.5049, "step": 195610 }, { "epoch": 0.9769032934655048, "grad_norm": 0.09419248253107071, "learning_rate": 6.946356604670955e-07, "loss": 8.5186, "step": 195620 }, { "epoch": 0.9769532322904442, "grad_norm": 0.09513996541500092, "learning_rate": 6.931337455255451e-07, "loss": 8.5032, "step": 195630 }, { "epoch": 0.9770031711153837, "grad_norm": 0.09008798003196716, "learning_rate": 6.916318305839946e-07, "loss": 8.493, "step": 195640 }, { "epoch": 0.9770531099403231, "grad_norm": 0.08629944920539856, "learning_rate": 6.901299156424442e-07, "loss": 8.4955, "step": 195650 }, { "epoch": 0.9771030487652625, "grad_norm": 0.09061171859502792, "learning_rate": 6.886280007008936e-07, "loss": 8.5169, "step": 195660 }, { "epoch": 0.977152987590202, "grad_norm": 0.0888964906334877, "learning_rate": 6.871260857593431e-07, "loss": 8.4928, "step": 195670 }, { "epoch": 0.9772029264151415, "grad_norm": 0.08664719760417938, "learning_rate": 6.856241708177927e-07, "loss": 8.4964, "step": 195680 }, { "epoch": 0.9772528652400809, "grad_norm": 0.08702714741230011, "learning_rate": 6.841222558762422e-07, "loss": 8.4972, "step": 195690 }, { "epoch": 0.9773028040650203, "grad_norm": 0.0985323116183281, "learning_rate": 6.826203409346918e-07, "loss": 8.4806, "step": 195700 }, { "epoch": 0.9773527428899598, "grad_norm": 0.08669869601726532, "learning_rate": 6.811184259931413e-07, "loss": 8.5084, "step": 195710 }, { "epoch": 0.9774026817148993, "grad_norm": 0.09846388548612595, "learning_rate": 6.796165110515908e-07, "loss": 8.5034, "step": 195720 }, { "epoch": 0.9774526205398387, "grad_norm": 0.08850651979446411, "learning_rate": 6.781145961100403e-07, "loss": 8.5063, "step": 195730 }, { "epoch": 0.9775025593647781, "grad_norm": 0.09550187736749649, "learning_rate": 6.766126811684898e-07, "loss": 8.4943, "step": 195740 }, { "epoch": 0.9775524981897176, "grad_norm": 0.08764312416315079, "learning_rate": 6.751107662269394e-07, "loss": 8.5015, "step": 195750 }, { "epoch": 0.9776024370146571, "grad_norm": 0.09270459413528442, "learning_rate": 6.736088512853889e-07, "loss": 8.4993, "step": 195760 }, { "epoch": 0.9776523758395965, "grad_norm": 0.09069382399320602, "learning_rate": 6.721069363438384e-07, "loss": 8.4991, "step": 195770 }, { "epoch": 0.9777023146645359, "grad_norm": 0.09017602354288101, "learning_rate": 6.706050214022879e-07, "loss": 8.5079, "step": 195780 }, { "epoch": 0.9777522534894754, "grad_norm": 0.0948113352060318, "learning_rate": 6.691031064607375e-07, "loss": 8.4913, "step": 195790 }, { "epoch": 0.9778021923144148, "grad_norm": 0.09429457783699036, "learning_rate": 6.67601191519187e-07, "loss": 8.501, "step": 195800 }, { "epoch": 0.9778521311393543, "grad_norm": 0.09153127670288086, "learning_rate": 6.660992765776365e-07, "loss": 8.4899, "step": 195810 }, { "epoch": 0.9779020699642937, "grad_norm": 0.0950402021408081, "learning_rate": 6.64597361636086e-07, "loss": 8.5159, "step": 195820 }, { "epoch": 0.9779520087892332, "grad_norm": 0.09019756317138672, "learning_rate": 6.630954466945355e-07, "loss": 8.494, "step": 195830 }, { "epoch": 0.9780019476141726, "grad_norm": 0.08579851686954498, "learning_rate": 6.615935317529851e-07, "loss": 8.5005, "step": 195840 }, { "epoch": 0.9780518864391121, "grad_norm": 0.09236463159322739, "learning_rate": 6.600916168114346e-07, "loss": 8.4911, "step": 195850 }, { "epoch": 0.9781018252640515, "grad_norm": 0.08974046260118484, "learning_rate": 6.585897018698841e-07, "loss": 8.5048, "step": 195860 }, { "epoch": 0.978151764088991, "grad_norm": 0.0937148705124855, "learning_rate": 6.570877869283337e-07, "loss": 8.5085, "step": 195870 }, { "epoch": 0.9782017029139304, "grad_norm": 0.09331946820020676, "learning_rate": 6.555858719867831e-07, "loss": 8.4902, "step": 195880 }, { "epoch": 0.9782516417388699, "grad_norm": 0.08849994838237762, "learning_rate": 6.540839570452327e-07, "loss": 8.513, "step": 195890 }, { "epoch": 0.9783015805638093, "grad_norm": 0.09173839539289474, "learning_rate": 6.525820421036822e-07, "loss": 8.4979, "step": 195900 }, { "epoch": 0.9783515193887488, "grad_norm": 0.09019637852907181, "learning_rate": 6.510801271621318e-07, "loss": 8.503, "step": 195910 }, { "epoch": 0.9784014582136882, "grad_norm": 0.09567339718341827, "learning_rate": 6.495782122205813e-07, "loss": 8.5031, "step": 195920 }, { "epoch": 0.9784513970386277, "grad_norm": 0.09023743122816086, "learning_rate": 6.480762972790307e-07, "loss": 8.4926, "step": 195930 }, { "epoch": 0.9785013358635671, "grad_norm": 0.08731987327337265, "learning_rate": 6.465743823374803e-07, "loss": 8.4897, "step": 195940 }, { "epoch": 0.9785512746885066, "grad_norm": 0.09049738943576813, "learning_rate": 6.450724673959298e-07, "loss": 8.514, "step": 195950 }, { "epoch": 0.978601213513446, "grad_norm": 0.09187744557857513, "learning_rate": 6.435705524543794e-07, "loss": 8.4761, "step": 195960 }, { "epoch": 0.9786511523383855, "grad_norm": 0.09112652391195297, "learning_rate": 6.420686375128289e-07, "loss": 8.5003, "step": 195970 }, { "epoch": 0.9787010911633249, "grad_norm": 0.09285633265972137, "learning_rate": 6.405667225712785e-07, "loss": 8.4864, "step": 195980 }, { "epoch": 0.9787510299882644, "grad_norm": 0.09360034763813019, "learning_rate": 6.390648076297279e-07, "loss": 8.497, "step": 195990 }, { "epoch": 0.9788009688132038, "grad_norm": 0.09473513811826706, "learning_rate": 6.375628926881774e-07, "loss": 8.5115, "step": 196000 }, { "epoch": 0.9788509076381433, "grad_norm": 0.09806603938341141, "learning_rate": 6.36060977746627e-07, "loss": 8.5038, "step": 196010 }, { "epoch": 0.9789008464630827, "grad_norm": 0.0843457505106926, "learning_rate": 6.345590628050765e-07, "loss": 8.5104, "step": 196020 }, { "epoch": 0.9789507852880222, "grad_norm": 0.0888645127415657, "learning_rate": 6.330571478635261e-07, "loss": 8.5043, "step": 196030 }, { "epoch": 0.9790007241129616, "grad_norm": 0.09105794876813889, "learning_rate": 6.315552329219755e-07, "loss": 8.5055, "step": 196040 }, { "epoch": 0.9790506629379011, "grad_norm": 0.08398020267486572, "learning_rate": 6.30053317980425e-07, "loss": 8.5022, "step": 196050 }, { "epoch": 0.9791006017628405, "grad_norm": 0.09144193679094315, "learning_rate": 6.285514030388746e-07, "loss": 8.4931, "step": 196060 }, { "epoch": 0.97915054058778, "grad_norm": 0.08951180428266525, "learning_rate": 6.270494880973241e-07, "loss": 8.4928, "step": 196070 }, { "epoch": 0.9792004794127194, "grad_norm": 0.08442022651433945, "learning_rate": 6.255475731557737e-07, "loss": 8.495, "step": 196080 }, { "epoch": 0.9792504182376589, "grad_norm": 0.0968695729970932, "learning_rate": 6.240456582142231e-07, "loss": 8.4993, "step": 196090 }, { "epoch": 0.9793003570625983, "grad_norm": 0.08780843019485474, "learning_rate": 6.225437432726727e-07, "loss": 8.5002, "step": 196100 }, { "epoch": 0.9793502958875377, "grad_norm": 0.08662664890289307, "learning_rate": 6.210418283311222e-07, "loss": 8.5069, "step": 196110 }, { "epoch": 0.9794002347124772, "grad_norm": 0.09597373008728027, "learning_rate": 6.195399133895717e-07, "loss": 8.5132, "step": 196120 }, { "epoch": 0.9794501735374167, "grad_norm": 0.09307907521724701, "learning_rate": 6.180379984480213e-07, "loss": 8.4828, "step": 196130 }, { "epoch": 0.9795001123623561, "grad_norm": 0.0918552353978157, "learning_rate": 6.165360835064708e-07, "loss": 8.508, "step": 196140 }, { "epoch": 0.9795500511872955, "grad_norm": 0.08918620645999908, "learning_rate": 6.150341685649203e-07, "loss": 8.5133, "step": 196150 }, { "epoch": 0.979599990012235, "grad_norm": 0.09026898443698883, "learning_rate": 6.135322536233698e-07, "loss": 8.5028, "step": 196160 }, { "epoch": 0.9796499288371745, "grad_norm": 0.08795063197612762, "learning_rate": 6.120303386818193e-07, "loss": 8.5184, "step": 196170 }, { "epoch": 0.9796998676621139, "grad_norm": 0.09220097213983536, "learning_rate": 6.105284237402689e-07, "loss": 8.5082, "step": 196180 }, { "epoch": 0.9797498064870533, "grad_norm": 0.08570639044046402, "learning_rate": 6.090265087987184e-07, "loss": 8.5005, "step": 196190 }, { "epoch": 0.9797997453119928, "grad_norm": 0.08638404309749603, "learning_rate": 6.075245938571679e-07, "loss": 8.506, "step": 196200 }, { "epoch": 0.9798496841369323, "grad_norm": 0.09257509559392929, "learning_rate": 6.060226789156174e-07, "loss": 8.4789, "step": 196210 }, { "epoch": 0.9798996229618717, "grad_norm": 0.09392807632684708, "learning_rate": 6.04520763974067e-07, "loss": 8.4907, "step": 196220 }, { "epoch": 0.9799495617868111, "grad_norm": 0.087054044008255, "learning_rate": 6.030188490325165e-07, "loss": 8.4988, "step": 196230 }, { "epoch": 0.9799995006117506, "grad_norm": 0.09597992151975632, "learning_rate": 6.01516934090966e-07, "loss": 8.4917, "step": 196240 }, { "epoch": 0.9800494394366901, "grad_norm": 0.08776523172855377, "learning_rate": 6.000150191494156e-07, "loss": 8.5092, "step": 196250 }, { "epoch": 0.9800993782616295, "grad_norm": 0.08641576766967773, "learning_rate": 5.98513104207865e-07, "loss": 8.5092, "step": 196260 }, { "epoch": 0.9801493170865689, "grad_norm": 0.08972152322530746, "learning_rate": 5.970111892663146e-07, "loss": 8.4981, "step": 196270 }, { "epoch": 0.9801992559115084, "grad_norm": 0.09072574973106384, "learning_rate": 5.955092743247641e-07, "loss": 8.5155, "step": 196280 }, { "epoch": 0.9802491947364479, "grad_norm": 0.09410280734300613, "learning_rate": 5.940073593832137e-07, "loss": 8.5123, "step": 196290 }, { "epoch": 0.9802991335613873, "grad_norm": 0.09331624209880829, "learning_rate": 5.925054444416632e-07, "loss": 8.4814, "step": 196300 }, { "epoch": 0.9803490723863267, "grad_norm": 0.09342946112155914, "learning_rate": 5.910035295001126e-07, "loss": 8.4965, "step": 196310 }, { "epoch": 0.9803990112112662, "grad_norm": 0.08969230949878693, "learning_rate": 5.895016145585622e-07, "loss": 8.4857, "step": 196320 }, { "epoch": 0.9804489500362057, "grad_norm": 0.08578766137361526, "learning_rate": 5.879996996170117e-07, "loss": 8.4913, "step": 196330 }, { "epoch": 0.9804988888611451, "grad_norm": 0.0996427908539772, "learning_rate": 5.864977846754613e-07, "loss": 8.4963, "step": 196340 }, { "epoch": 0.9805488276860845, "grad_norm": 0.09154878556728363, "learning_rate": 5.849958697339108e-07, "loss": 8.4779, "step": 196350 }, { "epoch": 0.980598766511024, "grad_norm": 0.09681200236082077, "learning_rate": 5.834939547923602e-07, "loss": 8.5062, "step": 196360 }, { "epoch": 0.9806487053359635, "grad_norm": 0.09222651273012161, "learning_rate": 5.819920398508098e-07, "loss": 8.4888, "step": 196370 }, { "epoch": 0.9806986441609029, "grad_norm": 0.08910214900970459, "learning_rate": 5.804901249092593e-07, "loss": 8.5117, "step": 196380 }, { "epoch": 0.9807485829858423, "grad_norm": 0.09133374691009521, "learning_rate": 5.789882099677089e-07, "loss": 8.4863, "step": 196390 }, { "epoch": 0.9807985218107818, "grad_norm": 0.09324437379837036, "learning_rate": 5.774862950261584e-07, "loss": 8.4807, "step": 196400 }, { "epoch": 0.9808484606357213, "grad_norm": 0.08947594463825226, "learning_rate": 5.75984380084608e-07, "loss": 8.4995, "step": 196410 }, { "epoch": 0.9808983994606607, "grad_norm": 0.09630808234214783, "learning_rate": 5.744824651430574e-07, "loss": 8.4807, "step": 196420 }, { "epoch": 0.9809483382856001, "grad_norm": 0.08872811496257782, "learning_rate": 5.729805502015069e-07, "loss": 8.5092, "step": 196430 }, { "epoch": 0.9809982771105396, "grad_norm": 0.08594346791505814, "learning_rate": 5.714786352599565e-07, "loss": 8.5042, "step": 196440 }, { "epoch": 0.9810482159354791, "grad_norm": 0.08785369992256165, "learning_rate": 5.69976720318406e-07, "loss": 8.4873, "step": 196450 }, { "epoch": 0.9810981547604185, "grad_norm": 0.08623884618282318, "learning_rate": 5.684748053768556e-07, "loss": 8.5087, "step": 196460 }, { "epoch": 0.9811480935853579, "grad_norm": 0.09336108714342117, "learning_rate": 5.66972890435305e-07, "loss": 8.4784, "step": 196470 }, { "epoch": 0.9811980324102973, "grad_norm": 0.09492529183626175, "learning_rate": 5.654709754937545e-07, "loss": 8.5124, "step": 196480 }, { "epoch": 0.9812479712352369, "grad_norm": 0.08852214366197586, "learning_rate": 5.639690605522041e-07, "loss": 8.5098, "step": 196490 }, { "epoch": 0.9812979100601763, "grad_norm": 0.09364449232816696, "learning_rate": 5.624671456106536e-07, "loss": 8.4938, "step": 196500 }, { "epoch": 0.9813478488851157, "grad_norm": 0.0880199745297432, "learning_rate": 5.609652306691032e-07, "loss": 8.5006, "step": 196510 }, { "epoch": 0.9813977877100551, "grad_norm": 0.08908896148204803, "learning_rate": 5.594633157275527e-07, "loss": 8.5116, "step": 196520 }, { "epoch": 0.9814477265349947, "grad_norm": 0.0934915542602539, "learning_rate": 5.579614007860022e-07, "loss": 8.4807, "step": 196530 }, { "epoch": 0.9814976653599341, "grad_norm": 0.09108605235815048, "learning_rate": 5.564594858444517e-07, "loss": 8.4912, "step": 196540 }, { "epoch": 0.9815476041848735, "grad_norm": 0.08718253672122955, "learning_rate": 5.549575709029012e-07, "loss": 8.5017, "step": 196550 }, { "epoch": 0.981597543009813, "grad_norm": 0.09099813550710678, "learning_rate": 5.534556559613508e-07, "loss": 8.5023, "step": 196560 }, { "epoch": 0.9816474818347525, "grad_norm": 0.08504286408424377, "learning_rate": 5.519537410198003e-07, "loss": 8.522, "step": 196570 }, { "epoch": 0.9816974206596919, "grad_norm": 0.08254232257604599, "learning_rate": 5.504518260782498e-07, "loss": 8.4956, "step": 196580 }, { "epoch": 0.9817473594846313, "grad_norm": 0.08998601138591766, "learning_rate": 5.489499111366993e-07, "loss": 8.5012, "step": 196590 }, { "epoch": 0.9817972983095707, "grad_norm": 0.09334680438041687, "learning_rate": 5.474479961951489e-07, "loss": 8.5018, "step": 196600 }, { "epoch": 0.9818472371345103, "grad_norm": 0.09622131288051605, "learning_rate": 5.459460812535984e-07, "loss": 8.5014, "step": 196610 }, { "epoch": 0.9818971759594497, "grad_norm": 0.0919957160949707, "learning_rate": 5.444441663120479e-07, "loss": 8.5115, "step": 196620 }, { "epoch": 0.9819471147843891, "grad_norm": 0.08943960070610046, "learning_rate": 5.429422513704974e-07, "loss": 8.5137, "step": 196630 }, { "epoch": 0.9819970536093285, "grad_norm": 0.09023985266685486, "learning_rate": 5.414403364289469e-07, "loss": 8.5006, "step": 196640 }, { "epoch": 0.9820469924342681, "grad_norm": 0.09034501016139984, "learning_rate": 5.399384214873965e-07, "loss": 8.4943, "step": 196650 }, { "epoch": 0.9820969312592075, "grad_norm": 0.08900442719459534, "learning_rate": 5.38436506545846e-07, "loss": 8.5223, "step": 196660 }, { "epoch": 0.9821468700841469, "grad_norm": 0.08898912370204926, "learning_rate": 5.369345916042955e-07, "loss": 8.51, "step": 196670 }, { "epoch": 0.9821968089090863, "grad_norm": 0.09157400578260422, "learning_rate": 5.354326766627451e-07, "loss": 8.5056, "step": 196680 }, { "epoch": 0.9822467477340259, "grad_norm": 0.09418471902608871, "learning_rate": 5.339307617211945e-07, "loss": 8.4981, "step": 196690 }, { "epoch": 0.9822966865589653, "grad_norm": 0.0900435671210289, "learning_rate": 5.324288467796441e-07, "loss": 8.4888, "step": 196700 }, { "epoch": 0.9823466253839047, "grad_norm": 0.0917317196726799, "learning_rate": 5.309269318380936e-07, "loss": 8.5087, "step": 196710 }, { "epoch": 0.9823965642088441, "grad_norm": 0.09196818619966507, "learning_rate": 5.294250168965432e-07, "loss": 8.5133, "step": 196720 }, { "epoch": 0.9824465030337837, "grad_norm": 0.09314832836389542, "learning_rate": 5.279231019549927e-07, "loss": 8.4901, "step": 196730 }, { "epoch": 0.9824964418587231, "grad_norm": 0.09629698842763901, "learning_rate": 5.264211870134421e-07, "loss": 8.4865, "step": 196740 }, { "epoch": 0.9825463806836625, "grad_norm": 0.09784742444753647, "learning_rate": 5.249192720718917e-07, "loss": 8.4937, "step": 196750 }, { "epoch": 0.9825963195086019, "grad_norm": 0.08975919336080551, "learning_rate": 5.234173571303412e-07, "loss": 8.4921, "step": 196760 }, { "epoch": 0.9826462583335415, "grad_norm": 0.09287573397159576, "learning_rate": 5.219154421887908e-07, "loss": 8.4988, "step": 196770 }, { "epoch": 0.9826961971584809, "grad_norm": 0.09043823927640915, "learning_rate": 5.204135272472403e-07, "loss": 8.4797, "step": 196780 }, { "epoch": 0.9827461359834203, "grad_norm": 0.0993688553571701, "learning_rate": 5.189116123056899e-07, "loss": 8.4846, "step": 196790 }, { "epoch": 0.9827960748083597, "grad_norm": 0.0893450453877449, "learning_rate": 5.174096973641393e-07, "loss": 8.4981, "step": 196800 }, { "epoch": 0.9828460136332992, "grad_norm": 0.08755376189947128, "learning_rate": 5.159077824225888e-07, "loss": 8.5012, "step": 196810 }, { "epoch": 0.9828959524582387, "grad_norm": 0.09437739849090576, "learning_rate": 5.144058674810384e-07, "loss": 8.5008, "step": 196820 }, { "epoch": 0.9829458912831781, "grad_norm": 0.08554568886756897, "learning_rate": 5.129039525394879e-07, "loss": 8.4992, "step": 196830 }, { "epoch": 0.9829958301081175, "grad_norm": 0.09264194965362549, "learning_rate": 5.114020375979375e-07, "loss": 8.4875, "step": 196840 }, { "epoch": 0.983045768933057, "grad_norm": 0.09195772558450699, "learning_rate": 5.099001226563869e-07, "loss": 8.5076, "step": 196850 }, { "epoch": 0.9830957077579965, "grad_norm": 0.0927330031991005, "learning_rate": 5.083982077148364e-07, "loss": 8.5003, "step": 196860 }, { "epoch": 0.9831456465829359, "grad_norm": 0.09278733283281326, "learning_rate": 5.06896292773286e-07, "loss": 8.4892, "step": 196870 }, { "epoch": 0.9831955854078753, "grad_norm": 0.09159595519304276, "learning_rate": 5.053943778317355e-07, "loss": 8.501, "step": 196880 }, { "epoch": 0.9832455242328147, "grad_norm": 0.08706831187009811, "learning_rate": 5.038924628901851e-07, "loss": 8.5019, "step": 196890 }, { "epoch": 0.9832954630577543, "grad_norm": 0.08902629464864731, "learning_rate": 5.023905479486345e-07, "loss": 8.4914, "step": 196900 }, { "epoch": 0.9833454018826937, "grad_norm": 0.09136991202831268, "learning_rate": 5.00888633007084e-07, "loss": 8.4933, "step": 196910 }, { "epoch": 0.9833953407076331, "grad_norm": 0.09446898847818375, "learning_rate": 4.993867180655336e-07, "loss": 8.4835, "step": 196920 }, { "epoch": 0.9834452795325725, "grad_norm": 0.09721554815769196, "learning_rate": 4.978848031239831e-07, "loss": 8.4986, "step": 196930 }, { "epoch": 0.9834952183575121, "grad_norm": 0.09217740595340729, "learning_rate": 4.963828881824327e-07, "loss": 8.4987, "step": 196940 }, { "epoch": 0.9835451571824515, "grad_norm": 0.09315560013055801, "learning_rate": 4.948809732408822e-07, "loss": 8.4985, "step": 196950 }, { "epoch": 0.9835950960073909, "grad_norm": 0.08701568096876144, "learning_rate": 4.933790582993317e-07, "loss": 8.5176, "step": 196960 }, { "epoch": 0.9836450348323303, "grad_norm": 0.09363686293363571, "learning_rate": 4.918771433577812e-07, "loss": 8.5186, "step": 196970 }, { "epoch": 0.9836949736572699, "grad_norm": 0.0900358110666275, "learning_rate": 4.903752284162307e-07, "loss": 8.5071, "step": 196980 }, { "epoch": 0.9837449124822093, "grad_norm": 0.08556856215000153, "learning_rate": 4.888733134746803e-07, "loss": 8.508, "step": 196990 }, { "epoch": 0.9837948513071487, "grad_norm": 0.08887305855751038, "learning_rate": 4.873713985331298e-07, "loss": 8.4958, "step": 197000 }, { "epoch": 0.9838447901320881, "grad_norm": 0.08783209323883057, "learning_rate": 4.858694835915793e-07, "loss": 8.5032, "step": 197010 }, { "epoch": 0.9838947289570277, "grad_norm": 0.08912937343120575, "learning_rate": 4.843675686500288e-07, "loss": 8.5039, "step": 197020 }, { "epoch": 0.9839446677819671, "grad_norm": 0.09269113093614578, "learning_rate": 4.828656537084784e-07, "loss": 8.4991, "step": 197030 }, { "epoch": 0.9839946066069065, "grad_norm": 0.0884513109922409, "learning_rate": 4.813637387669279e-07, "loss": 8.4936, "step": 197040 }, { "epoch": 0.9840445454318459, "grad_norm": 0.09053158015012741, "learning_rate": 4.798618238253774e-07, "loss": 8.5058, "step": 197050 }, { "epoch": 0.9840944842567855, "grad_norm": 0.09161665290594101, "learning_rate": 4.783599088838269e-07, "loss": 8.5196, "step": 197060 }, { "epoch": 0.9841444230817249, "grad_norm": 0.09395797550678253, "learning_rate": 4.768579939422764e-07, "loss": 8.5149, "step": 197070 }, { "epoch": 0.9841943619066643, "grad_norm": 0.09031932055950165, "learning_rate": 4.7535607900072597e-07, "loss": 8.5077, "step": 197080 }, { "epoch": 0.9842443007316037, "grad_norm": 0.08975038677453995, "learning_rate": 4.738541640591755e-07, "loss": 8.5292, "step": 197090 }, { "epoch": 0.9842942395565433, "grad_norm": 0.09021060913801193, "learning_rate": 4.7235224911762505e-07, "loss": 8.4957, "step": 197100 }, { "epoch": 0.9843441783814827, "grad_norm": 0.08840472251176834, "learning_rate": 4.7085033417607454e-07, "loss": 8.5036, "step": 197110 }, { "epoch": 0.9843941172064221, "grad_norm": 0.08798038959503174, "learning_rate": 4.693484192345241e-07, "loss": 8.5017, "step": 197120 }, { "epoch": 0.9844440560313615, "grad_norm": 0.08750328421592712, "learning_rate": 4.678465042929735e-07, "loss": 8.5018, "step": 197130 }, { "epoch": 0.9844939948563011, "grad_norm": 0.0955323576927185, "learning_rate": 4.6634458935142307e-07, "loss": 8.4924, "step": 197140 }, { "epoch": 0.9845439336812405, "grad_norm": 0.08959522098302841, "learning_rate": 4.648426744098726e-07, "loss": 8.5035, "step": 197150 }, { "epoch": 0.9845938725061799, "grad_norm": 0.08650697767734528, "learning_rate": 4.633407594683221e-07, "loss": 8.4908, "step": 197160 }, { "epoch": 0.9846438113311193, "grad_norm": 0.091778464615345, "learning_rate": 4.6183884452677164e-07, "loss": 8.4966, "step": 197170 }, { "epoch": 0.9846937501560589, "grad_norm": 0.08719423413276672, "learning_rate": 4.603369295852212e-07, "loss": 8.5151, "step": 197180 }, { "epoch": 0.9847436889809983, "grad_norm": 0.09176063537597656, "learning_rate": 4.588350146436707e-07, "loss": 8.4962, "step": 197190 }, { "epoch": 0.9847936278059377, "grad_norm": 0.08916878700256348, "learning_rate": 4.573330997021202e-07, "loss": 8.4894, "step": 197200 }, { "epoch": 0.9848435666308771, "grad_norm": 0.08697172999382019, "learning_rate": 4.558311847605697e-07, "loss": 8.5025, "step": 197210 }, { "epoch": 0.9848935054558167, "grad_norm": 0.09053148329257965, "learning_rate": 4.5432926981901925e-07, "loss": 8.4972, "step": 197220 }, { "epoch": 0.9849434442807561, "grad_norm": 0.0926218330860138, "learning_rate": 4.528273548774688e-07, "loss": 8.4953, "step": 197230 }, { "epoch": 0.9849933831056955, "grad_norm": 0.0867324098944664, "learning_rate": 4.513254399359183e-07, "loss": 8.4862, "step": 197240 }, { "epoch": 0.9850433219306349, "grad_norm": 0.0981505885720253, "learning_rate": 4.4982352499436783e-07, "loss": 8.4912, "step": 197250 }, { "epoch": 0.9850932607555745, "grad_norm": 0.09056996554136276, "learning_rate": 4.4832161005281737e-07, "loss": 8.508, "step": 197260 }, { "epoch": 0.9851431995805139, "grad_norm": 0.08942002803087234, "learning_rate": 4.4681969511126686e-07, "loss": 8.5016, "step": 197270 }, { "epoch": 0.9851931384054533, "grad_norm": 0.09431149810552597, "learning_rate": 4.453177801697164e-07, "loss": 8.5033, "step": 197280 }, { "epoch": 0.9852430772303927, "grad_norm": 0.08968444168567657, "learning_rate": 4.438158652281659e-07, "loss": 8.5009, "step": 197290 }, { "epoch": 0.9852930160553323, "grad_norm": 0.09338608384132385, "learning_rate": 4.4231395028661544e-07, "loss": 8.4933, "step": 197300 }, { "epoch": 0.9853429548802717, "grad_norm": 0.09319829195737839, "learning_rate": 4.40812035345065e-07, "loss": 8.5047, "step": 197310 }, { "epoch": 0.9853928937052111, "grad_norm": 0.08871421962976456, "learning_rate": 4.3931012040351447e-07, "loss": 8.4965, "step": 197320 }, { "epoch": 0.9854428325301505, "grad_norm": 0.08973289281129837, "learning_rate": 4.37808205461964e-07, "loss": 8.5169, "step": 197330 }, { "epoch": 0.9854927713550901, "grad_norm": 0.0932137593626976, "learning_rate": 4.3630629052041356e-07, "loss": 8.5038, "step": 197340 }, { "epoch": 0.9855427101800295, "grad_norm": 0.09041199833154678, "learning_rate": 4.3480437557886305e-07, "loss": 8.4874, "step": 197350 }, { "epoch": 0.9855926490049689, "grad_norm": 0.08918718248605728, "learning_rate": 4.333024606373126e-07, "loss": 8.5049, "step": 197360 }, { "epoch": 0.9856425878299083, "grad_norm": 0.0872543603181839, "learning_rate": 4.3180054569576213e-07, "loss": 8.4965, "step": 197370 }, { "epoch": 0.9856925266548479, "grad_norm": 0.09441404789686203, "learning_rate": 4.302986307542116e-07, "loss": 8.5013, "step": 197380 }, { "epoch": 0.9857424654797873, "grad_norm": 0.08817983418703079, "learning_rate": 4.2879671581266117e-07, "loss": 8.4965, "step": 197390 }, { "epoch": 0.9857924043047267, "grad_norm": 0.09181421250104904, "learning_rate": 4.2729480087111066e-07, "loss": 8.4777, "step": 197400 }, { "epoch": 0.9858423431296661, "grad_norm": 0.0921471118927002, "learning_rate": 4.257928859295602e-07, "loss": 8.4848, "step": 197410 }, { "epoch": 0.9858922819546057, "grad_norm": 0.08574623614549637, "learning_rate": 4.2429097098800974e-07, "loss": 8.4989, "step": 197420 }, { "epoch": 0.9859422207795451, "grad_norm": 0.08747486025094986, "learning_rate": 4.2278905604645923e-07, "loss": 8.4846, "step": 197430 }, { "epoch": 0.9859921596044845, "grad_norm": 0.09855210781097412, "learning_rate": 4.212871411049088e-07, "loss": 8.505, "step": 197440 }, { "epoch": 0.9860420984294239, "grad_norm": 0.09506363421678543, "learning_rate": 4.197852261633583e-07, "loss": 8.5137, "step": 197450 }, { "epoch": 0.9860920372543635, "grad_norm": 0.09487252682447433, "learning_rate": 4.182833112218078e-07, "loss": 8.499, "step": 197460 }, { "epoch": 0.9861419760793029, "grad_norm": 0.08848896622657776, "learning_rate": 4.1678139628025735e-07, "loss": 8.4939, "step": 197470 }, { "epoch": 0.9861919149042423, "grad_norm": 0.09323852509260178, "learning_rate": 4.1527948133870684e-07, "loss": 8.5181, "step": 197480 }, { "epoch": 0.9862418537291817, "grad_norm": 0.09195881336927414, "learning_rate": 4.137775663971564e-07, "loss": 8.5032, "step": 197490 }, { "epoch": 0.9862917925541212, "grad_norm": 0.08648441731929779, "learning_rate": 4.1227565145560593e-07, "loss": 8.5266, "step": 197500 }, { "epoch": 0.9863417313790607, "grad_norm": 0.08899568021297455, "learning_rate": 4.107737365140554e-07, "loss": 8.4906, "step": 197510 }, { "epoch": 0.9863916702040001, "grad_norm": 0.09646333009004593, "learning_rate": 4.0927182157250496e-07, "loss": 8.481, "step": 197520 }, { "epoch": 0.9864416090289395, "grad_norm": 0.09079481661319733, "learning_rate": 4.077699066309545e-07, "loss": 8.4866, "step": 197530 }, { "epoch": 0.986491547853879, "grad_norm": 0.09302527457475662, "learning_rate": 4.06267991689404e-07, "loss": 8.4956, "step": 197540 }, { "epoch": 0.9865414866788185, "grad_norm": 0.08830315619707108, "learning_rate": 4.0476607674785354e-07, "loss": 8.4984, "step": 197550 }, { "epoch": 0.9865914255037579, "grad_norm": 0.085964635014534, "learning_rate": 4.0326416180630303e-07, "loss": 8.4988, "step": 197560 }, { "epoch": 0.9866413643286973, "grad_norm": 0.0878470242023468, "learning_rate": 4.0176224686475257e-07, "loss": 8.4891, "step": 197570 }, { "epoch": 0.9866913031536368, "grad_norm": 0.09524169564247131, "learning_rate": 4.002603319232021e-07, "loss": 8.4738, "step": 197580 }, { "epoch": 0.9867412419785763, "grad_norm": 0.09403923898935318, "learning_rate": 3.987584169816516e-07, "loss": 8.4943, "step": 197590 }, { "epoch": 0.9867911808035157, "grad_norm": 0.09067419916391373, "learning_rate": 3.9725650204010115e-07, "loss": 8.5038, "step": 197600 }, { "epoch": 0.9868411196284551, "grad_norm": 0.09345284849405289, "learning_rate": 3.957545870985507e-07, "loss": 8.4813, "step": 197610 }, { "epoch": 0.9868910584533946, "grad_norm": 0.0898512527346611, "learning_rate": 3.942526721570002e-07, "loss": 8.4934, "step": 197620 }, { "epoch": 0.9869409972783341, "grad_norm": 0.08990932255983353, "learning_rate": 3.927507572154497e-07, "loss": 8.5102, "step": 197630 }, { "epoch": 0.9869909361032735, "grad_norm": 0.08922252804040909, "learning_rate": 3.9124884227389927e-07, "loss": 8.5005, "step": 197640 }, { "epoch": 0.9870408749282129, "grad_norm": 0.09017761051654816, "learning_rate": 3.8974692733234876e-07, "loss": 8.4891, "step": 197650 }, { "epoch": 0.9870908137531524, "grad_norm": 0.09690330922603607, "learning_rate": 3.882450123907983e-07, "loss": 8.519, "step": 197660 }, { "epoch": 0.9871407525780919, "grad_norm": 0.08771155774593353, "learning_rate": 3.867430974492478e-07, "loss": 8.5076, "step": 197670 }, { "epoch": 0.9871906914030313, "grad_norm": 0.09097769856452942, "learning_rate": 3.8524118250769733e-07, "loss": 8.4906, "step": 197680 }, { "epoch": 0.9872406302279707, "grad_norm": 0.0901932418346405, "learning_rate": 3.837392675661469e-07, "loss": 8.4956, "step": 197690 }, { "epoch": 0.9872905690529102, "grad_norm": 0.08922629803419113, "learning_rate": 3.8223735262459637e-07, "loss": 8.5213, "step": 197700 }, { "epoch": 0.9873405078778497, "grad_norm": 0.09865950047969818, "learning_rate": 3.807354376830459e-07, "loss": 8.5128, "step": 197710 }, { "epoch": 0.9873904467027891, "grad_norm": 0.0916372612118721, "learning_rate": 3.7923352274149545e-07, "loss": 8.5038, "step": 197720 }, { "epoch": 0.9874403855277285, "grad_norm": 0.09816200286149979, "learning_rate": 3.7773160779994494e-07, "loss": 8.483, "step": 197730 }, { "epoch": 0.987490324352668, "grad_norm": 0.0914330706000328, "learning_rate": 3.762296928583945e-07, "loss": 8.5011, "step": 197740 }, { "epoch": 0.9875402631776075, "grad_norm": 0.09769126027822495, "learning_rate": 3.74727777916844e-07, "loss": 8.5084, "step": 197750 }, { "epoch": 0.9875902020025469, "grad_norm": 0.09237746149301529, "learning_rate": 3.732258629752935e-07, "loss": 8.4931, "step": 197760 }, { "epoch": 0.9876401408274863, "grad_norm": 0.09044523537158966, "learning_rate": 3.7172394803374306e-07, "loss": 8.5058, "step": 197770 }, { "epoch": 0.9876900796524257, "grad_norm": 0.09292086958885193, "learning_rate": 3.7022203309219255e-07, "loss": 8.486, "step": 197780 }, { "epoch": 0.9877400184773653, "grad_norm": 0.08766113966703415, "learning_rate": 3.687201181506421e-07, "loss": 8.5007, "step": 197790 }, { "epoch": 0.9877899573023047, "grad_norm": 0.09624643623828888, "learning_rate": 3.6721820320909164e-07, "loss": 8.4815, "step": 197800 }, { "epoch": 0.9878398961272441, "grad_norm": 0.0919470340013504, "learning_rate": 3.6571628826754113e-07, "loss": 8.5096, "step": 197810 }, { "epoch": 0.9878898349521835, "grad_norm": 0.09844647347927094, "learning_rate": 3.6421437332599067e-07, "loss": 8.4995, "step": 197820 }, { "epoch": 0.987939773777123, "grad_norm": 0.0885077640414238, "learning_rate": 3.6271245838444016e-07, "loss": 8.4987, "step": 197830 }, { "epoch": 0.9879897126020625, "grad_norm": 0.09276360273361206, "learning_rate": 3.612105434428897e-07, "loss": 8.496, "step": 197840 }, { "epoch": 0.9880396514270019, "grad_norm": 0.09369642287492752, "learning_rate": 3.5970862850133925e-07, "loss": 8.4979, "step": 197850 }, { "epoch": 0.9880895902519413, "grad_norm": 0.08917860686779022, "learning_rate": 3.5820671355978874e-07, "loss": 8.4985, "step": 197860 }, { "epoch": 0.9881395290768809, "grad_norm": 0.09895788133144379, "learning_rate": 3.567047986182383e-07, "loss": 8.5084, "step": 197870 }, { "epoch": 0.9881894679018203, "grad_norm": 0.09372813254594803, "learning_rate": 3.552028836766878e-07, "loss": 8.4956, "step": 197880 }, { "epoch": 0.9882394067267597, "grad_norm": 0.08584040403366089, "learning_rate": 3.537009687351373e-07, "loss": 8.4953, "step": 197890 }, { "epoch": 0.9882893455516991, "grad_norm": 0.09182565659284592, "learning_rate": 3.5219905379358686e-07, "loss": 8.4967, "step": 197900 }, { "epoch": 0.9883392843766386, "grad_norm": 0.09269486367702484, "learning_rate": 3.506971388520364e-07, "loss": 8.486, "step": 197910 }, { "epoch": 0.9883892232015781, "grad_norm": 0.09027913957834244, "learning_rate": 3.491952239104859e-07, "loss": 8.4925, "step": 197920 }, { "epoch": 0.9884391620265175, "grad_norm": 0.09345810860395432, "learning_rate": 3.4769330896893544e-07, "loss": 8.5197, "step": 197930 }, { "epoch": 0.9884891008514569, "grad_norm": 0.09321021288633347, "learning_rate": 3.461913940273849e-07, "loss": 8.5137, "step": 197940 }, { "epoch": 0.9885390396763964, "grad_norm": 0.09090397506952286, "learning_rate": 3.4468947908583447e-07, "loss": 8.4701, "step": 197950 }, { "epoch": 0.9885889785013359, "grad_norm": 0.08423801511526108, "learning_rate": 3.43187564144284e-07, "loss": 8.4907, "step": 197960 }, { "epoch": 0.9886389173262753, "grad_norm": 0.08930182456970215, "learning_rate": 3.416856492027335e-07, "loss": 8.506, "step": 197970 }, { "epoch": 0.9886888561512147, "grad_norm": 0.09574810415506363, "learning_rate": 3.4018373426118304e-07, "loss": 8.4963, "step": 197980 }, { "epoch": 0.9887387949761542, "grad_norm": 0.09295687824487686, "learning_rate": 3.386818193196326e-07, "loss": 8.4867, "step": 197990 }, { "epoch": 0.9887887338010937, "grad_norm": 0.09014062583446503, "learning_rate": 3.371799043780821e-07, "loss": 8.4981, "step": 198000 }, { "epoch": 0.9888386726260331, "grad_norm": 0.08868231624364853, "learning_rate": 3.356779894365316e-07, "loss": 8.4993, "step": 198010 }, { "epoch": 0.9888886114509725, "grad_norm": 0.09391395002603531, "learning_rate": 3.341760744949811e-07, "loss": 8.4724, "step": 198020 }, { "epoch": 0.988938550275912, "grad_norm": 0.08955026417970657, "learning_rate": 3.3267415955343065e-07, "loss": 8.5013, "step": 198030 }, { "epoch": 0.9889884891008515, "grad_norm": 0.09359218180179596, "learning_rate": 3.311722446118802e-07, "loss": 8.4972, "step": 198040 }, { "epoch": 0.9890384279257909, "grad_norm": 0.09413956850767136, "learning_rate": 3.296703296703297e-07, "loss": 8.5147, "step": 198050 }, { "epoch": 0.9890883667507303, "grad_norm": 0.09088245779275894, "learning_rate": 3.281684147287792e-07, "loss": 8.4853, "step": 198060 }, { "epoch": 0.9891383055756698, "grad_norm": 0.09208165854215622, "learning_rate": 3.266664997872287e-07, "loss": 8.4913, "step": 198070 }, { "epoch": 0.9891882444006093, "grad_norm": 0.09409578889608383, "learning_rate": 3.251645848456782e-07, "loss": 8.4958, "step": 198080 }, { "epoch": 0.9892381832255487, "grad_norm": 0.08956706523895264, "learning_rate": 3.2366266990412775e-07, "loss": 8.4896, "step": 198090 }, { "epoch": 0.9892881220504881, "grad_norm": 0.09481275826692581, "learning_rate": 3.2216075496257724e-07, "loss": 8.4854, "step": 198100 }, { "epoch": 0.9893380608754276, "grad_norm": 0.09185443073511124, "learning_rate": 3.206588400210268e-07, "loss": 8.5019, "step": 198110 }, { "epoch": 0.9893879997003671, "grad_norm": 0.09097184240818024, "learning_rate": 3.1915692507947633e-07, "loss": 8.5186, "step": 198120 }, { "epoch": 0.9894379385253065, "grad_norm": 0.0905596986413002, "learning_rate": 3.176550101379258e-07, "loss": 8.5126, "step": 198130 }, { "epoch": 0.9894878773502459, "grad_norm": 0.09233243763446808, "learning_rate": 3.1615309519637536e-07, "loss": 8.4988, "step": 198140 }, { "epoch": 0.9895378161751854, "grad_norm": 0.0961671769618988, "learning_rate": 3.146511802548249e-07, "loss": 8.4913, "step": 198150 }, { "epoch": 0.9895877550001249, "grad_norm": 0.09645285457372665, "learning_rate": 3.131492653132744e-07, "loss": 8.4986, "step": 198160 }, { "epoch": 0.9896376938250643, "grad_norm": 0.09721177816390991, "learning_rate": 3.1164735037172394e-07, "loss": 8.5021, "step": 198170 }, { "epoch": 0.9896876326500037, "grad_norm": 0.09604041278362274, "learning_rate": 3.101454354301735e-07, "loss": 8.5088, "step": 198180 }, { "epoch": 0.9897375714749432, "grad_norm": 0.09055788815021515, "learning_rate": 3.0864352048862297e-07, "loss": 8.4992, "step": 198190 }, { "epoch": 0.9897875102998827, "grad_norm": 0.08688069880008698, "learning_rate": 3.071416055470725e-07, "loss": 8.5091, "step": 198200 }, { "epoch": 0.9898374491248221, "grad_norm": 0.0961107388138771, "learning_rate": 3.05639690605522e-07, "loss": 8.5026, "step": 198210 }, { "epoch": 0.9898873879497615, "grad_norm": 0.0864131972193718, "learning_rate": 3.0413777566397155e-07, "loss": 8.5183, "step": 198220 }, { "epoch": 0.989937326774701, "grad_norm": 0.09623812884092331, "learning_rate": 3.026358607224211e-07, "loss": 8.5025, "step": 198230 }, { "epoch": 0.9899872655996405, "grad_norm": 0.0971948653459549, "learning_rate": 3.011339457808706e-07, "loss": 8.4985, "step": 198240 }, { "epoch": 0.9900372044245799, "grad_norm": 0.09427471458911896, "learning_rate": 2.996320308393201e-07, "loss": 8.4906, "step": 198250 }, { "epoch": 0.9900871432495193, "grad_norm": 0.09101586788892746, "learning_rate": 2.9813011589776967e-07, "loss": 8.5086, "step": 198260 }, { "epoch": 0.9901370820744588, "grad_norm": 0.09006375819444656, "learning_rate": 2.9662820095621916e-07, "loss": 8.493, "step": 198270 }, { "epoch": 0.9901870208993983, "grad_norm": 0.08943824470043182, "learning_rate": 2.951262860146687e-07, "loss": 8.5086, "step": 198280 }, { "epoch": 0.9902369597243377, "grad_norm": 0.08912531286478043, "learning_rate": 2.936243710731182e-07, "loss": 8.4813, "step": 198290 }, { "epoch": 0.9902868985492771, "grad_norm": 0.10092119127511978, "learning_rate": 2.9212245613156774e-07, "loss": 8.4847, "step": 198300 }, { "epoch": 0.9903368373742166, "grad_norm": 0.093541719019413, "learning_rate": 2.906205411900173e-07, "loss": 8.5023, "step": 198310 }, { "epoch": 0.990386776199156, "grad_norm": 0.09948543459177017, "learning_rate": 2.8911862624846677e-07, "loss": 8.4979, "step": 198320 }, { "epoch": 0.9904367150240955, "grad_norm": 0.0884837880730629, "learning_rate": 2.876167113069163e-07, "loss": 8.5008, "step": 198330 }, { "epoch": 0.9904866538490349, "grad_norm": 0.08703433722257614, "learning_rate": 2.8611479636536585e-07, "loss": 8.4779, "step": 198340 }, { "epoch": 0.9905365926739744, "grad_norm": 0.09166325628757477, "learning_rate": 2.8461288142381534e-07, "loss": 8.4882, "step": 198350 }, { "epoch": 0.9905865314989138, "grad_norm": 0.09063871204853058, "learning_rate": 2.831109664822649e-07, "loss": 8.5057, "step": 198360 }, { "epoch": 0.9906364703238533, "grad_norm": 0.08705553412437439, "learning_rate": 2.816090515407144e-07, "loss": 8.5068, "step": 198370 }, { "epoch": 0.9906864091487927, "grad_norm": 0.09029226005077362, "learning_rate": 2.801071365991639e-07, "loss": 8.4944, "step": 198380 }, { "epoch": 0.9907363479737322, "grad_norm": 0.09434723854064941, "learning_rate": 2.7860522165761346e-07, "loss": 8.4833, "step": 198390 }, { "epoch": 0.9907862867986716, "grad_norm": 0.0964699313044548, "learning_rate": 2.7710330671606295e-07, "loss": 8.4942, "step": 198400 }, { "epoch": 0.9908362256236111, "grad_norm": 0.08891110122203827, "learning_rate": 2.756013917745125e-07, "loss": 8.4942, "step": 198410 }, { "epoch": 0.9908861644485505, "grad_norm": 0.09020981937646866, "learning_rate": 2.7409947683296204e-07, "loss": 8.5053, "step": 198420 }, { "epoch": 0.99093610327349, "grad_norm": 0.09375789016485214, "learning_rate": 2.7259756189141153e-07, "loss": 8.503, "step": 198430 }, { "epoch": 0.9909860420984294, "grad_norm": 0.08944463729858398, "learning_rate": 2.7109564694986107e-07, "loss": 8.5068, "step": 198440 }, { "epoch": 0.9910359809233689, "grad_norm": 0.0924299880862236, "learning_rate": 2.695937320083106e-07, "loss": 8.4966, "step": 198450 }, { "epoch": 0.9910859197483083, "grad_norm": 0.09989719092845917, "learning_rate": 2.680918170667601e-07, "loss": 8.4982, "step": 198460 }, { "epoch": 0.9911358585732478, "grad_norm": 0.09110445529222488, "learning_rate": 2.6658990212520965e-07, "loss": 8.4971, "step": 198470 }, { "epoch": 0.9911857973981872, "grad_norm": 0.09000138938426971, "learning_rate": 2.6508798718365914e-07, "loss": 8.496, "step": 198480 }, { "epoch": 0.9912357362231267, "grad_norm": 0.09816454350948334, "learning_rate": 2.635860722421087e-07, "loss": 8.4862, "step": 198490 }, { "epoch": 0.9912856750480661, "grad_norm": 0.09222198277711868, "learning_rate": 2.620841573005582e-07, "loss": 8.495, "step": 198500 }, { "epoch": 0.9913356138730056, "grad_norm": 0.09800387173891068, "learning_rate": 2.605822423590077e-07, "loss": 8.5051, "step": 198510 }, { "epoch": 0.991385552697945, "grad_norm": 0.08874485641717911, "learning_rate": 2.5908032741745726e-07, "loss": 8.4951, "step": 198520 }, { "epoch": 0.9914354915228845, "grad_norm": 0.08928778022527695, "learning_rate": 2.575784124759068e-07, "loss": 8.4947, "step": 198530 }, { "epoch": 0.9914854303478239, "grad_norm": 0.08784019201993942, "learning_rate": 2.560764975343563e-07, "loss": 8.4991, "step": 198540 }, { "epoch": 0.9915353691727634, "grad_norm": 0.09007895737886429, "learning_rate": 2.5457458259280584e-07, "loss": 8.4845, "step": 198550 }, { "epoch": 0.9915853079977028, "grad_norm": 0.08894022554159164, "learning_rate": 2.530726676512553e-07, "loss": 8.5006, "step": 198560 }, { "epoch": 0.9916352468226423, "grad_norm": 0.08828302472829819, "learning_rate": 2.5157075270970487e-07, "loss": 8.52, "step": 198570 }, { "epoch": 0.9916851856475817, "grad_norm": 0.0931435376405716, "learning_rate": 2.500688377681544e-07, "loss": 8.5057, "step": 198580 }, { "epoch": 0.9917351244725212, "grad_norm": 0.09200121462345123, "learning_rate": 2.485669228266039e-07, "loss": 8.4947, "step": 198590 }, { "epoch": 0.9917850632974606, "grad_norm": 0.08936648070812225, "learning_rate": 2.4706500788505345e-07, "loss": 8.4976, "step": 198600 }, { "epoch": 0.9918350021224001, "grad_norm": 0.08992834389209747, "learning_rate": 2.45563092943503e-07, "loss": 8.5161, "step": 198610 }, { "epoch": 0.9918849409473395, "grad_norm": 0.0911891758441925, "learning_rate": 2.440611780019525e-07, "loss": 8.4884, "step": 198620 }, { "epoch": 0.991934879772279, "grad_norm": 0.08730587363243103, "learning_rate": 2.42559263060402e-07, "loss": 8.513, "step": 198630 }, { "epoch": 0.9919848185972184, "grad_norm": 0.09357264637947083, "learning_rate": 2.410573481188515e-07, "loss": 8.5056, "step": 198640 }, { "epoch": 0.9920347574221579, "grad_norm": 0.08683697134256363, "learning_rate": 2.3955543317730105e-07, "loss": 8.4803, "step": 198650 }, { "epoch": 0.9920846962470973, "grad_norm": 0.08779203146696091, "learning_rate": 2.3805351823575057e-07, "loss": 8.4917, "step": 198660 }, { "epoch": 0.9921346350720368, "grad_norm": 0.096683070063591, "learning_rate": 2.3655160329420011e-07, "loss": 8.5142, "step": 198670 }, { "epoch": 0.9921845738969762, "grad_norm": 0.08591101318597794, "learning_rate": 2.3504968835264963e-07, "loss": 8.5099, "step": 198680 }, { "epoch": 0.9922345127219157, "grad_norm": 0.08974206447601318, "learning_rate": 2.3354777341109915e-07, "loss": 8.5142, "step": 198690 }, { "epoch": 0.9922844515468551, "grad_norm": 0.09247114509344101, "learning_rate": 2.3204585846954866e-07, "loss": 8.4848, "step": 198700 }, { "epoch": 0.9923343903717946, "grad_norm": 0.08844613283872604, "learning_rate": 2.305439435279982e-07, "loss": 8.4997, "step": 198710 }, { "epoch": 0.992384329196734, "grad_norm": 0.08982079476118088, "learning_rate": 2.2904202858644772e-07, "loss": 8.4906, "step": 198720 }, { "epoch": 0.9924342680216734, "grad_norm": 0.08924318850040436, "learning_rate": 2.2754011364489724e-07, "loss": 8.4932, "step": 198730 }, { "epoch": 0.9924842068466129, "grad_norm": 0.08730906248092651, "learning_rate": 2.2603819870334678e-07, "loss": 8.5035, "step": 198740 }, { "epoch": 0.9925341456715524, "grad_norm": 0.08822734653949738, "learning_rate": 2.245362837617963e-07, "loss": 8.5039, "step": 198750 }, { "epoch": 0.9925840844964918, "grad_norm": 0.09212057292461395, "learning_rate": 2.2303436882024582e-07, "loss": 8.4992, "step": 198760 }, { "epoch": 0.9926340233214312, "grad_norm": 0.08733692765235901, "learning_rate": 2.2153245387869533e-07, "loss": 8.479, "step": 198770 }, { "epoch": 0.9926839621463707, "grad_norm": 0.08462866395711899, "learning_rate": 2.2003053893714488e-07, "loss": 8.4978, "step": 198780 }, { "epoch": 0.9927339009713101, "grad_norm": 0.09213852882385254, "learning_rate": 2.185286239955944e-07, "loss": 8.5001, "step": 198790 }, { "epoch": 0.9927838397962496, "grad_norm": 0.08344164490699768, "learning_rate": 2.170267090540439e-07, "loss": 8.5045, "step": 198800 }, { "epoch": 0.992833778621189, "grad_norm": 0.08867906033992767, "learning_rate": 2.1552479411249343e-07, "loss": 8.5079, "step": 198810 }, { "epoch": 0.9928837174461285, "grad_norm": 0.08968056738376617, "learning_rate": 2.1402287917094297e-07, "loss": 8.5275, "step": 198820 }, { "epoch": 0.9929336562710679, "grad_norm": 0.0994110107421875, "learning_rate": 2.1252096422939249e-07, "loss": 8.506, "step": 198830 }, { "epoch": 0.9929835950960074, "grad_norm": 0.09683678299188614, "learning_rate": 2.11019049287842e-07, "loss": 8.5073, "step": 198840 }, { "epoch": 0.9930335339209468, "grad_norm": 0.0983363687992096, "learning_rate": 2.0951713434629152e-07, "loss": 8.4861, "step": 198850 }, { "epoch": 0.9930834727458863, "grad_norm": 0.09034904837608337, "learning_rate": 2.0801521940474106e-07, "loss": 8.4945, "step": 198860 }, { "epoch": 0.9931334115708257, "grad_norm": 0.09096697717905045, "learning_rate": 2.0651330446319058e-07, "loss": 8.5022, "step": 198870 }, { "epoch": 0.9931833503957652, "grad_norm": 0.085932657122612, "learning_rate": 2.050113895216401e-07, "loss": 8.5177, "step": 198880 }, { "epoch": 0.9932332892207046, "grad_norm": 0.08621320128440857, "learning_rate": 2.035094745800896e-07, "loss": 8.4877, "step": 198890 }, { "epoch": 0.9932832280456441, "grad_norm": 0.0914321318268776, "learning_rate": 2.0200755963853916e-07, "loss": 8.4897, "step": 198900 }, { "epoch": 0.9933331668705835, "grad_norm": 0.08765504509210587, "learning_rate": 2.0050564469698867e-07, "loss": 8.502, "step": 198910 }, { "epoch": 0.993383105695523, "grad_norm": 0.09285347908735275, "learning_rate": 1.990037297554382e-07, "loss": 8.4892, "step": 198920 }, { "epoch": 0.9934330445204624, "grad_norm": 0.08891350775957108, "learning_rate": 1.975018148138877e-07, "loss": 8.5031, "step": 198930 }, { "epoch": 0.9934829833454019, "grad_norm": 0.09927456080913544, "learning_rate": 1.9599989987233725e-07, "loss": 8.5036, "step": 198940 }, { "epoch": 0.9935329221703413, "grad_norm": 0.08990452438592911, "learning_rate": 1.9449798493078676e-07, "loss": 8.4938, "step": 198950 }, { "epoch": 0.9935828609952808, "grad_norm": 0.08888816833496094, "learning_rate": 1.9299606998923628e-07, "loss": 8.4857, "step": 198960 }, { "epoch": 0.9936327998202202, "grad_norm": 0.09310969710350037, "learning_rate": 1.914941550476858e-07, "loss": 8.4864, "step": 198970 }, { "epoch": 0.9936827386451597, "grad_norm": 0.08977384865283966, "learning_rate": 1.8999224010613534e-07, "loss": 8.4948, "step": 198980 }, { "epoch": 0.9937326774700991, "grad_norm": 0.08601712435483932, "learning_rate": 1.8849032516458486e-07, "loss": 8.5055, "step": 198990 }, { "epoch": 0.9937826162950386, "grad_norm": 0.08776349574327469, "learning_rate": 1.8698841022303437e-07, "loss": 8.4916, "step": 199000 }, { "epoch": 0.993832555119978, "grad_norm": 0.09913208335638046, "learning_rate": 1.8548649528148392e-07, "loss": 8.4838, "step": 199010 }, { "epoch": 0.9938824939449175, "grad_norm": 0.09497472643852234, "learning_rate": 1.8398458033993343e-07, "loss": 8.4997, "step": 199020 }, { "epoch": 0.9939324327698569, "grad_norm": 0.08809848874807358, "learning_rate": 1.8248266539838295e-07, "loss": 8.4952, "step": 199030 }, { "epoch": 0.9939823715947964, "grad_norm": 0.09473247826099396, "learning_rate": 1.8098075045683247e-07, "loss": 8.4957, "step": 199040 }, { "epoch": 0.9940323104197358, "grad_norm": 0.08685030788183212, "learning_rate": 1.79478835515282e-07, "loss": 8.5072, "step": 199050 }, { "epoch": 0.9940822492446753, "grad_norm": 0.08791281282901764, "learning_rate": 1.7797692057373153e-07, "loss": 8.5036, "step": 199060 }, { "epoch": 0.9941321880696147, "grad_norm": 0.09338774532079697, "learning_rate": 1.7647500563218104e-07, "loss": 8.4922, "step": 199070 }, { "epoch": 0.9941821268945542, "grad_norm": 0.09999926388263702, "learning_rate": 1.7497309069063056e-07, "loss": 8.5116, "step": 199080 }, { "epoch": 0.9942320657194936, "grad_norm": 0.09071855247020721, "learning_rate": 1.734711757490801e-07, "loss": 8.5158, "step": 199090 }, { "epoch": 0.994282004544433, "grad_norm": 0.0905730351805687, "learning_rate": 1.7196926080752962e-07, "loss": 8.4812, "step": 199100 }, { "epoch": 0.9943319433693725, "grad_norm": 0.08825069665908813, "learning_rate": 1.7046734586597914e-07, "loss": 8.5107, "step": 199110 }, { "epoch": 0.994381882194312, "grad_norm": 0.08565869927406311, "learning_rate": 1.6896543092442865e-07, "loss": 8.5244, "step": 199120 }, { "epoch": 0.9944318210192514, "grad_norm": 0.09292873740196228, "learning_rate": 1.674635159828782e-07, "loss": 8.501, "step": 199130 }, { "epoch": 0.9944817598441908, "grad_norm": 0.09264521300792694, "learning_rate": 1.659616010413277e-07, "loss": 8.5132, "step": 199140 }, { "epoch": 0.9945316986691303, "grad_norm": 0.09055811911821365, "learning_rate": 1.6445968609977723e-07, "loss": 8.502, "step": 199150 }, { "epoch": 0.9945816374940698, "grad_norm": 0.09365162253379822, "learning_rate": 1.6295777115822672e-07, "loss": 8.5061, "step": 199160 }, { "epoch": 0.9946315763190092, "grad_norm": 0.08964534848928452, "learning_rate": 1.6145585621667626e-07, "loss": 8.4963, "step": 199170 }, { "epoch": 0.9946815151439486, "grad_norm": 0.08972986787557602, "learning_rate": 1.5995394127512578e-07, "loss": 8.501, "step": 199180 }, { "epoch": 0.9947314539688881, "grad_norm": 0.09291598945856094, "learning_rate": 1.584520263335753e-07, "loss": 8.5065, "step": 199190 }, { "epoch": 0.9947813927938276, "grad_norm": 0.09010519832372665, "learning_rate": 1.569501113920248e-07, "loss": 8.5005, "step": 199200 }, { "epoch": 0.994831331618767, "grad_norm": 0.08901604264974594, "learning_rate": 1.5544819645047436e-07, "loss": 8.4986, "step": 199210 }, { "epoch": 0.9948812704437064, "grad_norm": 0.09446748346090317, "learning_rate": 1.5394628150892387e-07, "loss": 8.491, "step": 199220 }, { "epoch": 0.9949312092686459, "grad_norm": 0.08962590992450714, "learning_rate": 1.524443665673734e-07, "loss": 8.499, "step": 199230 }, { "epoch": 0.9949811480935854, "grad_norm": 0.0894179418683052, "learning_rate": 1.509424516258229e-07, "loss": 8.5048, "step": 199240 }, { "epoch": 0.9950310869185248, "grad_norm": 0.0883219912648201, "learning_rate": 1.4944053668427245e-07, "loss": 8.5145, "step": 199250 }, { "epoch": 0.9950810257434642, "grad_norm": 0.09172813594341278, "learning_rate": 1.4793862174272197e-07, "loss": 8.5009, "step": 199260 }, { "epoch": 0.9951309645684037, "grad_norm": 0.09289974719285965, "learning_rate": 1.4643670680117148e-07, "loss": 8.5079, "step": 199270 }, { "epoch": 0.9951809033933432, "grad_norm": 0.09185164421796799, "learning_rate": 1.4493479185962102e-07, "loss": 8.4842, "step": 199280 }, { "epoch": 0.9952308422182826, "grad_norm": 0.089945949614048, "learning_rate": 1.4343287691807054e-07, "loss": 8.4803, "step": 199290 }, { "epoch": 0.995280781043222, "grad_norm": 0.09140554815530777, "learning_rate": 1.4193096197652006e-07, "loss": 8.5122, "step": 199300 }, { "epoch": 0.9953307198681615, "grad_norm": 0.0931864008307457, "learning_rate": 1.4042904703496957e-07, "loss": 8.4922, "step": 199310 }, { "epoch": 0.995380658693101, "grad_norm": 0.08751117438077927, "learning_rate": 1.3892713209341912e-07, "loss": 8.4979, "step": 199320 }, { "epoch": 0.9954305975180404, "grad_norm": 0.08914308249950409, "learning_rate": 1.3742521715186863e-07, "loss": 8.4794, "step": 199330 }, { "epoch": 0.9954805363429798, "grad_norm": 0.09185323864221573, "learning_rate": 1.3592330221031815e-07, "loss": 8.4852, "step": 199340 }, { "epoch": 0.9955304751679193, "grad_norm": 0.0920838937163353, "learning_rate": 1.3442138726876767e-07, "loss": 8.4852, "step": 199350 }, { "epoch": 0.9955804139928588, "grad_norm": 0.08701695501804352, "learning_rate": 1.329194723272172e-07, "loss": 8.4987, "step": 199360 }, { "epoch": 0.9956303528177982, "grad_norm": 0.09538272023200989, "learning_rate": 1.3141755738566673e-07, "loss": 8.4892, "step": 199370 }, { "epoch": 0.9956802916427376, "grad_norm": 0.08852861821651459, "learning_rate": 1.2991564244411624e-07, "loss": 8.5061, "step": 199380 }, { "epoch": 0.9957302304676771, "grad_norm": 0.09177656471729279, "learning_rate": 1.2841372750256576e-07, "loss": 8.4837, "step": 199390 }, { "epoch": 0.9957801692926166, "grad_norm": 0.09195193648338318, "learning_rate": 1.269118125610153e-07, "loss": 8.4985, "step": 199400 }, { "epoch": 0.995830108117556, "grad_norm": 0.09105399250984192, "learning_rate": 1.2540989761946482e-07, "loss": 8.5016, "step": 199410 }, { "epoch": 0.9958800469424954, "grad_norm": 0.09002146124839783, "learning_rate": 1.2390798267791434e-07, "loss": 8.4919, "step": 199420 }, { "epoch": 0.9959299857674349, "grad_norm": 0.09366229176521301, "learning_rate": 1.2240606773636385e-07, "loss": 8.5062, "step": 199430 }, { "epoch": 0.9959799245923744, "grad_norm": 0.09170850366353989, "learning_rate": 1.209041527948134e-07, "loss": 8.4988, "step": 199440 }, { "epoch": 0.9960298634173138, "grad_norm": 0.0911903902888298, "learning_rate": 1.194022378532629e-07, "loss": 8.4892, "step": 199450 }, { "epoch": 0.9960798022422532, "grad_norm": 0.09550126641988754, "learning_rate": 1.1790032291171243e-07, "loss": 8.4933, "step": 199460 }, { "epoch": 0.9961297410671927, "grad_norm": 0.08738602697849274, "learning_rate": 1.1639840797016196e-07, "loss": 8.4957, "step": 199470 }, { "epoch": 0.9961796798921322, "grad_norm": 0.09028680622577667, "learning_rate": 1.1489649302861148e-07, "loss": 8.5054, "step": 199480 }, { "epoch": 0.9962296187170716, "grad_norm": 0.08849211782217026, "learning_rate": 1.13394578087061e-07, "loss": 8.4965, "step": 199490 }, { "epoch": 0.996279557542011, "grad_norm": 0.09421442449092865, "learning_rate": 1.1189266314551052e-07, "loss": 8.5073, "step": 199500 }, { "epoch": 0.9963294963669505, "grad_norm": 0.09024062752723694, "learning_rate": 1.1039074820396005e-07, "loss": 8.5054, "step": 199510 }, { "epoch": 0.99637943519189, "grad_norm": 0.09820150583982468, "learning_rate": 1.0888883326240957e-07, "loss": 8.4772, "step": 199520 }, { "epoch": 0.9964293740168294, "grad_norm": 0.10558462142944336, "learning_rate": 1.073869183208591e-07, "loss": 8.495, "step": 199530 }, { "epoch": 0.9964793128417688, "grad_norm": 0.09310225397348404, "learning_rate": 1.0588500337930862e-07, "loss": 8.4867, "step": 199540 }, { "epoch": 0.9965292516667082, "grad_norm": 0.09150728583335876, "learning_rate": 1.0438308843775815e-07, "loss": 8.4969, "step": 199550 }, { "epoch": 0.9965791904916478, "grad_norm": 0.09338292479515076, "learning_rate": 1.0288117349620768e-07, "loss": 8.4907, "step": 199560 }, { "epoch": 0.9966291293165872, "grad_norm": 0.09587588161230087, "learning_rate": 1.0137925855465719e-07, "loss": 8.4943, "step": 199570 }, { "epoch": 0.9966790681415266, "grad_norm": 0.09336939454078674, "learning_rate": 9.987734361310672e-08, "loss": 8.5067, "step": 199580 }, { "epoch": 0.996729006966466, "grad_norm": 0.08930207043886185, "learning_rate": 9.837542867155624e-08, "loss": 8.5041, "step": 199590 }, { "epoch": 0.9967789457914056, "grad_norm": 0.08806464821100235, "learning_rate": 9.687351373000577e-08, "loss": 8.5115, "step": 199600 }, { "epoch": 0.996828884616345, "grad_norm": 0.09173772484064102, "learning_rate": 9.537159878845528e-08, "loss": 8.5085, "step": 199610 }, { "epoch": 0.9968788234412844, "grad_norm": 0.08926473557949066, "learning_rate": 9.386968384690481e-08, "loss": 8.5104, "step": 199620 }, { "epoch": 0.9969287622662238, "grad_norm": 0.09517572820186615, "learning_rate": 9.236776890535433e-08, "loss": 8.5092, "step": 199630 }, { "epoch": 0.9969787010911634, "grad_norm": 0.08836136758327484, "learning_rate": 9.086585396380386e-08, "loss": 8.5023, "step": 199640 }, { "epoch": 0.9970286399161028, "grad_norm": 0.09061846137046814, "learning_rate": 8.936393902225338e-08, "loss": 8.5222, "step": 199650 }, { "epoch": 0.9970785787410422, "grad_norm": 0.09391947835683823, "learning_rate": 8.786202408070291e-08, "loss": 8.5009, "step": 199660 }, { "epoch": 0.9971285175659816, "grad_norm": 0.09239717572927475, "learning_rate": 8.636010913915242e-08, "loss": 8.4959, "step": 199670 }, { "epoch": 0.9971784563909212, "grad_norm": 0.09715589135885239, "learning_rate": 8.485819419760195e-08, "loss": 8.5019, "step": 199680 }, { "epoch": 0.9972283952158606, "grad_norm": 0.09166194498538971, "learning_rate": 8.335627925605147e-08, "loss": 8.4841, "step": 199690 }, { "epoch": 0.9972783340408, "grad_norm": 0.0937894880771637, "learning_rate": 8.185436431450099e-08, "loss": 8.5135, "step": 199700 }, { "epoch": 0.9973282728657394, "grad_norm": 0.08937085419893265, "learning_rate": 8.03524493729505e-08, "loss": 8.5079, "step": 199710 }, { "epoch": 0.997378211690679, "grad_norm": 0.09397994726896286, "learning_rate": 7.885053443140003e-08, "loss": 8.4847, "step": 199720 }, { "epoch": 0.9974281505156184, "grad_norm": 0.09294033795595169, "learning_rate": 7.734861948984955e-08, "loss": 8.498, "step": 199730 }, { "epoch": 0.9974780893405578, "grad_norm": 0.08595110476016998, "learning_rate": 7.584670454829908e-08, "loss": 8.4964, "step": 199740 }, { "epoch": 0.9975280281654972, "grad_norm": 0.09340130537748337, "learning_rate": 7.43447896067486e-08, "loss": 8.4942, "step": 199750 }, { "epoch": 0.9975779669904367, "grad_norm": 0.08756370097398758, "learning_rate": 7.284287466519813e-08, "loss": 8.5186, "step": 199760 }, { "epoch": 0.9976279058153762, "grad_norm": 0.09118250012397766, "learning_rate": 7.134095972364764e-08, "loss": 8.4982, "step": 199770 }, { "epoch": 0.9976778446403156, "grad_norm": 0.09316319227218628, "learning_rate": 6.983904478209717e-08, "loss": 8.491, "step": 199780 }, { "epoch": 0.997727783465255, "grad_norm": 0.08617562055587769, "learning_rate": 6.833712984054669e-08, "loss": 8.4977, "step": 199790 }, { "epoch": 0.9977777222901945, "grad_norm": 0.09523026645183563, "learning_rate": 6.683521489899622e-08, "loss": 8.5039, "step": 199800 }, { "epoch": 0.997827661115134, "grad_norm": 0.09021805971860886, "learning_rate": 6.533329995744574e-08, "loss": 8.4985, "step": 199810 }, { "epoch": 0.9978775999400734, "grad_norm": 0.08965896815061569, "learning_rate": 6.383138501589527e-08, "loss": 8.4904, "step": 199820 }, { "epoch": 0.9979275387650128, "grad_norm": 0.09753076732158661, "learning_rate": 6.23294700743448e-08, "loss": 8.5033, "step": 199830 }, { "epoch": 0.9979774775899523, "grad_norm": 0.0896683931350708, "learning_rate": 6.082755513279431e-08, "loss": 8.503, "step": 199840 }, { "epoch": 0.9980274164148918, "grad_norm": 0.09268392622470856, "learning_rate": 5.9325640191243836e-08, "loss": 8.4949, "step": 199850 }, { "epoch": 0.9980773552398312, "grad_norm": 0.08605291694402695, "learning_rate": 5.782372524969336e-08, "loss": 8.4886, "step": 199860 }, { "epoch": 0.9981272940647706, "grad_norm": 0.09285443276166916, "learning_rate": 5.632181030814288e-08, "loss": 8.4869, "step": 199870 }, { "epoch": 0.99817723288971, "grad_norm": 0.08672482520341873, "learning_rate": 5.4819895366592405e-08, "loss": 8.501, "step": 199880 }, { "epoch": 0.9982271717146496, "grad_norm": 0.0957999974489212, "learning_rate": 5.331798042504193e-08, "loss": 8.5033, "step": 199890 }, { "epoch": 0.998277110539589, "grad_norm": 0.08614014834165573, "learning_rate": 5.181606548349145e-08, "loss": 8.5082, "step": 199900 }, { "epoch": 0.9983270493645284, "grad_norm": 0.09075914323329926, "learning_rate": 5.0314150541940975e-08, "loss": 8.4991, "step": 199910 }, { "epoch": 0.9983769881894679, "grad_norm": 0.09307090193033218, "learning_rate": 4.88122356003905e-08, "loss": 8.474, "step": 199920 }, { "epoch": 0.9984269270144074, "grad_norm": 0.09387535601854324, "learning_rate": 4.731032065884002e-08, "loss": 8.4924, "step": 199930 }, { "epoch": 0.9984768658393468, "grad_norm": 0.08672790229320526, "learning_rate": 4.5808405717289545e-08, "loss": 8.5078, "step": 199940 }, { "epoch": 0.9985268046642862, "grad_norm": 0.09689149260520935, "learning_rate": 4.430649077573907e-08, "loss": 8.5156, "step": 199950 }, { "epoch": 0.9985767434892256, "grad_norm": 0.08997116982936859, "learning_rate": 4.280457583418859e-08, "loss": 8.5033, "step": 199960 }, { "epoch": 0.9986266823141652, "grad_norm": 0.09200499206781387, "learning_rate": 4.130266089263812e-08, "loss": 8.499, "step": 199970 }, { "epoch": 0.9986766211391046, "grad_norm": 0.09014206379652023, "learning_rate": 3.980074595108764e-08, "loss": 8.4947, "step": 199980 }, { "epoch": 0.998726559964044, "grad_norm": 0.09259384125471115, "learning_rate": 3.829883100953716e-08, "loss": 8.4803, "step": 199990 }, { "epoch": 0.9987764987889834, "grad_norm": 0.09015609323978424, "learning_rate": 3.6796916067986684e-08, "loss": 8.4907, "step": 200000 }, { "epoch": 0.998826437613923, "grad_norm": 0.08941512554883957, "learning_rate": 3.529500112643621e-08, "loss": 8.5041, "step": 200010 }, { "epoch": 0.9988763764388624, "grad_norm": 0.09428403526544571, "learning_rate": 3.379308618488573e-08, "loss": 8.5041, "step": 200020 }, { "epoch": 0.9989263152638018, "grad_norm": 0.0898728296160698, "learning_rate": 3.2291171243335254e-08, "loss": 8.5173, "step": 200030 }, { "epoch": 0.9989762540887412, "grad_norm": 0.08989530801773071, "learning_rate": 3.078925630178478e-08, "loss": 8.5081, "step": 200040 }, { "epoch": 0.9990261929136808, "grad_norm": 0.09039604663848877, "learning_rate": 2.92873413602343e-08, "loss": 8.5074, "step": 200050 }, { "epoch": 0.9990761317386202, "grad_norm": 0.08606907725334167, "learning_rate": 2.7785426418683824e-08, "loss": 8.5027, "step": 200060 }, { "epoch": 0.9991260705635596, "grad_norm": 0.09312465786933899, "learning_rate": 2.6283511477133347e-08, "loss": 8.499, "step": 200070 }, { "epoch": 0.999176009388499, "grad_norm": 0.09412333369255066, "learning_rate": 2.478159653558287e-08, "loss": 8.4968, "step": 200080 }, { "epoch": 0.9992259482134386, "grad_norm": 0.09396173804998398, "learning_rate": 2.3279681594032393e-08, "loss": 8.4779, "step": 200090 }, { "epoch": 0.999275887038378, "grad_norm": 0.08685121685266495, "learning_rate": 2.1777766652481916e-08, "loss": 8.501, "step": 200100 }, { "epoch": 0.9993258258633174, "grad_norm": 0.091061532497406, "learning_rate": 2.0275851710931436e-08, "loss": 8.5031, "step": 200110 }, { "epoch": 0.9993757646882568, "grad_norm": 0.08946952223777771, "learning_rate": 1.877393676938096e-08, "loss": 8.5099, "step": 200120 }, { "epoch": 0.9994257035131964, "grad_norm": 0.09155212342739105, "learning_rate": 1.7272021827830483e-08, "loss": 8.4807, "step": 200130 }, { "epoch": 0.9994756423381358, "grad_norm": 0.09216777235269547, "learning_rate": 1.5770106886280006e-08, "loss": 8.5083, "step": 200140 }, { "epoch": 0.9995255811630752, "grad_norm": 0.08744364976882935, "learning_rate": 1.4268191944729531e-08, "loss": 8.4995, "step": 200150 }, { "epoch": 0.9995755199880146, "grad_norm": 0.09221003204584122, "learning_rate": 1.2766277003179054e-08, "loss": 8.493, "step": 200160 }, { "epoch": 0.9996254588129542, "grad_norm": 0.09411770850419998, "learning_rate": 1.1264362061628577e-08, "loss": 8.4741, "step": 200170 }, { "epoch": 0.9996753976378936, "grad_norm": 0.09217695146799088, "learning_rate": 9.762447120078099e-09, "loss": 8.4893, "step": 200180 }, { "epoch": 0.999725336462833, "grad_norm": 0.0921444296836853, "learning_rate": 8.260532178527622e-09, "loss": 8.4998, "step": 200190 }, { "epoch": 0.9997752752877724, "grad_norm": 0.0900796502828598, "learning_rate": 6.758617236977146e-09, "loss": 8.4963, "step": 200200 }, { "epoch": 0.999825214112712, "grad_norm": 0.08964385092258453, "learning_rate": 5.2567022954266695e-09, "loss": 8.4971, "step": 200210 }, { "epoch": 0.9998751529376514, "grad_norm": 0.08609479665756226, "learning_rate": 3.754787353876192e-09, "loss": 8.4869, "step": 200220 }, { "epoch": 0.9999250917625908, "grad_norm": 0.08690956234931946, "learning_rate": 2.252872412325715e-09, "loss": 8.4845, "step": 200230 }, { "epoch": 0.9999750305875302, "grad_norm": 0.09233241528272629, "learning_rate": 7.509574707752385e-10, "loss": 8.4693, "step": 200240 } ], "logging_steps": 10, "max_steps": 200245, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.875112543817114e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }