{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6658956296218413, "eval_steps": 500, "global_step": 190000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00021028283040689727, "grad_norm": 2.9733738899230957, "learning_rate": 4.99907125083237e-05, "loss": 9.4089, "step": 60 }, { "epoch": 0.00042056566081379455, "grad_norm": 2.3245558738708496, "learning_rate": 4.9980198366803356e-05, "loss": 3.6104, "step": 120 }, { "epoch": 0.0006308484912206919, "grad_norm": 2.0401523113250732, "learning_rate": 4.996968422528301e-05, "loss": 3.3119, "step": 180 }, { "epoch": 0.0008411313216275891, "grad_norm": 1.8371763229370117, "learning_rate": 4.995917008376266e-05, "loss": 3.2129, "step": 240 }, { "epoch": 0.0010514141520344864, "grad_norm": 1.6748260259628296, "learning_rate": 4.994865594224232e-05, "loss": 3.1313, "step": 300 }, { "epoch": 0.0012616969824413838, "grad_norm": 1.8345626592636108, "learning_rate": 4.993814180072197e-05, "loss": 3.1001, "step": 360 }, { "epoch": 0.0014719798128482809, "grad_norm": 1.54319429397583, "learning_rate": 4.992762765920163e-05, "loss": 3.0693, "step": 420 }, { "epoch": 0.0016822626432551782, "grad_norm": 1.4675054550170898, "learning_rate": 4.991711351768129e-05, "loss": 3.0448, "step": 480 }, { "epoch": 0.0018925454736620755, "grad_norm": 1.5000808238983154, "learning_rate": 4.990659937616094e-05, "loss": 3.0369, "step": 540 }, { "epoch": 0.002102828304068973, "grad_norm": 1.3771166801452637, "learning_rate": 4.989608523464059e-05, "loss": 3.021, "step": 600 }, { "epoch": 0.00231311113447587, "grad_norm": 1.3988029956817627, "learning_rate": 4.988557109312025e-05, "loss": 3.0092, "step": 660 }, { "epoch": 0.0025233939648827675, "grad_norm": 1.3383244276046753, "learning_rate": 4.98750569515999e-05, "loss": 2.9906, "step": 720 }, { "epoch": 0.0027336767952896644, "grad_norm": 1.3521662950515747, "learning_rate": 4.986454281007956e-05, "loss": 2.9699, "step": 780 }, { "epoch": 0.0029439596256965617, "grad_norm": 1.287575125694275, "learning_rate": 4.985402866855921e-05, "loss": 2.9622, "step": 840 }, { "epoch": 0.003154242456103459, "grad_norm": 1.2298351526260376, "learning_rate": 4.984351452703887e-05, "loss": 2.9578, "step": 900 }, { "epoch": 0.0033645252865103564, "grad_norm": 1.3640484809875488, "learning_rate": 4.9833000385518524e-05, "loss": 2.9551, "step": 960 }, { "epoch": 0.0035748081169172537, "grad_norm": 1.273837685585022, "learning_rate": 4.9822486243998175e-05, "loss": 2.9444, "step": 1020 }, { "epoch": 0.003785090947324151, "grad_norm": 1.2357532978057861, "learning_rate": 4.9811972102477834e-05, "loss": 2.9326, "step": 1080 }, { "epoch": 0.003995373777731048, "grad_norm": 1.211605429649353, "learning_rate": 4.980145796095749e-05, "loss": 2.923, "step": 1140 }, { "epoch": 0.004205656608137946, "grad_norm": 1.1577990055084229, "learning_rate": 4.9790943819437145e-05, "loss": 2.9242, "step": 1200 }, { "epoch": 0.004415939438544843, "grad_norm": 1.2026646137237549, "learning_rate": 4.97804296779168e-05, "loss": 2.9175, "step": 1260 }, { "epoch": 0.00462622226895174, "grad_norm": 1.2141482830047607, "learning_rate": 4.9769915536396455e-05, "loss": 2.9319, "step": 1320 }, { "epoch": 0.004836505099358637, "grad_norm": 1.1758204698562622, "learning_rate": 4.975940139487611e-05, "loss": 2.9047, "step": 1380 }, { "epoch": 0.005046787929765535, "grad_norm": 1.2033660411834717, "learning_rate": 4.9748887253355766e-05, "loss": 2.9102, "step": 1440 }, { "epoch": 
0.005257070760172432, "grad_norm": 1.2934941053390503, "learning_rate": 4.9738373111835424e-05, "loss": 2.9022, "step": 1500 }, { "epoch": 0.005467353590579329, "grad_norm": 1.1089513301849365, "learning_rate": 4.9727858970315076e-05, "loss": 2.8968, "step": 1560 }, { "epoch": 0.005677636420986227, "grad_norm": 1.081939697265625, "learning_rate": 4.9717344828794735e-05, "loss": 2.8894, "step": 1620 }, { "epoch": 0.0058879192513931235, "grad_norm": 1.0430926084518433, "learning_rate": 4.970683068727439e-05, "loss": 2.8958, "step": 1680 }, { "epoch": 0.006098202081800021, "grad_norm": 1.118507742881775, "learning_rate": 4.969631654575404e-05, "loss": 2.8837, "step": 1740 }, { "epoch": 0.006308484912206918, "grad_norm": 1.163057804107666, "learning_rate": 4.96858024042337e-05, "loss": 2.8754, "step": 1800 }, { "epoch": 0.006518767742613816, "grad_norm": 1.0965882539749146, "learning_rate": 4.9675288262713356e-05, "loss": 2.8799, "step": 1860 }, { "epoch": 0.006729050573020713, "grad_norm": 1.0341143608093262, "learning_rate": 4.966477412119301e-05, "loss": 2.8794, "step": 1920 }, { "epoch": 0.0069393334034276105, "grad_norm": 0.9922598004341125, "learning_rate": 4.965425997967266e-05, "loss": 2.8769, "step": 1980 }, { "epoch": 0.0071496162338345074, "grad_norm": 1.0550235509872437, "learning_rate": 4.964374583815232e-05, "loss": 2.8557, "step": 2040 }, { "epoch": 0.007359899064241404, "grad_norm": 1.0422568321228027, "learning_rate": 4.963323169663197e-05, "loss": 2.8654, "step": 2100 }, { "epoch": 0.007570181894648302, "grad_norm": 1.0454896688461304, "learning_rate": 4.962271755511162e-05, "loss": 2.8542, "step": 2160 }, { "epoch": 0.007780464725055199, "grad_norm": 1.0227279663085938, "learning_rate": 4.961220341359129e-05, "loss": 2.8552, "step": 2220 }, { "epoch": 0.007990747555462096, "grad_norm": 1.0329540967941284, "learning_rate": 4.960168927207094e-05, "loss": 2.8527, "step": 2280 }, { "epoch": 0.008201030385868995, "grad_norm": 1.0209529399871826, "learning_rate": 4.959117513055059e-05, "loss": 2.8433, "step": 2340 }, { "epoch": 0.008411313216275891, "grad_norm": 1.0325701236724854, "learning_rate": 4.958066098903025e-05, "loss": 2.842, "step": 2400 }, { "epoch": 0.008621596046682788, "grad_norm": 0.9857275485992432, "learning_rate": 4.95701468475099e-05, "loss": 2.8324, "step": 2460 }, { "epoch": 0.008831878877089685, "grad_norm": 1.027936339378357, "learning_rate": 4.9559632705989554e-05, "loss": 2.8347, "step": 2520 }, { "epoch": 0.009042161707496582, "grad_norm": 1.045196771621704, "learning_rate": 4.954911856446921e-05, "loss": 2.8279, "step": 2580 }, { "epoch": 0.00925244453790348, "grad_norm": 1.0517592430114746, "learning_rate": 4.953860442294887e-05, "loss": 2.82, "step": 2640 }, { "epoch": 0.009462727368310378, "grad_norm": 1.0048534870147705, "learning_rate": 4.952809028142852e-05, "loss": 2.814, "step": 2700 }, { "epoch": 0.009673010198717275, "grad_norm": 1.0240980386734009, "learning_rate": 4.9517576139908175e-05, "loss": 2.7985, "step": 2760 }, { "epoch": 0.009883293029124171, "grad_norm": 1.019655704498291, "learning_rate": 4.9507061998387834e-05, "loss": 2.7932, "step": 2820 }, { "epoch": 0.01009357585953107, "grad_norm": 1.0587692260742188, "learning_rate": 4.9496547856867486e-05, "loss": 2.7943, "step": 2880 }, { "epoch": 0.010303858689937967, "grad_norm": 1.0488303899765015, "learning_rate": 4.9486033715347144e-05, "loss": 2.7792, "step": 2940 }, { "epoch": 0.010514141520344864, "grad_norm": 1.0349452495574951, "learning_rate": 4.94755195738268e-05, "loss": 
2.7717, "step": 3000 }, { "epoch": 0.01072442435075176, "grad_norm": 1.0631664991378784, "learning_rate": 4.9465005432306455e-05, "loss": 2.7696, "step": 3060 }, { "epoch": 0.010934707181158658, "grad_norm": 1.026247501373291, "learning_rate": 4.945449129078611e-05, "loss": 2.7586, "step": 3120 }, { "epoch": 0.011144990011565556, "grad_norm": 1.049814224243164, "learning_rate": 4.9443977149265766e-05, "loss": 2.746, "step": 3180 }, { "epoch": 0.011355272841972453, "grad_norm": 1.0707862377166748, "learning_rate": 4.943346300774542e-05, "loss": 2.7169, "step": 3240 }, { "epoch": 0.01156555567237935, "grad_norm": 1.092517614364624, "learning_rate": 4.9422948866225076e-05, "loss": 2.719, "step": 3300 }, { "epoch": 0.011775838502786247, "grad_norm": 0.9983085989952087, "learning_rate": 4.9412434724704735e-05, "loss": 2.7114, "step": 3360 }, { "epoch": 0.011986121333193146, "grad_norm": 1.0133140087127686, "learning_rate": 4.940192058318439e-05, "loss": 2.6965, "step": 3420 }, { "epoch": 0.012196404163600042, "grad_norm": 1.1006369590759277, "learning_rate": 4.939140644166404e-05, "loss": 2.7053, "step": 3480 }, { "epoch": 0.01240668699400694, "grad_norm": 1.0474873781204224, "learning_rate": 4.93808923001437e-05, "loss": 2.6907, "step": 3540 }, { "epoch": 0.012616969824413836, "grad_norm": 1.033618450164795, "learning_rate": 4.937037815862335e-05, "loss": 2.6892, "step": 3600 }, { "epoch": 0.012827252654820733, "grad_norm": 1.0913538932800293, "learning_rate": 4.935986401710301e-05, "loss": 2.6782, "step": 3660 }, { "epoch": 0.013037535485227632, "grad_norm": 1.0223588943481445, "learning_rate": 4.934934987558266e-05, "loss": 2.6607, "step": 3720 }, { "epoch": 0.013247818315634529, "grad_norm": 1.0294113159179688, "learning_rate": 4.933883573406232e-05, "loss": 2.66, "step": 3780 }, { "epoch": 0.013458101146041426, "grad_norm": 0.9733858704566956, "learning_rate": 4.932832159254197e-05, "loss": 2.6583, "step": 3840 }, { "epoch": 0.013668383976448322, "grad_norm": 1.1031690835952759, "learning_rate": 4.931780745102162e-05, "loss": 2.6736, "step": 3900 }, { "epoch": 0.013878666806855221, "grad_norm": 1.0241620540618896, "learning_rate": 4.930729330950128e-05, "loss": 2.652, "step": 3960 }, { "epoch": 0.014088949637262118, "grad_norm": 1.017519235610962, "learning_rate": 4.929677916798094e-05, "loss": 2.6346, "step": 4020 }, { "epoch": 0.014299232467669015, "grad_norm": 1.0698292255401611, "learning_rate": 4.928626502646059e-05, "loss": 2.6272, "step": 4080 }, { "epoch": 0.014509515298075912, "grad_norm": 1.0114396810531616, "learning_rate": 4.927575088494025e-05, "loss": 2.6419, "step": 4140 }, { "epoch": 0.014719798128482809, "grad_norm": 1.0519788265228271, "learning_rate": 4.92652367434199e-05, "loss": 2.6147, "step": 4200 }, { "epoch": 0.014930080958889707, "grad_norm": 0.952059268951416, "learning_rate": 4.9254722601899554e-05, "loss": 2.6179, "step": 4260 }, { "epoch": 0.015140363789296604, "grad_norm": 0.9879328012466431, "learning_rate": 4.924420846037921e-05, "loss": 2.6115, "step": 4320 }, { "epoch": 0.015350646619703501, "grad_norm": 1.000038504600525, "learning_rate": 4.923369431885887e-05, "loss": 2.6155, "step": 4380 }, { "epoch": 0.015560929450110398, "grad_norm": 1.020724892616272, "learning_rate": 4.922318017733852e-05, "loss": 2.5966, "step": 4440 }, { "epoch": 0.015771212280517297, "grad_norm": 1.0393227338790894, "learning_rate": 4.9212666035818175e-05, "loss": 2.6104, "step": 4500 }, { "epoch": 0.015981495110924192, "grad_norm": 0.9846431612968445, "learning_rate": 
4.9202151894297834e-05, "loss": 2.5907, "step": 4560 }, { "epoch": 0.01619177794133109, "grad_norm": 1.0070515871047974, "learning_rate": 4.9191637752777485e-05, "loss": 2.5962, "step": 4620 }, { "epoch": 0.01640206077173799, "grad_norm": 0.9761502742767334, "learning_rate": 4.918112361125714e-05, "loss": 2.6027, "step": 4680 }, { "epoch": 0.016612343602144884, "grad_norm": 0.9863872528076172, "learning_rate": 4.91706094697368e-05, "loss": 2.5745, "step": 4740 }, { "epoch": 0.016822626432551783, "grad_norm": 0.9633762240409851, "learning_rate": 4.9160095328216455e-05, "loss": 2.5901, "step": 4800 }, { "epoch": 0.017032909262958678, "grad_norm": 1.013360619544983, "learning_rate": 4.9149581186696107e-05, "loss": 2.57, "step": 4860 }, { "epoch": 0.017243192093365577, "grad_norm": 0.9540980458259583, "learning_rate": 4.9139067045175765e-05, "loss": 2.5718, "step": 4920 }, { "epoch": 0.017453474923772475, "grad_norm": 0.937667965888977, "learning_rate": 4.912855290365542e-05, "loss": 2.5899, "step": 4980 }, { "epoch": 0.01766375775417937, "grad_norm": 0.9429223537445068, "learning_rate": 4.911803876213507e-05, "loss": 2.591, "step": 5040 }, { "epoch": 0.01787404058458627, "grad_norm": 0.9466859102249146, "learning_rate": 4.910752462061473e-05, "loss": 2.5675, "step": 5100 }, { "epoch": 0.018084323414993164, "grad_norm": 0.9759580492973328, "learning_rate": 4.9097010479094386e-05, "loss": 2.5778, "step": 5160 }, { "epoch": 0.018294606245400063, "grad_norm": 0.974449634552002, "learning_rate": 4.908649633757404e-05, "loss": 2.5657, "step": 5220 }, { "epoch": 0.01850488907580696, "grad_norm": 0.9969838857650757, "learning_rate": 4.90759821960537e-05, "loss": 2.5451, "step": 5280 }, { "epoch": 0.018715171906213857, "grad_norm": 0.8910425901412964, "learning_rate": 4.906546805453335e-05, "loss": 2.5686, "step": 5340 }, { "epoch": 0.018925454736620755, "grad_norm": 0.964389979839325, "learning_rate": 4.905495391301301e-05, "loss": 2.5562, "step": 5400 }, { "epoch": 0.019135737567027654, "grad_norm": 0.9532023072242737, "learning_rate": 4.904443977149266e-05, "loss": 2.5487, "step": 5460 }, { "epoch": 0.01934602039743455, "grad_norm": 0.8950223326683044, "learning_rate": 4.903392562997232e-05, "loss": 2.5552, "step": 5520 }, { "epoch": 0.019556303227841448, "grad_norm": 0.97803795337677, "learning_rate": 4.902341148845197e-05, "loss": 2.5639, "step": 5580 }, { "epoch": 0.019766586058248343, "grad_norm": 0.9063622951507568, "learning_rate": 4.901289734693162e-05, "loss": 2.5484, "step": 5640 }, { "epoch": 0.01997686888865524, "grad_norm": 1.014898419380188, "learning_rate": 4.900238320541128e-05, "loss": 2.5646, "step": 5700 }, { "epoch": 0.02018715171906214, "grad_norm": 0.938632607460022, "learning_rate": 4.899186906389094e-05, "loss": 2.5457, "step": 5760 }, { "epoch": 0.020397434549469035, "grad_norm": 0.9069220423698425, "learning_rate": 4.898135492237059e-05, "loss": 2.5633, "step": 5820 }, { "epoch": 0.020607717379875934, "grad_norm": 0.8817732334136963, "learning_rate": 4.897084078085025e-05, "loss": 2.5471, "step": 5880 }, { "epoch": 0.02081800021028283, "grad_norm": 1.003666639328003, "learning_rate": 4.89603266393299e-05, "loss": 2.5588, "step": 5940 }, { "epoch": 0.021028283040689728, "grad_norm": 0.9436509609222412, "learning_rate": 4.8949812497809553e-05, "loss": 2.536, "step": 6000 }, { "epoch": 0.021238565871096626, "grad_norm": 0.9367396235466003, "learning_rate": 4.893929835628921e-05, "loss": 2.5427, "step": 6060 }, { "epoch": 0.02144884870150352, "grad_norm": 0.8808217644691467, 
"learning_rate": 4.892878421476887e-05, "loss": 2.5447, "step": 6120 }, { "epoch": 0.02165913153191042, "grad_norm": 0.9140152335166931, "learning_rate": 4.891827007324852e-05, "loss": 2.5298, "step": 6180 }, { "epoch": 0.021869414362317315, "grad_norm": 0.9449228644371033, "learning_rate": 4.8907755931728175e-05, "loss": 2.5421, "step": 6240 }, { "epoch": 0.022079697192724214, "grad_norm": 0.9297705292701721, "learning_rate": 4.889724179020783e-05, "loss": 2.5394, "step": 6300 }, { "epoch": 0.022289980023131113, "grad_norm": 0.8961461186408997, "learning_rate": 4.8886727648687485e-05, "loss": 2.5296, "step": 6360 }, { "epoch": 0.022500262853538008, "grad_norm": 0.9122869372367859, "learning_rate": 4.887621350716714e-05, "loss": 2.5292, "step": 6420 }, { "epoch": 0.022710545683944906, "grad_norm": 0.8803234100341797, "learning_rate": 4.88656993656468e-05, "loss": 2.5429, "step": 6480 }, { "epoch": 0.0229208285143518, "grad_norm": 0.9121540784835815, "learning_rate": 4.8855185224126454e-05, "loss": 2.5127, "step": 6540 }, { "epoch": 0.0231311113447587, "grad_norm": 0.9358422756195068, "learning_rate": 4.8844671082606106e-05, "loss": 2.535, "step": 6600 }, { "epoch": 0.0233413941751656, "grad_norm": 0.8811025023460388, "learning_rate": 4.8834156941085765e-05, "loss": 2.5332, "step": 6660 }, { "epoch": 0.023551677005572494, "grad_norm": 0.9080128073692322, "learning_rate": 4.882364279956542e-05, "loss": 2.5034, "step": 6720 }, { "epoch": 0.023761959835979393, "grad_norm": 0.8822489380836487, "learning_rate": 4.881312865804507e-05, "loss": 2.5028, "step": 6780 }, { "epoch": 0.02397224266638629, "grad_norm": 0.9486945271492004, "learning_rate": 4.880261451652473e-05, "loss": 2.519, "step": 6840 }, { "epoch": 0.024182525496793186, "grad_norm": 0.8917908072471619, "learning_rate": 4.8792100375004386e-05, "loss": 2.5171, "step": 6900 }, { "epoch": 0.024392808327200085, "grad_norm": 0.9071120023727417, "learning_rate": 4.878158623348404e-05, "loss": 2.5382, "step": 6960 }, { "epoch": 0.02460309115760698, "grad_norm": 0.8386140465736389, "learning_rate": 4.877107209196369e-05, "loss": 2.5238, "step": 7020 }, { "epoch": 0.02481337398801388, "grad_norm": 0.857929527759552, "learning_rate": 4.876055795044335e-05, "loss": 2.5112, "step": 7080 }, { "epoch": 0.025023656818420777, "grad_norm": 0.9252939820289612, "learning_rate": 4.8750043808923e-05, "loss": 2.5164, "step": 7140 }, { "epoch": 0.025233939648827673, "grad_norm": 0.8985560536384583, "learning_rate": 4.873952966740266e-05, "loss": 2.5206, "step": 7200 }, { "epoch": 0.02544422247923457, "grad_norm": 0.8806078433990479, "learning_rate": 4.872901552588232e-05, "loss": 2.5276, "step": 7260 }, { "epoch": 0.025654505309641466, "grad_norm": 0.8562397956848145, "learning_rate": 4.871850138436197e-05, "loss": 2.5144, "step": 7320 }, { "epoch": 0.025864788140048365, "grad_norm": 0.8808496594429016, "learning_rate": 4.870798724284162e-05, "loss": 2.5116, "step": 7380 }, { "epoch": 0.026075070970455264, "grad_norm": 0.8252414464950562, "learning_rate": 4.869747310132128e-05, "loss": 2.5146, "step": 7440 }, { "epoch": 0.02628535380086216, "grad_norm": 0.9716061949729919, "learning_rate": 4.868695895980093e-05, "loss": 2.5224, "step": 7500 }, { "epoch": 0.026495636631269057, "grad_norm": 0.9421200752258301, "learning_rate": 4.867644481828059e-05, "loss": 2.5087, "step": 7560 }, { "epoch": 0.026705919461675953, "grad_norm": 0.8816734552383423, "learning_rate": 4.866593067676025e-05, "loss": 2.5218, "step": 7620 }, { "epoch": 0.02691620229208285, "grad_norm": 
0.8930853605270386, "learning_rate": 4.86554165352399e-05, "loss": 2.5142, "step": 7680 }, { "epoch": 0.02712648512248975, "grad_norm": 0.8809117078781128, "learning_rate": 4.864490239371955e-05, "loss": 2.5103, "step": 7740 }, { "epoch": 0.027336767952896645, "grad_norm": 0.8628544807434082, "learning_rate": 4.863438825219921e-05, "loss": 2.5115, "step": 7800 }, { "epoch": 0.027547050783303544, "grad_norm": 0.8847160339355469, "learning_rate": 4.8623874110678864e-05, "loss": 2.5095, "step": 7860 }, { "epoch": 0.027757333613710442, "grad_norm": 0.888922393321991, "learning_rate": 4.861335996915852e-05, "loss": 2.5135, "step": 7920 }, { "epoch": 0.027967616444117337, "grad_norm": 0.8904682993888855, "learning_rate": 4.8602845827638174e-05, "loss": 2.4958, "step": 7980 }, { "epoch": 0.028177899274524236, "grad_norm": 1.176128625869751, "learning_rate": 4.859233168611783e-05, "loss": 2.5173, "step": 8040 }, { "epoch": 0.02838818210493113, "grad_norm": 0.833441972732544, "learning_rate": 4.8581817544597485e-05, "loss": 2.4879, "step": 8100 }, { "epoch": 0.02859846493533803, "grad_norm": 0.8262945413589478, "learning_rate": 4.857130340307714e-05, "loss": 2.4856, "step": 8160 }, { "epoch": 0.02880874776574493, "grad_norm": 0.9492835402488708, "learning_rate": 4.8560789261556795e-05, "loss": 2.5002, "step": 8220 }, { "epoch": 0.029019030596151824, "grad_norm": 0.8316044211387634, "learning_rate": 4.8550275120036454e-05, "loss": 2.4947, "step": 8280 }, { "epoch": 0.029229313426558722, "grad_norm": 0.828999400138855, "learning_rate": 4.8539760978516106e-05, "loss": 2.4924, "step": 8340 }, { "epoch": 0.029439596256965617, "grad_norm": 0.8634353876113892, "learning_rate": 4.8529246836995765e-05, "loss": 2.5055, "step": 8400 }, { "epoch": 0.029649879087372516, "grad_norm": 0.8393489122390747, "learning_rate": 4.8518732695475417e-05, "loss": 2.4965, "step": 8460 }, { "epoch": 0.029860161917779415, "grad_norm": 0.824374794960022, "learning_rate": 4.850821855395507e-05, "loss": 2.4955, "step": 8520 }, { "epoch": 0.03007044474818631, "grad_norm": 0.8524509072303772, "learning_rate": 4.849770441243473e-05, "loss": 2.4894, "step": 8580 }, { "epoch": 0.03028072757859321, "grad_norm": 0.8297523856163025, "learning_rate": 4.8487190270914386e-05, "loss": 2.4959, "step": 8640 }, { "epoch": 0.030491010409000104, "grad_norm": 0.8216577768325806, "learning_rate": 4.847667612939404e-05, "loss": 2.4985, "step": 8700 }, { "epoch": 0.030701293239407002, "grad_norm": 0.7975873351097107, "learning_rate": 4.846616198787369e-05, "loss": 2.4817, "step": 8760 }, { "epoch": 0.0309115760698139, "grad_norm": 0.8472198843955994, "learning_rate": 4.845564784635335e-05, "loss": 2.4909, "step": 8820 }, { "epoch": 0.031121858900220796, "grad_norm": 0.837291955947876, "learning_rate": 4.8445133704833e-05, "loss": 2.4946, "step": 8880 }, { "epoch": 0.03133214173062769, "grad_norm": 0.8038502931594849, "learning_rate": 4.843461956331265e-05, "loss": 2.4971, "step": 8940 }, { "epoch": 0.03154242456103459, "grad_norm": 0.7669224739074707, "learning_rate": 4.842410542179232e-05, "loss": 2.4905, "step": 9000 }, { "epoch": 0.03175270739144149, "grad_norm": 0.8333732485771179, "learning_rate": 4.841359128027197e-05, "loss": 2.4949, "step": 9060 }, { "epoch": 0.031962990221848384, "grad_norm": 0.9027146697044373, "learning_rate": 4.840307713875162e-05, "loss": 2.4948, "step": 9120 }, { "epoch": 0.032173273052255286, "grad_norm": 0.8094586133956909, "learning_rate": 4.839256299723128e-05, "loss": 2.4733, "step": 9180 }, { "epoch": 
0.03238355588266218, "grad_norm": 0.8115667104721069, "learning_rate": 4.838204885571093e-05, "loss": 2.4736, "step": 9240 }, { "epoch": 0.032593838713069076, "grad_norm": 0.8178938031196594, "learning_rate": 4.8371534714190584e-05, "loss": 2.4747, "step": 9300 }, { "epoch": 0.03280412154347598, "grad_norm": 0.8295975923538208, "learning_rate": 4.836102057267025e-05, "loss": 2.4927, "step": 9360 }, { "epoch": 0.03301440437388287, "grad_norm": 0.813774824142456, "learning_rate": 4.83505064311499e-05, "loss": 2.4935, "step": 9420 }, { "epoch": 0.03322468720428977, "grad_norm": 0.8150737881660461, "learning_rate": 4.833999228962955e-05, "loss": 2.4735, "step": 9480 }, { "epoch": 0.033434970034696664, "grad_norm": 0.8141120672225952, "learning_rate": 4.832947814810921e-05, "loss": 2.4798, "step": 9540 }, { "epoch": 0.033645252865103566, "grad_norm": 0.8410353064537048, "learning_rate": 4.8318964006588864e-05, "loss": 2.4706, "step": 9600 }, { "epoch": 0.03385553569551046, "grad_norm": 0.7936863303184509, "learning_rate": 4.8308449865068515e-05, "loss": 2.5007, "step": 9660 }, { "epoch": 0.034065818525917356, "grad_norm": 0.8532397150993347, "learning_rate": 4.8297935723548174e-05, "loss": 2.4819, "step": 9720 }, { "epoch": 0.03427610135632426, "grad_norm": 0.7777956128120422, "learning_rate": 4.828742158202783e-05, "loss": 2.465, "step": 9780 }, { "epoch": 0.03448638418673115, "grad_norm": 0.8209832310676575, "learning_rate": 4.8276907440507485e-05, "loss": 2.4618, "step": 9840 }, { "epoch": 0.03469666701713805, "grad_norm": 0.782342255115509, "learning_rate": 4.8266393298987137e-05, "loss": 2.4844, "step": 9900 }, { "epoch": 0.03490694984754495, "grad_norm": 0.8156614303588867, "learning_rate": 4.8255879157466795e-05, "loss": 2.4842, "step": 9960 }, { "epoch": 0.035117232677951846, "grad_norm": 0.8090491890907288, "learning_rate": 4.824536501594645e-05, "loss": 2.4631, "step": 10020 }, { "epoch": 0.03532751550835874, "grad_norm": 0.7655121684074402, "learning_rate": 4.8234850874426106e-05, "loss": 2.4701, "step": 10080 }, { "epoch": 0.03553779833876564, "grad_norm": 0.8753265738487244, "learning_rate": 4.8224336732905764e-05, "loss": 2.4683, "step": 10140 }, { "epoch": 0.03574808116917254, "grad_norm": 0.7905852794647217, "learning_rate": 4.8213822591385416e-05, "loss": 2.4685, "step": 10200 }, { "epoch": 0.03595836399957943, "grad_norm": 0.7805492281913757, "learning_rate": 4.820330844986507e-05, "loss": 2.4762, "step": 10260 }, { "epoch": 0.03616864682998633, "grad_norm": 0.8051442503929138, "learning_rate": 4.819279430834473e-05, "loss": 2.4542, "step": 10320 }, { "epoch": 0.03637892966039323, "grad_norm": 0.7971628308296204, "learning_rate": 4.818228016682438e-05, "loss": 2.4665, "step": 10380 }, { "epoch": 0.036589212490800126, "grad_norm": 0.8155962228775024, "learning_rate": 4.817176602530404e-05, "loss": 2.4912, "step": 10440 }, { "epoch": 0.03679949532120702, "grad_norm": 0.791641116142273, "learning_rate": 4.816125188378369e-05, "loss": 2.4663, "step": 10500 }, { "epoch": 0.03700977815161392, "grad_norm": 0.8577168583869934, "learning_rate": 4.815073774226335e-05, "loss": 2.4579, "step": 10560 }, { "epoch": 0.03722006098202082, "grad_norm": 0.7734136581420898, "learning_rate": 4.8140223600743e-05, "loss": 2.4546, "step": 10620 }, { "epoch": 0.03743034381242771, "grad_norm": 0.8331671357154846, "learning_rate": 4.812970945922265e-05, "loss": 2.4737, "step": 10680 }, { "epoch": 0.037640626642834615, "grad_norm": 0.791526734828949, "learning_rate": 4.811919531770231e-05, "loss": 
2.4678, "step": 10740 }, { "epoch": 0.03785090947324151, "grad_norm": 0.7544797658920288, "learning_rate": 4.810868117618197e-05, "loss": 2.4595, "step": 10800 }, { "epoch": 0.038061192303648406, "grad_norm": 0.8361566066741943, "learning_rate": 4.809816703466162e-05, "loss": 2.4601, "step": 10860 }, { "epoch": 0.03827147513405531, "grad_norm": 0.7700713872909546, "learning_rate": 4.808765289314128e-05, "loss": 2.4651, "step": 10920 }, { "epoch": 0.0384817579644622, "grad_norm": 0.7866743803024292, "learning_rate": 4.807713875162093e-05, "loss": 2.4652, "step": 10980 }, { "epoch": 0.0386920407948691, "grad_norm": 0.7954710721969604, "learning_rate": 4.8066624610100583e-05, "loss": 2.4655, "step": 11040 }, { "epoch": 0.03890232362527599, "grad_norm": 0.8627223372459412, "learning_rate": 4.805611046858024e-05, "loss": 2.4638, "step": 11100 }, { "epoch": 0.039112606455682895, "grad_norm": 0.76466304063797, "learning_rate": 4.80455963270599e-05, "loss": 2.4467, "step": 11160 }, { "epoch": 0.03932288928608979, "grad_norm": 0.8471527099609375, "learning_rate": 4.803508218553955e-05, "loss": 2.4494, "step": 11220 }, { "epoch": 0.039533172116496686, "grad_norm": 0.7519867420196533, "learning_rate": 4.802456804401921e-05, "loss": 2.4583, "step": 11280 }, { "epoch": 0.03974345494690359, "grad_norm": 0.8605870008468628, "learning_rate": 4.801405390249886e-05, "loss": 2.4565, "step": 11340 }, { "epoch": 0.03995373777731048, "grad_norm": 0.7822230458259583, "learning_rate": 4.8003539760978515e-05, "loss": 2.4636, "step": 11400 }, { "epoch": 0.04016402060771738, "grad_norm": 0.7491495609283447, "learning_rate": 4.7993025619458174e-05, "loss": 2.4636, "step": 11460 }, { "epoch": 0.04037430343812428, "grad_norm": 0.81394362449646, "learning_rate": 4.798251147793783e-05, "loss": 2.4613, "step": 11520 }, { "epoch": 0.040584586268531175, "grad_norm": 0.8277611136436462, "learning_rate": 4.7971997336417484e-05, "loss": 2.445, "step": 11580 }, { "epoch": 0.04079486909893807, "grad_norm": 0.7311505675315857, "learning_rate": 4.7961483194897136e-05, "loss": 2.4439, "step": 11640 }, { "epoch": 0.041005151929344966, "grad_norm": 0.7802140712738037, "learning_rate": 4.7950969053376795e-05, "loss": 2.4629, "step": 11700 }, { "epoch": 0.04121543475975187, "grad_norm": 0.7538201212882996, "learning_rate": 4.794045491185645e-05, "loss": 2.4535, "step": 11760 }, { "epoch": 0.04142571759015876, "grad_norm": 0.7967063784599304, "learning_rate": 4.7929940770336105e-05, "loss": 2.4552, "step": 11820 }, { "epoch": 0.04163600042056566, "grad_norm": 0.819740891456604, "learning_rate": 4.7919426628815764e-05, "loss": 2.4601, "step": 11880 }, { "epoch": 0.04184628325097256, "grad_norm": 0.8193987607955933, "learning_rate": 4.7908912487295416e-05, "loss": 2.4774, "step": 11940 }, { "epoch": 0.042056566081379455, "grad_norm": 0.83516526222229, "learning_rate": 4.789839834577507e-05, "loss": 2.4546, "step": 12000 }, { "epoch": 0.04226684891178635, "grad_norm": 0.8305575847625732, "learning_rate": 4.7887884204254727e-05, "loss": 2.4476, "step": 12060 }, { "epoch": 0.04247713174219325, "grad_norm": 0.8162139654159546, "learning_rate": 4.787737006273438e-05, "loss": 2.4527, "step": 12120 }, { "epoch": 0.04268741457260015, "grad_norm": 0.8285430073738098, "learning_rate": 4.786685592121404e-05, "loss": 2.4485, "step": 12180 }, { "epoch": 0.04289769740300704, "grad_norm": 0.9468696713447571, "learning_rate": 4.785634177969369e-05, "loss": 2.4469, "step": 12240 }, { "epoch": 0.043107980233413945, "grad_norm": 0.7479498386383057, 
"learning_rate": 4.784582763817335e-05, "loss": 2.432, "step": 12300 }, { "epoch": 0.04331826306382084, "grad_norm": 0.7362015843391418, "learning_rate": 4.7835313496653e-05, "loss": 2.4626, "step": 12360 }, { "epoch": 0.043528545894227735, "grad_norm": 0.8287525177001953, "learning_rate": 4.782479935513265e-05, "loss": 2.456, "step": 12420 }, { "epoch": 0.04373882872463463, "grad_norm": 0.7658047080039978, "learning_rate": 4.781428521361231e-05, "loss": 2.4587, "step": 12480 }, { "epoch": 0.04394911155504153, "grad_norm": 0.7921282052993774, "learning_rate": 4.780377107209197e-05, "loss": 2.4617, "step": 12540 }, { "epoch": 0.04415939438544843, "grad_norm": 0.7566713690757751, "learning_rate": 4.779325693057162e-05, "loss": 2.4299, "step": 12600 }, { "epoch": 0.04436967721585532, "grad_norm": 0.7757437825202942, "learning_rate": 4.778274278905128e-05, "loss": 2.4452, "step": 12660 }, { "epoch": 0.044579960046262225, "grad_norm": 0.7873514294624329, "learning_rate": 4.777222864753093e-05, "loss": 2.4432, "step": 12720 }, { "epoch": 0.04479024287666912, "grad_norm": 0.7305336594581604, "learning_rate": 4.776171450601058e-05, "loss": 2.4468, "step": 12780 }, { "epoch": 0.045000525707076015, "grad_norm": 0.7629138231277466, "learning_rate": 4.775120036449024e-05, "loss": 2.4392, "step": 12840 }, { "epoch": 0.04521080853748292, "grad_norm": 0.7269495129585266, "learning_rate": 4.77406862229699e-05, "loss": 2.4201, "step": 12900 }, { "epoch": 0.04542109136788981, "grad_norm": 0.8234984278678894, "learning_rate": 4.773017208144955e-05, "loss": 2.4379, "step": 12960 }, { "epoch": 0.04563137419829671, "grad_norm": 0.7635879516601562, "learning_rate": 4.7719657939929204e-05, "loss": 2.4549, "step": 13020 }, { "epoch": 0.0458416570287036, "grad_norm": 0.8231070637702942, "learning_rate": 4.770914379840886e-05, "loss": 2.434, "step": 13080 }, { "epoch": 0.046051939859110505, "grad_norm": 0.7481459975242615, "learning_rate": 4.7698629656888515e-05, "loss": 2.4546, "step": 13140 }, { "epoch": 0.0462622226895174, "grad_norm": 0.7934229373931885, "learning_rate": 4.7688115515368174e-05, "loss": 2.4424, "step": 13200 }, { "epoch": 0.046472505519924295, "grad_norm": 0.7335939407348633, "learning_rate": 4.767760137384783e-05, "loss": 2.4549, "step": 13260 }, { "epoch": 0.0466827883503312, "grad_norm": 0.8233382105827332, "learning_rate": 4.7667087232327484e-05, "loss": 2.4531, "step": 13320 }, { "epoch": 0.04689307118073809, "grad_norm": 0.8387680053710938, "learning_rate": 4.7656573090807136e-05, "loss": 2.4555, "step": 13380 }, { "epoch": 0.04710335401114499, "grad_norm": 0.7505112886428833, "learning_rate": 4.7646058949286795e-05, "loss": 2.4447, "step": 13440 }, { "epoch": 0.04731363684155189, "grad_norm": 0.7546599507331848, "learning_rate": 4.7635544807766447e-05, "loss": 2.449, "step": 13500 }, { "epoch": 0.047523919671958785, "grad_norm": 0.7598650455474854, "learning_rate": 4.76250306662461e-05, "loss": 2.4502, "step": 13560 }, { "epoch": 0.04773420250236568, "grad_norm": 0.7494813203811646, "learning_rate": 4.7614516524725764e-05, "loss": 2.4391, "step": 13620 }, { "epoch": 0.04794448533277258, "grad_norm": 0.7718535661697388, "learning_rate": 4.7604002383205416e-05, "loss": 2.431, "step": 13680 }, { "epoch": 0.04815476816317948, "grad_norm": 0.6967277526855469, "learning_rate": 4.759348824168507e-05, "loss": 2.4261, "step": 13740 }, { "epoch": 0.04836505099358637, "grad_norm": 0.7197991013526917, "learning_rate": 4.7582974100164726e-05, "loss": 2.4463, "step": 13800 }, { "epoch": 
0.04857533382399327, "grad_norm": 0.7507856488227844, "learning_rate": 4.757245995864438e-05, "loss": 2.4337, "step": 13860 }, { "epoch": 0.04878561665440017, "grad_norm": 0.7825746536254883, "learning_rate": 4.756194581712403e-05, "loss": 2.4446, "step": 13920 }, { "epoch": 0.048995899484807065, "grad_norm": 0.7825769782066345, "learning_rate": 4.755143167560369e-05, "loss": 2.441, "step": 13980 }, { "epoch": 0.04920618231521396, "grad_norm": 0.7822974324226379, "learning_rate": 4.754091753408335e-05, "loss": 2.4236, "step": 14040 }, { "epoch": 0.04941646514562086, "grad_norm": 0.7625136375427246, "learning_rate": 4.7530403392563e-05, "loss": 2.4392, "step": 14100 }, { "epoch": 0.04962674797602776, "grad_norm": 0.7651515007019043, "learning_rate": 4.751988925104265e-05, "loss": 2.4476, "step": 14160 }, { "epoch": 0.04983703080643465, "grad_norm": 0.6852614879608154, "learning_rate": 4.750937510952231e-05, "loss": 2.4496, "step": 14220 }, { "epoch": 0.050047313636841555, "grad_norm": 0.7274338006973267, "learning_rate": 4.749886096800196e-05, "loss": 2.4484, "step": 14280 }, { "epoch": 0.05025759646724845, "grad_norm": 0.7408413887023926, "learning_rate": 4.748834682648162e-05, "loss": 2.4413, "step": 14340 }, { "epoch": 0.050467879297655345, "grad_norm": 0.7533018589019775, "learning_rate": 4.747783268496128e-05, "loss": 2.4458, "step": 14400 }, { "epoch": 0.05067816212806225, "grad_norm": 0.7672397494316101, "learning_rate": 4.746731854344093e-05, "loss": 2.4379, "step": 14460 }, { "epoch": 0.05088844495846914, "grad_norm": 0.705220103263855, "learning_rate": 4.745680440192058e-05, "loss": 2.4361, "step": 14520 }, { "epoch": 0.05109872778887604, "grad_norm": 0.7222245335578918, "learning_rate": 4.744629026040024e-05, "loss": 2.4512, "step": 14580 }, { "epoch": 0.05130901061928293, "grad_norm": 0.7020009756088257, "learning_rate": 4.7435776118879893e-05, "loss": 2.4419, "step": 14640 }, { "epoch": 0.051519293449689835, "grad_norm": 0.7522382140159607, "learning_rate": 4.742526197735955e-05, "loss": 2.4284, "step": 14700 }, { "epoch": 0.05172957628009673, "grad_norm": 0.7863227128982544, "learning_rate": 4.7414923071531216e-05, "loss": 2.433, "step": 14760 }, { "epoch": 0.051939859110503625, "grad_norm": 0.7541500329971313, "learning_rate": 4.740440893001087e-05, "loss": 2.4364, "step": 14820 }, { "epoch": 0.05215014194091053, "grad_norm": 0.7301027178764343, "learning_rate": 4.739389478849052e-05, "loss": 2.4339, "step": 14880 }, { "epoch": 0.05236042477131742, "grad_norm": 0.7505426406860352, "learning_rate": 4.738338064697018e-05, "loss": 2.4269, "step": 14940 }, { "epoch": 0.05257070760172432, "grad_norm": 0.7464006543159485, "learning_rate": 4.737286650544983e-05, "loss": 2.4242, "step": 15000 }, { "epoch": 0.05278099043213122, "grad_norm": 0.7655802965164185, "learning_rate": 4.736235236392948e-05, "loss": 2.4396, "step": 15060 }, { "epoch": 0.052991273262538115, "grad_norm": 0.7519718408584595, "learning_rate": 4.735183822240915e-05, "loss": 2.4231, "step": 15120 }, { "epoch": 0.05320155609294501, "grad_norm": 0.8537678718566895, "learning_rate": 4.73413240808888e-05, "loss": 2.4434, "step": 15180 }, { "epoch": 0.053411838923351905, "grad_norm": 0.7247916460037231, "learning_rate": 4.733080993936845e-05, "loss": 2.4366, "step": 15240 }, { "epoch": 0.05362212175375881, "grad_norm": 0.7293427586555481, "learning_rate": 4.732029579784811e-05, "loss": 2.4335, "step": 15300 }, { "epoch": 0.0538324045841657, "grad_norm": 0.7361655831336975, "learning_rate": 4.730978165632776e-05, "loss": 
2.4303, "step": 15360 }, { "epoch": 0.0540426874145726, "grad_norm": 0.7800238132476807, "learning_rate": 4.7299267514807414e-05, "loss": 2.4291, "step": 15420 }, { "epoch": 0.0542529702449795, "grad_norm": 0.7535472512245178, "learning_rate": 4.728875337328707e-05, "loss": 2.4263, "step": 15480 }, { "epoch": 0.054463253075386395, "grad_norm": 0.7635684609413147, "learning_rate": 4.727823923176673e-05, "loss": 2.4356, "step": 15540 }, { "epoch": 0.05467353590579329, "grad_norm": 0.7387227416038513, "learning_rate": 4.726772509024638e-05, "loss": 2.4258, "step": 15600 }, { "epoch": 0.05488381873620019, "grad_norm": 0.7634516358375549, "learning_rate": 4.7257210948726035e-05, "loss": 2.4369, "step": 15660 }, { "epoch": 0.05509410156660709, "grad_norm": 0.7039297223091125, "learning_rate": 4.7246696807205694e-05, "loss": 2.4319, "step": 15720 }, { "epoch": 0.05530438439701398, "grad_norm": 0.7102476954460144, "learning_rate": 4.7236182665685346e-05, "loss": 2.4395, "step": 15780 }, { "epoch": 0.055514667227420884, "grad_norm": 0.7064028382301331, "learning_rate": 4.7225668524165004e-05, "loss": 2.4184, "step": 15840 }, { "epoch": 0.05572495005782778, "grad_norm": 0.7453619241714478, "learning_rate": 4.721515438264466e-05, "loss": 2.4262, "step": 15900 }, { "epoch": 0.055935232888234675, "grad_norm": 0.7171881198883057, "learning_rate": 4.7204640241124315e-05, "loss": 2.4381, "step": 15960 }, { "epoch": 0.05614551571864157, "grad_norm": 0.6928536891937256, "learning_rate": 4.719412609960397e-05, "loss": 2.4174, "step": 16020 }, { "epoch": 0.05635579854904847, "grad_norm": 0.7249700427055359, "learning_rate": 4.7183611958083625e-05, "loss": 2.4245, "step": 16080 }, { "epoch": 0.05656608137945537, "grad_norm": 0.7394696474075317, "learning_rate": 4.717309781656328e-05, "loss": 2.4238, "step": 16140 }, { "epoch": 0.05677636420986226, "grad_norm": 0.7761276364326477, "learning_rate": 4.7162583675042936e-05, "loss": 2.4203, "step": 16200 }, { "epoch": 0.056986647040269164, "grad_norm": 0.8082882761955261, "learning_rate": 4.7152069533522595e-05, "loss": 2.4302, "step": 16260 }, { "epoch": 0.05719692987067606, "grad_norm": 0.7704823613166809, "learning_rate": 4.7141555392002247e-05, "loss": 2.4378, "step": 16320 }, { "epoch": 0.057407212701082955, "grad_norm": 0.7104964256286621, "learning_rate": 4.71310412504819e-05, "loss": 2.4272, "step": 16380 }, { "epoch": 0.05761749553148986, "grad_norm": 0.8135157823562622, "learning_rate": 4.712070234465356e-05, "loss": 2.4248, "step": 16440 }, { "epoch": 0.05782777836189675, "grad_norm": 0.7562858462333679, "learning_rate": 4.7110188203133214e-05, "loss": 2.4342, "step": 16500 }, { "epoch": 0.05803806119230365, "grad_norm": 0.7549763917922974, "learning_rate": 4.709967406161287e-05, "loss": 2.4231, "step": 16560 }, { "epoch": 0.05824834402271054, "grad_norm": 0.7127776741981506, "learning_rate": 4.708915992009253e-05, "loss": 2.4152, "step": 16620 }, { "epoch": 0.058458626853117444, "grad_norm": 0.7201217412948608, "learning_rate": 4.7078645778572183e-05, "loss": 2.4197, "step": 16680 }, { "epoch": 0.05866890968352434, "grad_norm": 0.7064502835273743, "learning_rate": 4.7068131637051835e-05, "loss": 2.4165, "step": 16740 }, { "epoch": 0.058879192513931235, "grad_norm": 0.7064214944839478, "learning_rate": 4.7057617495531494e-05, "loss": 2.4084, "step": 16800 }, { "epoch": 0.05908947534433814, "grad_norm": 0.6829055547714233, "learning_rate": 4.7047103354011146e-05, "loss": 2.4121, "step": 16860 }, { "epoch": 0.05929975817474503, "grad_norm": 
0.7540745735168457, "learning_rate": 4.70365892124908e-05, "loss": 2.4226, "step": 16920 }, { "epoch": 0.05951004100515193, "grad_norm": 0.7283081412315369, "learning_rate": 4.702607507097046e-05, "loss": 2.4147, "step": 16980 }, { "epoch": 0.05972032383555883, "grad_norm": 0.7194704413414001, "learning_rate": 4.7015560929450115e-05, "loss": 2.4159, "step": 17040 }, { "epoch": 0.059930606665965724, "grad_norm": 0.7521179914474487, "learning_rate": 4.700504678792977e-05, "loss": 2.433, "step": 17100 }, { "epoch": 0.06014088949637262, "grad_norm": 0.7045489549636841, "learning_rate": 4.6994532646409426e-05, "loss": 2.4229, "step": 17160 }, { "epoch": 0.06035117232677952, "grad_norm": 0.7060059309005737, "learning_rate": 4.698401850488908e-05, "loss": 2.4234, "step": 17220 }, { "epoch": 0.06056145515718642, "grad_norm": 0.6854456663131714, "learning_rate": 4.697350436336873e-05, "loss": 2.4157, "step": 17280 }, { "epoch": 0.06077173798759331, "grad_norm": 0.7461974620819092, "learning_rate": 4.696299022184839e-05, "loss": 2.4253, "step": 17340 }, { "epoch": 0.06098202081800021, "grad_norm": 0.7423505783081055, "learning_rate": 4.695247608032805e-05, "loss": 2.411, "step": 17400 }, { "epoch": 0.06119230364840711, "grad_norm": 0.7172077298164368, "learning_rate": 4.69419619388077e-05, "loss": 2.4272, "step": 17460 }, { "epoch": 0.061402586478814004, "grad_norm": 0.7238907217979431, "learning_rate": 4.693144779728735e-05, "loss": 2.4395, "step": 17520 }, { "epoch": 0.0616128693092209, "grad_norm": 0.7185387015342712, "learning_rate": 4.692093365576701e-05, "loss": 2.424, "step": 17580 }, { "epoch": 0.0618231521396278, "grad_norm": 0.6714202165603638, "learning_rate": 4.691041951424666e-05, "loss": 2.396, "step": 17640 }, { "epoch": 0.0620334349700347, "grad_norm": 0.7396364212036133, "learning_rate": 4.689990537272632e-05, "loss": 2.4254, "step": 17700 }, { "epoch": 0.06224371780044159, "grad_norm": 0.6906604170799255, "learning_rate": 4.688939123120598e-05, "loss": 2.4044, "step": 17760 }, { "epoch": 0.062454000630848494, "grad_norm": 0.6801305413246155, "learning_rate": 4.687887708968563e-05, "loss": 2.4268, "step": 17820 }, { "epoch": 0.06266428346125538, "grad_norm": 0.7206404805183411, "learning_rate": 4.686836294816528e-05, "loss": 2.4268, "step": 17880 }, { "epoch": 0.06287456629166228, "grad_norm": 0.6780009865760803, "learning_rate": 4.685784880664494e-05, "loss": 2.4236, "step": 17940 }, { "epoch": 0.06308484912206919, "grad_norm": 0.7205548882484436, "learning_rate": 4.684733466512459e-05, "loss": 2.4329, "step": 18000 }, { "epoch": 0.06329513195247607, "grad_norm": 0.7508593797683716, "learning_rate": 4.683682052360425e-05, "loss": 2.4133, "step": 18060 }, { "epoch": 0.06350541478288298, "grad_norm": 0.6937260031700134, "learning_rate": 4.6826306382083903e-05, "loss": 2.4156, "step": 18120 }, { "epoch": 0.06371569761328988, "grad_norm": 0.7223955392837524, "learning_rate": 4.681579224056356e-05, "loss": 2.4226, "step": 18180 }, { "epoch": 0.06392598044369677, "grad_norm": 0.7173060774803162, "learning_rate": 4.6805278099043214e-05, "loss": 2.4309, "step": 18240 }, { "epoch": 0.06413626327410367, "grad_norm": 0.7185004949569702, "learning_rate": 4.6794763957522866e-05, "loss": 2.4121, "step": 18300 }, { "epoch": 0.06434654610451057, "grad_norm": 0.7266471982002258, "learning_rate": 4.6784249816002525e-05, "loss": 2.4137, "step": 18360 }, { "epoch": 0.06455682893491746, "grad_norm": 0.6976909637451172, "learning_rate": 4.677373567448218e-05, "loss": 2.4061, "step": 18420 }, { "epoch": 
0.06476711176532436, "grad_norm": 0.6547685861587524, "learning_rate": 4.6763221532961835e-05, "loss": 2.4076, "step": 18480 }, { "epoch": 0.06497739459573126, "grad_norm": 0.7449997067451477, "learning_rate": 4.6752707391441494e-05, "loss": 2.4172, "step": 18540 }, { "epoch": 0.06518767742613815, "grad_norm": 0.7313099503517151, "learning_rate": 4.6742193249921146e-05, "loss": 2.4074, "step": 18600 }, { "epoch": 0.06539796025654505, "grad_norm": 0.6708427667617798, "learning_rate": 4.67316791084008e-05, "loss": 2.3998, "step": 18660 }, { "epoch": 0.06560824308695196, "grad_norm": 0.7288368940353394, "learning_rate": 4.6721164966880456e-05, "loss": 2.4041, "step": 18720 }, { "epoch": 0.06581852591735884, "grad_norm": 0.6853367686271667, "learning_rate": 4.6710650825360115e-05, "loss": 2.3981, "step": 18780 }, { "epoch": 0.06602880874776575, "grad_norm": 0.7069407105445862, "learning_rate": 4.670013668383977e-05, "loss": 2.4008, "step": 18840 }, { "epoch": 0.06623909157817265, "grad_norm": 0.7321661114692688, "learning_rate": 4.6689622542319425e-05, "loss": 2.3965, "step": 18900 }, { "epoch": 0.06644937440857954, "grad_norm": 0.6575349569320679, "learning_rate": 4.667910840079908e-05, "loss": 2.4077, "step": 18960 }, { "epoch": 0.06665965723898644, "grad_norm": 0.6651709675788879, "learning_rate": 4.666859425927873e-05, "loss": 2.4041, "step": 19020 }, { "epoch": 0.06686994006939333, "grad_norm": 0.7022081613540649, "learning_rate": 4.665808011775839e-05, "loss": 2.4001, "step": 19080 }, { "epoch": 0.06708022289980023, "grad_norm": 0.67169189453125, "learning_rate": 4.6647565976238047e-05, "loss": 2.4178, "step": 19140 }, { "epoch": 0.06729050573020713, "grad_norm": 0.6954366564750671, "learning_rate": 4.66370518347177e-05, "loss": 2.4271, "step": 19200 }, { "epoch": 0.06750078856061402, "grad_norm": 0.6861891150474548, "learning_rate": 4.662653769319735e-05, "loss": 2.4049, "step": 19260 }, { "epoch": 0.06771107139102092, "grad_norm": 0.7052733898162842, "learning_rate": 4.661602355167701e-05, "loss": 2.4011, "step": 19320 }, { "epoch": 0.06792135422142782, "grad_norm": 0.6501637697219849, "learning_rate": 4.660550941015666e-05, "loss": 2.4239, "step": 19380 }, { "epoch": 0.06813163705183471, "grad_norm": 0.6948041319847107, "learning_rate": 4.659499526863631e-05, "loss": 2.4054, "step": 19440 }, { "epoch": 0.06834191988224161, "grad_norm": 0.7107264995574951, "learning_rate": 4.658448112711598e-05, "loss": 2.4261, "step": 19500 }, { "epoch": 0.06855220271264852, "grad_norm": 0.7571120262145996, "learning_rate": 4.657396698559563e-05, "loss": 2.4051, "step": 19560 }, { "epoch": 0.0687624855430554, "grad_norm": 0.7028728127479553, "learning_rate": 4.656345284407528e-05, "loss": 2.4063, "step": 19620 }, { "epoch": 0.0689727683734623, "grad_norm": 0.699620246887207, "learning_rate": 4.655293870255494e-05, "loss": 2.4099, "step": 19680 }, { "epoch": 0.06918305120386921, "grad_norm": 0.6919541358947754, "learning_rate": 4.654242456103459e-05, "loss": 2.4114, "step": 19740 }, { "epoch": 0.0693933340342761, "grad_norm": 0.696038544178009, "learning_rate": 4.6531910419514244e-05, "loss": 2.4016, "step": 19800 }, { "epoch": 0.069603616864683, "grad_norm": 0.6753870248794556, "learning_rate": 4.65213962779939e-05, "loss": 2.3975, "step": 19860 }, { "epoch": 0.0698138996950899, "grad_norm": 0.719841718673706, "learning_rate": 4.651088213647356e-05, "loss": 2.4118, "step": 19920 }, { "epoch": 0.07002418252549679, "grad_norm": 0.6914800405502319, "learning_rate": 4.6500367994953214e-05, "loss": 2.4107, 
"step": 19980 }, { "epoch": 0.07023446535590369, "grad_norm": 0.6995266675949097, "learning_rate": 4.6489853853432866e-05, "loss": 2.4027, "step": 20040 }, { "epoch": 0.0704447481863106, "grad_norm": 0.7216110229492188, "learning_rate": 4.6479339711912524e-05, "loss": 2.4194, "step": 20100 }, { "epoch": 0.07065503101671748, "grad_norm": 0.6979327201843262, "learning_rate": 4.6468825570392176e-05, "loss": 2.3914, "step": 20160 }, { "epoch": 0.07086531384712438, "grad_norm": 0.6791154742240906, "learning_rate": 4.6458311428871835e-05, "loss": 2.4015, "step": 20220 }, { "epoch": 0.07107559667753129, "grad_norm": 0.6936553120613098, "learning_rate": 4.6447797287351493e-05, "loss": 2.4082, "step": 20280 }, { "epoch": 0.07128587950793817, "grad_norm": 0.6697074770927429, "learning_rate": 4.6437283145831145e-05, "loss": 2.408, "step": 20340 }, { "epoch": 0.07149616233834508, "grad_norm": 0.681769609451294, "learning_rate": 4.64267690043108e-05, "loss": 2.3901, "step": 20400 }, { "epoch": 0.07170644516875198, "grad_norm": 0.6847583055496216, "learning_rate": 4.6416254862790456e-05, "loss": 2.3975, "step": 20460 }, { "epoch": 0.07191672799915887, "grad_norm": 0.6366531848907471, "learning_rate": 4.640574072127011e-05, "loss": 2.4028, "step": 20520 }, { "epoch": 0.07212701082956577, "grad_norm": 0.6561765074729919, "learning_rate": 4.6395226579749766e-05, "loss": 2.4038, "step": 20580 }, { "epoch": 0.07233729365997266, "grad_norm": 0.7223414182662964, "learning_rate": 4.6384712438229425e-05, "loss": 2.4159, "step": 20640 }, { "epoch": 0.07254757649037956, "grad_norm": 0.7201683521270752, "learning_rate": 4.637419829670908e-05, "loss": 2.4054, "step": 20700 }, { "epoch": 0.07275785932078646, "grad_norm": 0.661088764667511, "learning_rate": 4.636368415518873e-05, "loss": 2.398, "step": 20760 }, { "epoch": 0.07296814215119335, "grad_norm": 0.6699717044830322, "learning_rate": 4.635317001366839e-05, "loss": 2.4058, "step": 20820 }, { "epoch": 0.07317842498160025, "grad_norm": 0.680035412311554, "learning_rate": 4.634265587214804e-05, "loss": 2.412, "step": 20880 }, { "epoch": 0.07338870781200715, "grad_norm": 0.6644302606582642, "learning_rate": 4.63321417306277e-05, "loss": 2.4015, "step": 20940 }, { "epoch": 0.07359899064241404, "grad_norm": 0.6539902091026306, "learning_rate": 4.632162758910735e-05, "loss": 2.3749, "step": 21000 }, { "epoch": 0.07380927347282094, "grad_norm": 0.6808243989944458, "learning_rate": 4.631111344758701e-05, "loss": 2.3918, "step": 21060 }, { "epoch": 0.07401955630322785, "grad_norm": 0.7031143307685852, "learning_rate": 4.630059930606666e-05, "loss": 2.4004, "step": 21120 }, { "epoch": 0.07422983913363473, "grad_norm": 0.6713913083076477, "learning_rate": 4.629008516454631e-05, "loss": 2.4042, "step": 21180 }, { "epoch": 0.07444012196404164, "grad_norm": 0.7224510312080383, "learning_rate": 4.627957102302598e-05, "loss": 2.4053, "step": 21240 }, { "epoch": 0.07465040479444854, "grad_norm": 0.7205802798271179, "learning_rate": 4.626905688150563e-05, "loss": 2.4049, "step": 21300 }, { "epoch": 0.07486068762485543, "grad_norm": 0.6812241077423096, "learning_rate": 4.625854273998528e-05, "loss": 2.3989, "step": 21360 }, { "epoch": 0.07507097045526233, "grad_norm": 0.699661374092102, "learning_rate": 4.624802859846494e-05, "loss": 2.388, "step": 21420 }, { "epoch": 0.07528125328566923, "grad_norm": 0.6710296869277954, "learning_rate": 4.623751445694459e-05, "loss": 2.3954, "step": 21480 }, { "epoch": 0.07549153611607612, "grad_norm": 0.6275144219398499, "learning_rate": 
4.6227000315424244e-05, "loss": 2.3954, "step": 21540 }, { "epoch": 0.07570181894648302, "grad_norm": 0.7540141344070435, "learning_rate": 4.62164861739039e-05, "loss": 2.4054, "step": 21600 }, { "epoch": 0.07591210177688992, "grad_norm": Infinity, "learning_rate": 4.620614726807556e-05, "loss": 2.3994, "step": 21660 }, { "epoch": 0.07612238460729681, "grad_norm": 0.6901912093162537, "learning_rate": 4.619563312655522e-05, "loss": 2.3948, "step": 21720 }, { "epoch": 0.07633266743770371, "grad_norm": 0.6476166844367981, "learning_rate": 4.618511898503488e-05, "loss": 2.3979, "step": 21780 }, { "epoch": 0.07654295026811062, "grad_norm": 0.7146350741386414, "learning_rate": 4.617460484351453e-05, "loss": 2.3922, "step": 21840 }, { "epoch": 0.0767532330985175, "grad_norm": 0.6991199851036072, "learning_rate": 4.616409070199418e-05, "loss": 2.3935, "step": 21900 }, { "epoch": 0.0769635159289244, "grad_norm": 0.6704103946685791, "learning_rate": 4.615357656047384e-05, "loss": 2.3735, "step": 21960 }, { "epoch": 0.0771737987593313, "grad_norm": 0.6334531903266907, "learning_rate": 4.614306241895349e-05, "loss": 2.3881, "step": 22020 }, { "epoch": 0.0773840815897382, "grad_norm": 0.642454981803894, "learning_rate": 4.613254827743315e-05, "loss": 2.4056, "step": 22080 }, { "epoch": 0.0775943644201451, "grad_norm": 0.6881276965141296, "learning_rate": 4.612203413591281e-05, "loss": 2.398, "step": 22140 }, { "epoch": 0.07780464725055199, "grad_norm": 0.6868687272071838, "learning_rate": 4.611151999439246e-05, "loss": 2.392, "step": 22200 }, { "epoch": 0.07801493008095889, "grad_norm": 0.6720156073570251, "learning_rate": 4.610100585287211e-05, "loss": 2.4043, "step": 22260 }, { "epoch": 0.07822521291136579, "grad_norm": 0.679771900177002, "learning_rate": 4.609049171135177e-05, "loss": 2.3936, "step": 22320 }, { "epoch": 0.07843549574177268, "grad_norm": 0.7134901285171509, "learning_rate": 4.607997756983142e-05, "loss": 2.3959, "step": 22380 }, { "epoch": 0.07864577857217958, "grad_norm": 0.7048931121826172, "learning_rate": 4.606946342831108e-05, "loss": 2.3805, "step": 22440 }, { "epoch": 0.07885606140258648, "grad_norm": 0.7281864285469055, "learning_rate": 4.6058949286790734e-05, "loss": 2.3867, "step": 22500 }, { "epoch": 0.07906634423299337, "grad_norm": 0.6903355121612549, "learning_rate": 4.604843514527039e-05, "loss": 2.3922, "step": 22560 }, { "epoch": 0.07927662706340027, "grad_norm": 0.6869490146636963, "learning_rate": 4.6037921003750044e-05, "loss": 2.3956, "step": 22620 }, { "epoch": 0.07948690989380718, "grad_norm": 0.6450217962265015, "learning_rate": 4.6027406862229696e-05, "loss": 2.399, "step": 22680 }, { "epoch": 0.07969719272421406, "grad_norm": 0.6367782354354858, "learning_rate": 4.6016892720709355e-05, "loss": 2.3827, "step": 22740 }, { "epoch": 0.07990747555462097, "grad_norm": 0.6485390067100525, "learning_rate": 4.6006378579189014e-05, "loss": 2.3938, "step": 22800 }, { "epoch": 0.08011775838502787, "grad_norm": 0.7188836932182312, "learning_rate": 4.5995864437668666e-05, "loss": 2.3812, "step": 22860 }, { "epoch": 0.08032804121543476, "grad_norm": 0.67445969581604, "learning_rate": 4.5985350296148324e-05, "loss": 2.3891, "step": 22920 }, { "epoch": 0.08053832404584166, "grad_norm": 0.7052184343338013, "learning_rate": 4.5974836154627976e-05, "loss": 2.3974, "step": 22980 }, { "epoch": 0.08074860687624856, "grad_norm": 0.6907039284706116, "learning_rate": 4.596432201310763e-05, "loss": 2.386, "step": 23040 }, { "epoch": 0.08095888970665545, "grad_norm": 
0.7139337062835693, "learning_rate": 4.595380787158729e-05, "loss": 2.3803, "step": 23100 }, { "epoch": 0.08116917253706235, "grad_norm": 0.684907853603363, "learning_rate": 4.5943293730066945e-05, "loss": 2.3946, "step": 23160 }, { "epoch": 0.08137945536746925, "grad_norm": 0.6158885955810547, "learning_rate": 4.59327795885466e-05, "loss": 2.4151, "step": 23220 }, { "epoch": 0.08158973819787614, "grad_norm": 0.701888382434845, "learning_rate": 4.592226544702625e-05, "loss": 2.3844, "step": 23280 }, { "epoch": 0.08180002102828304, "grad_norm": 0.668491542339325, "learning_rate": 4.591175130550591e-05, "loss": 2.3991, "step": 23340 }, { "epoch": 0.08201030385868993, "grad_norm": 0.7079835534095764, "learning_rate": 4.590123716398556e-05, "loss": 2.386, "step": 23400 }, { "epoch": 0.08222058668909683, "grad_norm": 0.725784957408905, "learning_rate": 4.589072302246522e-05, "loss": 2.3815, "step": 23460 }, { "epoch": 0.08243086951950374, "grad_norm": 0.6603190302848816, "learning_rate": 4.588020888094488e-05, "loss": 2.3864, "step": 23520 }, { "epoch": 0.08264115234991062, "grad_norm": 0.7041152715682983, "learning_rate": 4.586969473942453e-05, "loss": 2.3903, "step": 23580 }, { "epoch": 0.08285143518031753, "grad_norm": 0.6685366034507751, "learning_rate": 4.585918059790418e-05, "loss": 2.3856, "step": 23640 }, { "epoch": 0.08306171801072443, "grad_norm": 0.6484189033508301, "learning_rate": 4.584866645638384e-05, "loss": 2.3873, "step": 23700 }, { "epoch": 0.08327200084113132, "grad_norm": 0.6959061026573181, "learning_rate": 4.58383275505555e-05, "loss": 2.3866, "step": 23760 }, { "epoch": 0.08348228367153822, "grad_norm": 0.6646124720573425, "learning_rate": 4.5827813409035155e-05, "loss": 2.3863, "step": 23820 }, { "epoch": 0.08369256650194512, "grad_norm": 0.6963408589363098, "learning_rate": 4.581729926751481e-05, "loss": 2.3821, "step": 23880 }, { "epoch": 0.08390284933235201, "grad_norm": 0.667786717414856, "learning_rate": 4.5806785125994466e-05, "loss": 2.393, "step": 23940 }, { "epoch": 0.08411313216275891, "grad_norm": 0.6659138202667236, "learning_rate": 4.579627098447412e-05, "loss": 2.3862, "step": 24000 }, { "epoch": 0.08432341499316581, "grad_norm": 0.7164613008499146, "learning_rate": 4.5785756842953776e-05, "loss": 2.3981, "step": 24060 }, { "epoch": 0.0845336978235727, "grad_norm": 0.6524563431739807, "learning_rate": 4.577524270143343e-05, "loss": 2.395, "step": 24120 }, { "epoch": 0.0847439806539796, "grad_norm": 1.5108562707901, "learning_rate": 4.576472855991309e-05, "loss": 2.3708, "step": 24180 }, { "epoch": 0.0849542634843865, "grad_norm": 0.641019344329834, "learning_rate": 4.575421441839274e-05, "loss": 2.3911, "step": 24240 }, { "epoch": 0.0851645463147934, "grad_norm": 0.7017983198165894, "learning_rate": 4.57437002768724e-05, "loss": 2.3829, "step": 24300 }, { "epoch": 0.0853748291452003, "grad_norm": 0.6863337755203247, "learning_rate": 4.573318613535205e-05, "loss": 2.3787, "step": 24360 }, { "epoch": 0.0855851119756072, "grad_norm": 0.623172402381897, "learning_rate": 4.572267199383171e-05, "loss": 2.3866, "step": 24420 }, { "epoch": 0.08579539480601409, "grad_norm": 0.6611483693122864, "learning_rate": 4.571215785231136e-05, "loss": 2.3928, "step": 24480 }, { "epoch": 0.08600567763642099, "grad_norm": 0.6451998353004456, "learning_rate": 4.570164371079101e-05, "loss": 2.3727, "step": 24540 }, { "epoch": 0.08621596046682789, "grad_norm": 0.6573619246482849, "learning_rate": 4.569112956927067e-05, "loss": 2.3647, "step": 24600 }, { "epoch": 
0.08642624329723478, "grad_norm": 0.67535001039505, "learning_rate": 4.568061542775033e-05, "loss": 2.3882, "step": 24660 }, { "epoch": 0.08663652612764168, "grad_norm": 0.6458430290222168, "learning_rate": 4.567010128622998e-05, "loss": 2.393, "step": 24720 }, { "epoch": 0.08684680895804857, "grad_norm": 0.6228342652320862, "learning_rate": 4.565958714470964e-05, "loss": 2.3856, "step": 24780 }, { "epoch": 0.08705709178845547, "grad_norm": 0.6460654735565186, "learning_rate": 4.564907300318929e-05, "loss": 2.3851, "step": 24840 }, { "epoch": 0.08726737461886237, "grad_norm": 0.694816529750824, "learning_rate": 4.5638558861668944e-05, "loss": 2.3789, "step": 24900 }, { "epoch": 0.08747765744926926, "grad_norm": 0.7187425494194031, "learning_rate": 4.56280447201486e-05, "loss": 2.375, "step": 24960 }, { "epoch": 0.08768794027967616, "grad_norm": 0.6361470222473145, "learning_rate": 4.561753057862826e-05, "loss": 2.3805, "step": 25020 }, { "epoch": 0.08789822311008307, "grad_norm": 0.6624230146408081, "learning_rate": 4.560701643710791e-05, "loss": 2.385, "step": 25080 }, { "epoch": 0.08810850594048995, "grad_norm": 0.6224386096000671, "learning_rate": 4.5596502295587565e-05, "loss": 2.3751, "step": 25140 }, { "epoch": 0.08831878877089686, "grad_norm": 0.6870352029800415, "learning_rate": 4.558598815406722e-05, "loss": 2.3736, "step": 25200 }, { "epoch": 0.08852907160130376, "grad_norm": 0.6708427667617798, "learning_rate": 4.5575474012546875e-05, "loss": 2.405, "step": 25260 }, { "epoch": 0.08873935443171065, "grad_norm": 0.6544594168663025, "learning_rate": 4.556495987102653e-05, "loss": 2.3976, "step": 25320 }, { "epoch": 0.08894963726211755, "grad_norm": 0.6882736086845398, "learning_rate": 4.555444572950619e-05, "loss": 2.393, "step": 25380 }, { "epoch": 0.08915992009252445, "grad_norm": 0.6833152770996094, "learning_rate": 4.5543931587985844e-05, "loss": 2.3912, "step": 25440 }, { "epoch": 0.08937020292293134, "grad_norm": 0.6555745005607605, "learning_rate": 4.5533417446465496e-05, "loss": 2.386, "step": 25500 }, { "epoch": 0.08958048575333824, "grad_norm": 0.6967830657958984, "learning_rate": 4.5522903304945155e-05, "loss": 2.3719, "step": 25560 }, { "epoch": 0.08979076858374514, "grad_norm": 0.6678874492645264, "learning_rate": 4.551238916342481e-05, "loss": 2.3911, "step": 25620 }, { "epoch": 0.09000105141415203, "grad_norm": 0.6675252914428711, "learning_rate": 4.550187502190446e-05, "loss": 2.3802, "step": 25680 }, { "epoch": 0.09021133424455893, "grad_norm": 0.6637398600578308, "learning_rate": 4.549136088038412e-05, "loss": 2.3885, "step": 25740 }, { "epoch": 0.09042161707496583, "grad_norm": 0.6243078112602234, "learning_rate": 4.5480846738863776e-05, "loss": 2.3837, "step": 25800 }, { "epoch": 0.09063189990537272, "grad_norm": 0.7010598182678223, "learning_rate": 4.547033259734343e-05, "loss": 2.3577, "step": 25860 }, { "epoch": 0.09084218273577963, "grad_norm": 0.6444143652915955, "learning_rate": 4.545981845582308e-05, "loss": 2.3873, "step": 25920 }, { "epoch": 0.09105246556618653, "grad_norm": 0.6662720441818237, "learning_rate": 4.544930431430274e-05, "loss": 2.378, "step": 25980 }, { "epoch": 0.09126274839659342, "grad_norm": 0.7123090624809265, "learning_rate": 4.54387901727824e-05, "loss": 2.3748, "step": 26040 }, { "epoch": 0.09147303122700032, "grad_norm": 0.6969013214111328, "learning_rate": 4.542827603126205e-05, "loss": 2.3761, "step": 26100 }, { "epoch": 0.0916833140574072, "grad_norm": 0.6556299328804016, "learning_rate": 4.541776188974171e-05, "loss": 2.3884, 
"step": 26160 }, { "epoch": 0.09189359688781411, "grad_norm": 0.6420555710792542, "learning_rate": 4.5407422983913365e-05, "loss": 2.3777, "step": 26220 }, { "epoch": 0.09210387971822101, "grad_norm": 0.6758638024330139, "learning_rate": 4.5396908842393024e-05, "loss": 2.3934, "step": 26280 }, { "epoch": 0.0923141625486279, "grad_norm": 0.6616193652153015, "learning_rate": 4.5386394700872676e-05, "loss": 2.3729, "step": 26340 }, { "epoch": 0.0925244453790348, "grad_norm": 0.7162423729896545, "learning_rate": 4.537588055935233e-05, "loss": 2.3803, "step": 26400 }, { "epoch": 0.0927347282094417, "grad_norm": 0.6722175478935242, "learning_rate": 4.5365366417831986e-05, "loss": 2.3835, "step": 26460 }, { "epoch": 0.09294501103984859, "grad_norm": 0.6524060368537903, "learning_rate": 4.5354852276311645e-05, "loss": 2.3756, "step": 26520 }, { "epoch": 0.09315529387025549, "grad_norm": 0.6283015608787537, "learning_rate": 4.53443381347913e-05, "loss": 2.3789, "step": 26580 }, { "epoch": 0.0933655767006624, "grad_norm": 0.6441583037376404, "learning_rate": 4.533382399327095e-05, "loss": 2.3858, "step": 26640 }, { "epoch": 0.09357585953106928, "grad_norm": 0.6654236316680908, "learning_rate": 4.532330985175061e-05, "loss": 2.3774, "step": 26700 }, { "epoch": 0.09378614236147619, "grad_norm": 0.6412244439125061, "learning_rate": 4.531279571023026e-05, "loss": 2.3693, "step": 26760 }, { "epoch": 0.09399642519188309, "grad_norm": 0.7081546783447266, "learning_rate": 4.530228156870992e-05, "loss": 2.3834, "step": 26820 }, { "epoch": 0.09420670802228998, "grad_norm": 0.6857917904853821, "learning_rate": 4.5291767427189576e-05, "loss": 2.3694, "step": 26880 }, { "epoch": 0.09441699085269688, "grad_norm": 0.6636059880256653, "learning_rate": 4.528125328566923e-05, "loss": 2.375, "step": 26940 }, { "epoch": 0.09462727368310378, "grad_norm": 0.6544620990753174, "learning_rate": 4.527073914414888e-05, "loss": 2.3687, "step": 27000 }, { "epoch": 0.09483755651351067, "grad_norm": 0.6736893653869629, "learning_rate": 4.526022500262854e-05, "loss": 2.3776, "step": 27060 }, { "epoch": 0.09504783934391757, "grad_norm": 0.6209065318107605, "learning_rate": 4.524971086110819e-05, "loss": 2.3871, "step": 27120 }, { "epoch": 0.09525812217432447, "grad_norm": 0.6604148149490356, "learning_rate": 4.523919671958784e-05, "loss": 2.3553, "step": 27180 }, { "epoch": 0.09546840500473136, "grad_norm": 0.6733397245407104, "learning_rate": 4.522868257806751e-05, "loss": 2.3763, "step": 27240 }, { "epoch": 0.09567868783513826, "grad_norm": 0.6688505411148071, "learning_rate": 4.521816843654716e-05, "loss": 2.3845, "step": 27300 }, { "epoch": 0.09588897066554516, "grad_norm": 0.6299710273742676, "learning_rate": 4.520765429502681e-05, "loss": 2.3823, "step": 27360 }, { "epoch": 0.09609925349595205, "grad_norm": 0.6818243861198425, "learning_rate": 4.519714015350647e-05, "loss": 2.3822, "step": 27420 }, { "epoch": 0.09630953632635895, "grad_norm": 0.671530544757843, "learning_rate": 4.518662601198612e-05, "loss": 2.3907, "step": 27480 }, { "epoch": 0.09651981915676586, "grad_norm": 0.7086201310157776, "learning_rate": 4.5176111870465774e-05, "loss": 2.3716, "step": 27540 }, { "epoch": 0.09673010198717275, "grad_norm": 0.6694490909576416, "learning_rate": 4.516559772894543e-05, "loss": 2.3833, "step": 27600 }, { "epoch": 0.09694038481757965, "grad_norm": 0.630606472492218, "learning_rate": 4.51552588231171e-05, "loss": 2.3933, "step": 27660 }, { "epoch": 0.09715066764798654, "grad_norm": 0.6745237708091736, "learning_rate": 
4.514474468159675e-05, "loss": 2.3681, "step": 27720 }, { "epoch": 0.09736095047839344, "grad_norm": 0.6740299463272095, "learning_rate": 4.513423054007641e-05, "loss": 2.3852, "step": 27780 }, { "epoch": 0.09757123330880034, "grad_norm": 0.6464400887489319, "learning_rate": 4.512371639855606e-05, "loss": 2.3856, "step": 27840 }, { "epoch": 0.09778151613920723, "grad_norm": 0.6964374780654907, "learning_rate": 4.511320225703571e-05, "loss": 2.3825, "step": 27900 }, { "epoch": 0.09799179896961413, "grad_norm": 0.6555659770965576, "learning_rate": 4.510268811551537e-05, "loss": 2.3811, "step": 27960 }, { "epoch": 0.09820208180002103, "grad_norm": 0.6736533045768738, "learning_rate": 4.509217397399503e-05, "loss": 2.3686, "step": 28020 }, { "epoch": 0.09841236463042792, "grad_norm": 0.710355818271637, "learning_rate": 4.508165983247468e-05, "loss": 2.3703, "step": 28080 }, { "epoch": 0.09862264746083482, "grad_norm": 0.6771557331085205, "learning_rate": 4.507114569095434e-05, "loss": 2.3663, "step": 28140 }, { "epoch": 0.09883293029124172, "grad_norm": 0.6405073404312134, "learning_rate": 4.506063154943399e-05, "loss": 2.3757, "step": 28200 }, { "epoch": 0.09904321312164861, "grad_norm": 0.6605597734451294, "learning_rate": 4.505011740791364e-05, "loss": 2.3752, "step": 28260 }, { "epoch": 0.09925349595205551, "grad_norm": 0.6761056184768677, "learning_rate": 4.50396032663933e-05, "loss": 2.3628, "step": 28320 }, { "epoch": 0.09946377878246242, "grad_norm": 0.6279546022415161, "learning_rate": 4.502908912487296e-05, "loss": 2.3638, "step": 28380 }, { "epoch": 0.0996740616128693, "grad_norm": 0.7104046940803528, "learning_rate": 4.501857498335261e-05, "loss": 2.3686, "step": 28440 }, { "epoch": 0.09988434444327621, "grad_norm": 0.625745415687561, "learning_rate": 4.5008060841832264e-05, "loss": 2.3646, "step": 28500 }, { "epoch": 0.10009462727368311, "grad_norm": 0.6523934602737427, "learning_rate": 4.499754670031192e-05, "loss": 2.3637, "step": 28560 }, { "epoch": 0.10030491010409, "grad_norm": 0.678256630897522, "learning_rate": 4.4987032558791575e-05, "loss": 2.3852, "step": 28620 }, { "epoch": 0.1005151929344969, "grad_norm": 0.6635131239891052, "learning_rate": 4.4976518417271227e-05, "loss": 2.3798, "step": 28680 }, { "epoch": 0.1007254757649038, "grad_norm": 0.728843629360199, "learning_rate": 4.496600427575089e-05, "loss": 2.3787, "step": 28740 }, { "epoch": 0.10093575859531069, "grad_norm": 0.6350687146186829, "learning_rate": 4.4955490134230544e-05, "loss": 2.3804, "step": 28800 }, { "epoch": 0.10114604142571759, "grad_norm": 0.732272207736969, "learning_rate": 4.4944975992710196e-05, "loss": 2.3734, "step": 28860 }, { "epoch": 0.1013563242561245, "grad_norm": 0.6215546727180481, "learning_rate": 4.4934461851189854e-05, "loss": 2.3583, "step": 28920 }, { "epoch": 0.10156660708653138, "grad_norm": 0.600857138633728, "learning_rate": 4.4923947709669506e-05, "loss": 2.3719, "step": 28980 }, { "epoch": 0.10177688991693828, "grad_norm": 0.6272040009498596, "learning_rate": 4.491343356814916e-05, "loss": 2.3675, "step": 29040 }, { "epoch": 0.10198717274734517, "grad_norm": 0.6435662508010864, "learning_rate": 4.490291942662882e-05, "loss": 2.3619, "step": 29100 }, { "epoch": 0.10219745557775207, "grad_norm": 0.6466601490974426, "learning_rate": 4.4892405285108476e-05, "loss": 2.3576, "step": 29160 }, { "epoch": 0.10240773840815898, "grad_norm": 0.6503751873970032, "learning_rate": 4.488189114358813e-05, "loss": 2.3659, "step": 29220 }, { "epoch": 0.10261802123856587, "grad_norm": 
0.6413028240203857, "learning_rate": 4.487137700206778e-05, "loss": 2.3686, "step": 29280 }, { "epoch": 0.10282830406897277, "grad_norm": 0.6793463826179504, "learning_rate": 4.486086286054744e-05, "loss": 2.357, "step": 29340 }, { "epoch": 0.10303858689937967, "grad_norm": 0.6493575572967529, "learning_rate": 4.485034871902709e-05, "loss": 2.3692, "step": 29400 }, { "epoch": 0.10324886972978656, "grad_norm": 0.6862606406211853, "learning_rate": 4.483983457750675e-05, "loss": 2.3631, "step": 29460 }, { "epoch": 0.10345915256019346, "grad_norm": 0.688431441783905, "learning_rate": 4.482932043598641e-05, "loss": 2.3779, "step": 29520 }, { "epoch": 0.10366943539060036, "grad_norm": 0.6671889424324036, "learning_rate": 4.481880629446606e-05, "loss": 2.372, "step": 29580 }, { "epoch": 0.10387971822100725, "grad_norm": 0.6443668603897095, "learning_rate": 4.480829215294571e-05, "loss": 2.3927, "step": 29640 }, { "epoch": 0.10409000105141415, "grad_norm": 0.6309412717819214, "learning_rate": 4.479777801142537e-05, "loss": 2.3609, "step": 29700 }, { "epoch": 0.10430028388182105, "grad_norm": 0.6563585996627808, "learning_rate": 4.478726386990502e-05, "loss": 2.3551, "step": 29760 }, { "epoch": 0.10451056671222794, "grad_norm": 0.6455681920051575, "learning_rate": 4.477674972838468e-05, "loss": 2.3806, "step": 29820 }, { "epoch": 0.10472084954263484, "grad_norm": 0.6657047867774963, "learning_rate": 4.476623558686434e-05, "loss": 2.3618, "step": 29880 }, { "epoch": 0.10493113237304175, "grad_norm": 0.6885735392570496, "learning_rate": 4.475572144534399e-05, "loss": 2.3622, "step": 29940 }, { "epoch": 0.10514141520344863, "grad_norm": 0.6443381905555725, "learning_rate": 4.474520730382364e-05, "loss": 2.3561, "step": 30000 }, { "epoch": 0.10535169803385554, "grad_norm": 0.6550012826919556, "learning_rate": 4.47346931623033e-05, "loss": 2.3585, "step": 30060 }, { "epoch": 0.10556198086426244, "grad_norm": 0.651603102684021, "learning_rate": 4.472417902078295e-05, "loss": 2.3669, "step": 30120 }, { "epoch": 0.10577226369466933, "grad_norm": 0.676773726940155, "learning_rate": 4.471366487926261e-05, "loss": 2.3702, "step": 30180 }, { "epoch": 0.10598254652507623, "grad_norm": 0.6347634196281433, "learning_rate": 4.4703150737742264e-05, "loss": 2.371, "step": 30240 }, { "epoch": 0.10619282935548313, "grad_norm": 0.67087721824646, "learning_rate": 4.469263659622192e-05, "loss": 2.3546, "step": 30300 }, { "epoch": 0.10640311218589002, "grad_norm": 0.6727715730667114, "learning_rate": 4.4682122454701574e-05, "loss": 2.3523, "step": 30360 }, { "epoch": 0.10661339501629692, "grad_norm": 0.6418828368186951, "learning_rate": 4.4671608313181226e-05, "loss": 2.3702, "step": 30420 }, { "epoch": 0.10682367784670381, "grad_norm": 0.6562620997428894, "learning_rate": 4.4661094171660885e-05, "loss": 2.3854, "step": 30480 }, { "epoch": 0.10703396067711071, "grad_norm": 0.6004977822303772, "learning_rate": 4.4650580030140544e-05, "loss": 2.3542, "step": 30540 }, { "epoch": 0.10724424350751761, "grad_norm": 0.6666843891143799, "learning_rate": 4.4640065888620195e-05, "loss": 2.3609, "step": 30600 }, { "epoch": 0.1074545263379245, "grad_norm": 0.5965372323989868, "learning_rate": 4.4629551747099854e-05, "loss": 2.3746, "step": 30660 }, { "epoch": 0.1076648091683314, "grad_norm": 0.6549592018127441, "learning_rate": 4.4619037605579506e-05, "loss": 2.3612, "step": 30720 }, { "epoch": 0.1078750919987383, "grad_norm": 0.679852306842804, "learning_rate": 4.460852346405916e-05, "loss": 2.3665, "step": 30780 }, { "epoch": 
0.1080853748291452, "grad_norm": 0.6266508102416992, "learning_rate": 4.4598009322538817e-05, "loss": 2.3547, "step": 30840 }, { "epoch": 0.1082956576595521, "grad_norm": 0.6630394458770752, "learning_rate": 4.4587495181018475e-05, "loss": 2.3633, "step": 30900 }, { "epoch": 0.108505940489959, "grad_norm": 0.645500898361206, "learning_rate": 4.457715627519013e-05, "loss": 2.3528, "step": 30960 }, { "epoch": 0.10871622332036589, "grad_norm": 0.6527357697486877, "learning_rate": 4.456664213366979e-05, "loss": 2.3743, "step": 31020 }, { "epoch": 0.10892650615077279, "grad_norm": 0.7146820425987244, "learning_rate": 4.455612799214944e-05, "loss": 2.3685, "step": 31080 }, { "epoch": 0.10913678898117969, "grad_norm": 0.6792079210281372, "learning_rate": 4.4545613850629095e-05, "loss": 2.3618, "step": 31140 }, { "epoch": 0.10934707181158658, "grad_norm": 0.6431834697723389, "learning_rate": 4.4535099709108754e-05, "loss": 2.3716, "step": 31200 }, { "epoch": 0.10955735464199348, "grad_norm": 0.7037909030914307, "learning_rate": 4.4524585567588405e-05, "loss": 2.3665, "step": 31260 }, { "epoch": 0.10976763747240038, "grad_norm": 0.637527585029602, "learning_rate": 4.4514071426068064e-05, "loss": 2.3625, "step": 31320 }, { "epoch": 0.10997792030280727, "grad_norm": 0.6294242739677429, "learning_rate": 4.450355728454772e-05, "loss": 2.3697, "step": 31380 }, { "epoch": 0.11018820313321417, "grad_norm": 0.6632541418075562, "learning_rate": 4.4493043143027375e-05, "loss": 2.3689, "step": 31440 }, { "epoch": 0.11039848596362108, "grad_norm": 0.6736090183258057, "learning_rate": 4.4482529001507027e-05, "loss": 2.3597, "step": 31500 }, { "epoch": 0.11060876879402796, "grad_norm": 0.6545177698135376, "learning_rate": 4.4472014859986685e-05, "loss": 2.3501, "step": 31560 }, { "epoch": 0.11081905162443487, "grad_norm": 0.6885281801223755, "learning_rate": 4.446150071846634e-05, "loss": 2.3684, "step": 31620 }, { "epoch": 0.11102933445484177, "grad_norm": 0.6284212470054626, "learning_rate": 4.4450986576945996e-05, "loss": 2.3692, "step": 31680 }, { "epoch": 0.11123961728524866, "grad_norm": 0.6197623014450073, "learning_rate": 4.444047243542565e-05, "loss": 2.3573, "step": 31740 }, { "epoch": 0.11144990011565556, "grad_norm": 0.6184737086296082, "learning_rate": 4.4429958293905306e-05, "loss": 2.3704, "step": 31800 }, { "epoch": 0.11166018294606245, "grad_norm": 0.6636645793914795, "learning_rate": 4.441944415238496e-05, "loss": 2.3807, "step": 31860 }, { "epoch": 0.11187046577646935, "grad_norm": 0.6567378640174866, "learning_rate": 4.440893001086461e-05, "loss": 2.3758, "step": 31920 }, { "epoch": 0.11208074860687625, "grad_norm": 0.6212893724441528, "learning_rate": 4.439841586934427e-05, "loss": 2.3483, "step": 31980 }, { "epoch": 0.11229103143728314, "grad_norm": 0.6712754964828491, "learning_rate": 4.438790172782393e-05, "loss": 2.3579, "step": 32040 }, { "epoch": 0.11250131426769004, "grad_norm": 0.6450668573379517, "learning_rate": 4.437738758630358e-05, "loss": 2.3723, "step": 32100 }, { "epoch": 0.11271159709809694, "grad_norm": 0.6592389345169067, "learning_rate": 4.436687344478324e-05, "loss": 2.3569, "step": 32160 }, { "epoch": 0.11292187992850383, "grad_norm": 0.6862047910690308, "learning_rate": 4.435635930326289e-05, "loss": 2.3567, "step": 32220 }, { "epoch": 0.11313216275891073, "grad_norm": 0.6675071716308594, "learning_rate": 4.434584516174254e-05, "loss": 2.3642, "step": 32280 }, { "epoch": 0.11334244558931764, "grad_norm": 0.6118093132972717, "learning_rate": 4.43353310202222e-05,
"loss": 2.3598, "step": 32340 }, { "epoch": 0.11355272841972452, "grad_norm": 0.6482064127922058, "learning_rate": 4.432481687870186e-05, "loss": 2.3656, "step": 32400 }, { "epoch": 0.11376301125013143, "grad_norm": 0.6413975954055786, "learning_rate": 4.431430273718151e-05, "loss": 2.3498, "step": 32460 }, { "epoch": 0.11397329408053833, "grad_norm": 0.596386194229126, "learning_rate": 4.430378859566117e-05, "loss": 2.3548, "step": 32520 }, { "epoch": 0.11418357691094522, "grad_norm": 0.618043839931488, "learning_rate": 4.429327445414082e-05, "loss": 2.3578, "step": 32580 }, { "epoch": 0.11439385974135212, "grad_norm": 0.629632294178009, "learning_rate": 4.4282760312620473e-05, "loss": 2.3597, "step": 32640 }, { "epoch": 0.11460414257175902, "grad_norm": 0.6268094182014465, "learning_rate": 4.427224617110013e-05, "loss": 2.3605, "step": 32700 }, { "epoch": 0.11481442540216591, "grad_norm": 0.608088493347168, "learning_rate": 4.426173202957979e-05, "loss": 2.3664, "step": 32760 }, { "epoch": 0.11502470823257281, "grad_norm": 0.7458576560020447, "learning_rate": 4.425121788805944e-05, "loss": 2.3507, "step": 32820 }, { "epoch": 0.11523499106297971, "grad_norm": 0.6672834753990173, "learning_rate": 4.4240703746539095e-05, "loss": 2.3508, "step": 32880 }, { "epoch": 0.1154452738933866, "grad_norm": 0.5789446830749512, "learning_rate": 4.423018960501875e-05, "loss": 2.36, "step": 32940 }, { "epoch": 0.1156555567237935, "grad_norm": 0.694132924079895, "learning_rate": 4.4219675463498405e-05, "loss": 2.3433, "step": 33000 }, { "epoch": 0.1158658395542004, "grad_norm": 0.6312335729598999, "learning_rate": 4.4209161321978064e-05, "loss": 2.3366, "step": 33060 }, { "epoch": 0.1160761223846073, "grad_norm": 0.6660616993904114, "learning_rate": 4.419864718045772e-05, "loss": 2.3512, "step": 33120 }, { "epoch": 0.1162864052150142, "grad_norm": 0.6671555638313293, "learning_rate": 4.4188133038937374e-05, "loss": 2.3515, "step": 33180 }, { "epoch": 0.11649668804542108, "grad_norm": 0.645706057548523, "learning_rate": 4.4177618897417026e-05, "loss": 2.3594, "step": 33240 }, { "epoch": 0.11670697087582799, "grad_norm": 0.6335362792015076, "learning_rate": 4.4167104755896685e-05, "loss": 2.3508, "step": 33300 }, { "epoch": 0.11691725370623489, "grad_norm": 0.6129640340805054, "learning_rate": 4.415659061437634e-05, "loss": 2.3498, "step": 33360 }, { "epoch": 0.11712753653664178, "grad_norm": 0.6517692804336548, "learning_rate": 4.4146076472855995e-05, "loss": 2.3586, "step": 33420 }, { "epoch": 0.11733781936704868, "grad_norm": 0.6196044683456421, "learning_rate": 4.413556233133565e-05, "loss": 2.3515, "step": 33480 }, { "epoch": 0.11754810219745558, "grad_norm": 0.634680986404419, "learning_rate": 4.4125048189815306e-05, "loss": 2.3583, "step": 33540 }, { "epoch": 0.11775838502786247, "grad_norm": 0.6536049246788025, "learning_rate": 4.411453404829496e-05, "loss": 2.3498, "step": 33600 }, { "epoch": 0.11796866785826937, "grad_norm": 0.6144905090332031, "learning_rate": 4.410401990677461e-05, "loss": 2.3533, "step": 33660 }, { "epoch": 0.11817895068867627, "grad_norm": 0.6263406872749329, "learning_rate": 4.409350576525427e-05, "loss": 2.3425, "step": 33720 }, { "epoch": 0.11838923351908316, "grad_norm": 0.5995636582374573, "learning_rate": 4.408299162373393e-05, "loss": 2.355, "step": 33780 }, { "epoch": 0.11859951634949006, "grad_norm": 0.6604700684547424, "learning_rate": 4.407247748221358e-05, "loss": 2.3319, "step": 33840 }, { "epoch": 0.11880979917989697, "grad_norm": 0.683630645275116, 
"learning_rate": 4.406196334069324e-05, "loss": 2.3547, "step": 33900 }, { "epoch": 0.11902008201030385, "grad_norm": 0.6503610610961914, "learning_rate": 4.405144919917289e-05, "loss": 2.3574, "step": 33960 }, { "epoch": 0.11923036484071076, "grad_norm": 0.659476101398468, "learning_rate": 4.404093505765254e-05, "loss": 2.3298, "step": 34020 }, { "epoch": 0.11944064767111766, "grad_norm": 0.636908233165741, "learning_rate": 4.40304209161322e-05, "loss": 2.3475, "step": 34080 }, { "epoch": 0.11965093050152455, "grad_norm": 0.6105729937553406, "learning_rate": 4.401990677461186e-05, "loss": 2.3422, "step": 34140 }, { "epoch": 0.11986121333193145, "grad_norm": 0.6372390389442444, "learning_rate": 4.400939263309151e-05, "loss": 2.3513, "step": 34200 }, { "epoch": 0.12007149616233835, "grad_norm": 0.6719101071357727, "learning_rate": 4.399887849157117e-05, "loss": 2.3412, "step": 34260 }, { "epoch": 0.12028177899274524, "grad_norm": 0.6186497807502747, "learning_rate": 4.398836435005082e-05, "loss": 2.3487, "step": 34320 }, { "epoch": 0.12049206182315214, "grad_norm": 0.6582601070404053, "learning_rate": 4.397785020853047e-05, "loss": 2.3543, "step": 34380 }, { "epoch": 0.12070234465355904, "grad_norm": 0.5645360946655273, "learning_rate": 4.396733606701013e-05, "loss": 2.3708, "step": 34440 }, { "epoch": 0.12091262748396593, "grad_norm": 0.6398698687553406, "learning_rate": 4.395682192548979e-05, "loss": 2.3539, "step": 34500 }, { "epoch": 0.12112291031437283, "grad_norm": 0.6094596982002258, "learning_rate": 4.394630778396944e-05, "loss": 2.3537, "step": 34560 }, { "epoch": 0.12133319314477972, "grad_norm": 0.6569386720657349, "learning_rate": 4.3935793642449094e-05, "loss": 2.3436, "step": 34620 }, { "epoch": 0.12154347597518662, "grad_norm": 0.6402146816253662, "learning_rate": 4.392527950092875e-05, "loss": 2.3567, "step": 34680 }, { "epoch": 0.12175375880559353, "grad_norm": 0.7214573621749878, "learning_rate": 4.3914765359408405e-05, "loss": 2.357, "step": 34740 }, { "epoch": 0.12196404163600041, "grad_norm": 0.6399426460266113, "learning_rate": 4.390425121788806e-05, "loss": 2.3413, "step": 34800 }, { "epoch": 0.12217432446640732, "grad_norm": 0.6547138094902039, "learning_rate": 4.389373707636772e-05, "loss": 2.3567, "step": 34860 }, { "epoch": 0.12238460729681422, "grad_norm": 0.6688662171363831, "learning_rate": 4.3883222934847374e-05, "loss": 2.3543, "step": 34920 }, { "epoch": 0.1225948901272211, "grad_norm": 0.6135913729667664, "learning_rate": 4.3872708793327026e-05, "loss": 2.3516, "step": 34980 }, { "epoch": 0.12280517295762801, "grad_norm": 0.6633687615394592, "learning_rate": 4.3862194651806685e-05, "loss": 2.363, "step": 35040 }, { "epoch": 0.12301545578803491, "grad_norm": 0.6762205958366394, "learning_rate": 4.3851680510286337e-05, "loss": 2.3551, "step": 35100 }, { "epoch": 0.1232257386184418, "grad_norm": 0.6045565009117126, "learning_rate": 4.384116636876599e-05, "loss": 2.3614, "step": 35160 }, { "epoch": 0.1234360214488487, "grad_norm": 0.6369898915290833, "learning_rate": 4.383065222724565e-05, "loss": 2.3384, "step": 35220 }, { "epoch": 0.1236463042792556, "grad_norm": 0.607323169708252, "learning_rate": 4.3820138085725306e-05, "loss": 2.3433, "step": 35280 }, { "epoch": 0.12385658710966249, "grad_norm": 0.6833062767982483, "learning_rate": 4.380962394420496e-05, "loss": 2.351, "step": 35340 }, { "epoch": 0.1240668699400694, "grad_norm": 0.6462218761444092, "learning_rate": 4.379910980268461e-05, "loss": 2.3512, "step": 35400 }, { "epoch": 0.1242771527704763, 
"grad_norm": 0.628410279750824, "learning_rate": 4.378859566116427e-05, "loss": 2.3415, "step": 35460 }, { "epoch": 0.12448743560088318, "grad_norm": 0.6561073064804077, "learning_rate": 4.377808151964392e-05, "loss": 2.3513, "step": 35520 }, { "epoch": 0.12469771843129009, "grad_norm": 0.605626106262207, "learning_rate": 4.376756737812358e-05, "loss": 2.3387, "step": 35580 }, { "epoch": 0.12490800126169699, "grad_norm": 0.6336895823478699, "learning_rate": 4.375705323660324e-05, "loss": 2.3564, "step": 35640 }, { "epoch": 0.12511828409210388, "grad_norm": 0.6178215742111206, "learning_rate": 4.374653909508289e-05, "loss": 2.3507, "step": 35700 }, { "epoch": 0.12532856692251076, "grad_norm": 0.5754424333572388, "learning_rate": 4.373602495356254e-05, "loss": 2.3547, "step": 35760 }, { "epoch": 0.12553884975291768, "grad_norm": 0.6096973419189453, "learning_rate": 4.37255108120422e-05, "loss": 2.3464, "step": 35820 }, { "epoch": 0.12574913258332457, "grad_norm": 0.6017065048217773, "learning_rate": 4.371499667052185e-05, "loss": 2.3488, "step": 35880 }, { "epoch": 0.12595941541373146, "grad_norm": 0.6396207809448242, "learning_rate": 4.370448252900151e-05, "loss": 2.334, "step": 35940 }, { "epoch": 0.12616969824413837, "grad_norm": 0.6168996691703796, "learning_rate": 4.369396838748116e-05, "loss": 2.3496, "step": 36000 }, { "epoch": 0.12637998107454526, "grad_norm": 0.62862229347229, "learning_rate": 4.368345424596082e-05, "loss": 2.3338, "step": 36060 }, { "epoch": 0.12659026390495215, "grad_norm": 0.6999679207801819, "learning_rate": 4.367294010444047e-05, "loss": 2.3498, "step": 36120 }, { "epoch": 0.12680054673535907, "grad_norm": 0.6003287434577942, "learning_rate": 4.366242596292013e-05, "loss": 2.3495, "step": 36180 }, { "epoch": 0.12701082956576595, "grad_norm": 0.6159112453460693, "learning_rate": 4.3651911821399783e-05, "loss": 2.3434, "step": 36240 }, { "epoch": 0.12722111239617284, "grad_norm": 0.6516057252883911, "learning_rate": 4.364139767987944e-05, "loss": 2.3495, "step": 36300 }, { "epoch": 0.12743139522657976, "grad_norm": 0.6326673626899719, "learning_rate": 4.3630883538359094e-05, "loss": 2.3433, "step": 36360 }, { "epoch": 0.12764167805698665, "grad_norm": 0.6475709676742554, "learning_rate": 4.362036939683875e-05, "loss": 2.3693, "step": 36420 }, { "epoch": 0.12785196088739353, "grad_norm": 0.6244445443153381, "learning_rate": 4.3609855255318405e-05, "loss": 2.3536, "step": 36480 }, { "epoch": 0.12806224371780045, "grad_norm": 0.6203553080558777, "learning_rate": 4.3599341113798056e-05, "loss": 2.3531, "step": 36540 }, { "epoch": 0.12827252654820734, "grad_norm": 0.6234232187271118, "learning_rate": 4.3588826972277715e-05, "loss": 2.3542, "step": 36600 }, { "epoch": 0.12848280937861423, "grad_norm": 0.6129841208457947, "learning_rate": 4.3578312830757374e-05, "loss": 2.344, "step": 36660 }, { "epoch": 0.12869309220902114, "grad_norm": 0.6914127469062805, "learning_rate": 4.3567798689237026e-05, "loss": 2.343, "step": 36720 }, { "epoch": 0.12890337503942803, "grad_norm": 0.6331791281700134, "learning_rate": 4.3557284547716684e-05, "loss": 2.3464, "step": 36780 }, { "epoch": 0.12911365786983492, "grad_norm": 0.6370502710342407, "learning_rate": 4.3546770406196336e-05, "loss": 2.3419, "step": 36840 }, { "epoch": 0.12932394070024184, "grad_norm": 0.6535687446594238, "learning_rate": 4.353625626467599e-05, "loss": 2.3459, "step": 36900 }, { "epoch": 0.12953422353064872, "grad_norm": 0.6368798613548279, "learning_rate": 4.352591735884765e-05, "loss": 2.3495, "step": 36960 
}, { "epoch": 0.1297445063610556, "grad_norm": 0.6685027480125427, "learning_rate": 4.351557845301931e-05, "loss": 2.3321, "step": 37020 }, { "epoch": 0.12995478919146253, "grad_norm": 0.6186925768852234, "learning_rate": 4.350506431149897e-05, "loss": 2.3328, "step": 37080 }, { "epoch": 0.13016507202186942, "grad_norm": 0.6169596314430237, "learning_rate": 4.349455016997862e-05, "loss": 2.3439, "step": 37140 }, { "epoch": 0.1303753548522763, "grad_norm": 0.6129602789878845, "learning_rate": 4.348403602845828e-05, "loss": 2.344, "step": 37200 }, { "epoch": 0.13058563768268322, "grad_norm": 0.6711073517799377, "learning_rate": 4.347352188693794e-05, "loss": 2.349, "step": 37260 }, { "epoch": 0.1307959205130901, "grad_norm": 0.6636022925376892, "learning_rate": 4.346300774541759e-05, "loss": 2.3578, "step": 37320 }, { "epoch": 0.131006203343497, "grad_norm": 0.6326186060905457, "learning_rate": 4.345249360389724e-05, "loss": 2.3404, "step": 37380 }, { "epoch": 0.1312164861739039, "grad_norm": 0.6219196915626526, "learning_rate": 4.34419794623769e-05, "loss": 2.3423, "step": 37440 }, { "epoch": 0.1314267690043108, "grad_norm": 0.6334632635116577, "learning_rate": 4.343146532085655e-05, "loss": 2.3453, "step": 37500 }, { "epoch": 0.1316370518347177, "grad_norm": 0.6453350782394409, "learning_rate": 4.342095117933621e-05, "loss": 2.3581, "step": 37560 }, { "epoch": 0.1318473346651246, "grad_norm": 0.637695848941803, "learning_rate": 4.341043703781586e-05, "loss": 2.3506, "step": 37620 }, { "epoch": 0.1320576174955315, "grad_norm": 0.6321239471435547, "learning_rate": 4.339992289629552e-05, "loss": 2.3492, "step": 37680 }, { "epoch": 0.13226790032593838, "grad_norm": 0.5901476740837097, "learning_rate": 4.338940875477517e-05, "loss": 2.343, "step": 37740 }, { "epoch": 0.1324781831563453, "grad_norm": 0.6054787635803223, "learning_rate": 4.337889461325483e-05, "loss": 2.3409, "step": 37800 }, { "epoch": 0.13268846598675219, "grad_norm": 0.6033499240875244, "learning_rate": 4.336838047173449e-05, "loss": 2.3468, "step": 37860 }, { "epoch": 0.13289874881715907, "grad_norm": 0.6457664370536804, "learning_rate": 4.335786633021414e-05, "loss": 2.3436, "step": 37920 }, { "epoch": 0.133109031647566, "grad_norm": 0.6080107688903809, "learning_rate": 4.3347352188693794e-05, "loss": 2.3483, "step": 37980 }, { "epoch": 0.13331931447797288, "grad_norm": 0.6590386629104614, "learning_rate": 4.333683804717345e-05, "loss": 2.3528, "step": 38040 }, { "epoch": 0.13352959730837977, "grad_norm": 0.6028419137001038, "learning_rate": 4.3326323905653104e-05, "loss": 2.3359, "step": 38100 }, { "epoch": 0.13373988013878665, "grad_norm": 0.7010486721992493, "learning_rate": 4.3315809764132756e-05, "loss": 2.3474, "step": 38160 }, { "epoch": 0.13395016296919357, "grad_norm": 0.6186957955360413, "learning_rate": 4.330529562261242e-05, "loss": 2.3457, "step": 38220 }, { "epoch": 0.13416044579960046, "grad_norm": 0.6543868184089661, "learning_rate": 4.3294781481092073e-05, "loss": 2.3572, "step": 38280 }, { "epoch": 0.13437072863000735, "grad_norm": 0.613800048828125, "learning_rate": 4.3284267339571725e-05, "loss": 2.3444, "step": 38340 }, { "epoch": 0.13458101146041426, "grad_norm": 0.6139937043190002, "learning_rate": 4.3273753198051384e-05, "loss": 2.3632, "step": 38400 }, { "epoch": 0.13479129429082115, "grad_norm": 0.6320657730102539, "learning_rate": 4.3263239056531036e-05, "loss": 2.3523, "step": 38460 }, { "epoch": 0.13500157712122804, "grad_norm": 0.6350893974304199, "learning_rate": 4.325272491501069e-05, "loss": 
2.3477, "step": 38520 }, { "epoch": 0.13521185995163496, "grad_norm": 0.6405870914459229, "learning_rate": 4.3242210773490346e-05, "loss": 2.3401, "step": 38580 }, { "epoch": 0.13542214278204184, "grad_norm": 0.6332811117172241, "learning_rate": 4.3231696631970005e-05, "loss": 2.3313, "step": 38640 }, { "epoch": 0.13563242561244873, "grad_norm": 0.6080389022827148, "learning_rate": 4.322118249044966e-05, "loss": 2.3336, "step": 38700 }, { "epoch": 0.13584270844285565, "grad_norm": 0.5950392484664917, "learning_rate": 4.321066834892931e-05, "loss": 2.3321, "step": 38760 }, { "epoch": 0.13605299127326254, "grad_norm": 0.5927405953407288, "learning_rate": 4.320015420740897e-05, "loss": 2.3487, "step": 38820 }, { "epoch": 0.13626327410366942, "grad_norm": 0.6823748350143433, "learning_rate": 4.318964006588862e-05, "loss": 2.3424, "step": 38880 }, { "epoch": 0.13647355693407634, "grad_norm": 0.6291486024856567, "learning_rate": 4.317912592436828e-05, "loss": 2.348, "step": 38940 }, { "epoch": 0.13668383976448323, "grad_norm": 0.608342170715332, "learning_rate": 4.316861178284794e-05, "loss": 2.3439, "step": 39000 }, { "epoch": 0.13689412259489012, "grad_norm": 0.6415091156959534, "learning_rate": 4.315809764132759e-05, "loss": 2.3409, "step": 39060 }, { "epoch": 0.13710440542529703, "grad_norm": 0.6027123928070068, "learning_rate": 4.314758349980724e-05, "loss": 2.3452, "step": 39120 }, { "epoch": 0.13731468825570392, "grad_norm": 0.6144362688064575, "learning_rate": 4.31370693582869e-05, "loss": 2.3268, "step": 39180 }, { "epoch": 0.1375249710861108, "grad_norm": 0.616381824016571, "learning_rate": 4.3126730452458556e-05, "loss": 2.3301, "step": 39240 }, { "epoch": 0.13773525391651772, "grad_norm": 0.5856786370277405, "learning_rate": 4.3116216310938215e-05, "loss": 2.3289, "step": 39300 }, { "epoch": 0.1379455367469246, "grad_norm": 0.612820565700531, "learning_rate": 4.310570216941787e-05, "loss": 2.3392, "step": 39360 }, { "epoch": 0.1381558195773315, "grad_norm": 0.653420090675354, "learning_rate": 4.3095188027897526e-05, "loss": 2.3422, "step": 39420 }, { "epoch": 0.13836610240773842, "grad_norm": 0.6993768215179443, "learning_rate": 4.308467388637718e-05, "loss": 2.3276, "step": 39480 }, { "epoch": 0.1385763852381453, "grad_norm": 0.620498538017273, "learning_rate": 4.3074159744856836e-05, "loss": 2.327, "step": 39540 }, { "epoch": 0.1387866680685522, "grad_norm": 0.6352885365486145, "learning_rate": 4.306364560333649e-05, "loss": 2.3349, "step": 39600 }, { "epoch": 0.1389969508989591, "grad_norm": 0.6150739789009094, "learning_rate": 4.305313146181614e-05, "loss": 2.34, "step": 39660 }, { "epoch": 0.139207233729366, "grad_norm": 0.6017259359359741, "learning_rate": 4.30426173202958e-05, "loss": 2.3376, "step": 39720 }, { "epoch": 0.13941751655977289, "grad_norm": 0.6711851358413696, "learning_rate": 4.303210317877546e-05, "loss": 2.351, "step": 39780 }, { "epoch": 0.1396277993901798, "grad_norm": 0.7069224715232849, "learning_rate": 4.302158903725511e-05, "loss": 2.3402, "step": 39840 }, { "epoch": 0.1398380822205867, "grad_norm": 0.5861261487007141, "learning_rate": 4.301107489573477e-05, "loss": 2.3418, "step": 39900 }, { "epoch": 0.14004836505099358, "grad_norm": 0.6228580474853516, "learning_rate": 4.300056075421442e-05, "loss": 2.3228, "step": 39960 }, { "epoch": 0.1402586478814005, "grad_norm": 0.6415411233901978, "learning_rate": 4.299004661269407e-05, "loss": 2.3498, "step": 40020 }, { "epoch": 0.14046893071180738, "grad_norm": 0.6277108192443848, "learning_rate": 
4.297953247117373e-05, "loss": 2.348, "step": 40080 }, { "epoch": 0.14067921354221427, "grad_norm": 0.6571968793869019, "learning_rate": 4.296901832965339e-05, "loss": 2.3394, "step": 40140 }, { "epoch": 0.1408894963726212, "grad_norm": 0.6432043313980103, "learning_rate": 4.295850418813304e-05, "loss": 2.3297, "step": 40200 }, { "epoch": 0.14109977920302808, "grad_norm": 0.6432401537895203, "learning_rate": 4.294799004661269e-05, "loss": 2.3301, "step": 40260 }, { "epoch": 0.14131006203343496, "grad_norm": 0.6234648823738098, "learning_rate": 4.293747590509235e-05, "loss": 2.3284, "step": 40320 }, { "epoch": 0.14152034486384188, "grad_norm": 0.6374291181564331, "learning_rate": 4.2926961763572e-05, "loss": 2.3368, "step": 40380 }, { "epoch": 0.14173062769424877, "grad_norm": 0.6450021862983704, "learning_rate": 4.291644762205166e-05, "loss": 2.3456, "step": 40440 }, { "epoch": 0.14194091052465566, "grad_norm": 0.5875654220581055, "learning_rate": 4.290593348053132e-05, "loss": 2.3504, "step": 40500 }, { "epoch": 0.14215119335506257, "grad_norm": 0.5929096937179565, "learning_rate": 4.289541933901097e-05, "loss": 2.3415, "step": 40560 }, { "epoch": 0.14236147618546946, "grad_norm": 0.6588292121887207, "learning_rate": 4.2884905197490624e-05, "loss": 2.3225, "step": 40620 }, { "epoch": 0.14257175901587635, "grad_norm": 0.6448934078216553, "learning_rate": 4.287439105597028e-05, "loss": 2.3272, "step": 40680 }, { "epoch": 0.14278204184628326, "grad_norm": 0.6730201840400696, "learning_rate": 4.2863876914449935e-05, "loss": 2.3358, "step": 40740 }, { "epoch": 0.14299232467669015, "grad_norm": 0.6035958528518677, "learning_rate": 4.2853362772929594e-05, "loss": 2.32, "step": 40800 }, { "epoch": 0.14320260750709704, "grad_norm": 0.5733596086502075, "learning_rate": 4.284284863140925e-05, "loss": 2.3386, "step": 40860 }, { "epoch": 0.14341289033750396, "grad_norm": 0.6223961710929871, "learning_rate": 4.2832334489888904e-05, "loss": 2.339, "step": 40920 }, { "epoch": 0.14362317316791084, "grad_norm": 0.6346945762634277, "learning_rate": 4.2821820348368556e-05, "loss": 2.3336, "step": 40980 }, { "epoch": 0.14383345599831773, "grad_norm": 0.6121115684509277, "learning_rate": 4.2811306206848215e-05, "loss": 2.3331, "step": 41040 }, { "epoch": 0.14404373882872462, "grad_norm": 0.6739269495010376, "learning_rate": 4.280079206532787e-05, "loss": 2.3314, "step": 41100 }, { "epoch": 0.14425402165913154, "grad_norm": 0.6606531143188477, "learning_rate": 4.2790277923807525e-05, "loss": 2.3389, "step": 41160 }, { "epoch": 0.14446430448953843, "grad_norm": 0.6564496159553528, "learning_rate": 4.277976378228718e-05, "loss": 2.3303, "step": 41220 }, { "epoch": 0.1446745873199453, "grad_norm": 0.6618686318397522, "learning_rate": 4.2769249640766836e-05, "loss": 2.3352, "step": 41280 }, { "epoch": 0.14488487015035223, "grad_norm": 0.6552087068557739, "learning_rate": 4.275873549924649e-05, "loss": 2.3315, "step": 41340 }, { "epoch": 0.14509515298075912, "grad_norm": 0.6370447874069214, "learning_rate": 4.274822135772614e-05, "loss": 2.3395, "step": 41400 }, { "epoch": 0.145305435811166, "grad_norm": 0.6118485927581787, "learning_rate": 4.27377072162058e-05, "loss": 2.3397, "step": 41460 }, { "epoch": 0.14551571864157292, "grad_norm": 0.6253863573074341, "learning_rate": 4.272719307468546e-05, "loss": 2.3379, "step": 41520 }, { "epoch": 0.1457260014719798, "grad_norm": 0.6244592666625977, "learning_rate": 4.271667893316511e-05, "loss": 2.3415, "step": 41580 }, { "epoch": 0.1459362843023867, "grad_norm": 
0.6361585259437561, "learning_rate": 4.270616479164477e-05, "loss": 2.3334, "step": 41640 }, { "epoch": 0.14614656713279361, "grad_norm": 0.6284440159797668, "learning_rate": 4.2695825885816425e-05, "loss": 2.3311, "step": 41700 }, { "epoch": 0.1463568499632005, "grad_norm": 0.6048732995986938, "learning_rate": 4.2685311744296083e-05, "loss": 2.3239, "step": 41760 }, { "epoch": 0.1465671327936074, "grad_norm": 0.6068998575210571, "learning_rate": 4.2674797602775735e-05, "loss": 2.3411, "step": 41820 }, { "epoch": 0.1467774156240143, "grad_norm": 0.6173762083053589, "learning_rate": 4.266428346125539e-05, "loss": 2.3328, "step": 41880 }, { "epoch": 0.1469876984544212, "grad_norm": 0.6197360157966614, "learning_rate": 4.2653769319735046e-05, "loss": 2.329, "step": 41940 }, { "epoch": 0.14719798128482808, "grad_norm": 0.6429170966148376, "learning_rate": 4.2643255178214705e-05, "loss": 2.3444, "step": 42000 }, { "epoch": 0.147408264115235, "grad_norm": 0.6106884479522705, "learning_rate": 4.2632741036694356e-05, "loss": 2.3446, "step": 42060 }, { "epoch": 0.1476185469456419, "grad_norm": 0.6438376903533936, "learning_rate": 4.262222689517401e-05, "loss": 2.3502, "step": 42120 }, { "epoch": 0.14782882977604878, "grad_norm": 0.6259479522705078, "learning_rate": 4.261171275365367e-05, "loss": 2.3399, "step": 42180 }, { "epoch": 0.1480391126064557, "grad_norm": 0.5971313714981079, "learning_rate": 4.260119861213332e-05, "loss": 2.325, "step": 42240 }, { "epoch": 0.14824939543686258, "grad_norm": 0.6188226342201233, "learning_rate": 4.259068447061297e-05, "loss": 2.3217, "step": 42300 }, { "epoch": 0.14845967826726947, "grad_norm": 0.6155485510826111, "learning_rate": 4.2580170329092636e-05, "loss": 2.336, "step": 42360 }, { "epoch": 0.14866996109767638, "grad_norm": 0.5590870976448059, "learning_rate": 4.256965618757229e-05, "loss": 2.3412, "step": 42420 }, { "epoch": 0.14888024392808327, "grad_norm": 0.686099112033844, "learning_rate": 4.255914204605194e-05, "loss": 2.3134, "step": 42480 }, { "epoch": 0.14909052675849016, "grad_norm": 0.6008979082107544, "learning_rate": 4.25486279045316e-05, "loss": 2.3503, "step": 42540 }, { "epoch": 0.14930080958889708, "grad_norm": 0.6094452738761902, "learning_rate": 4.253811376301125e-05, "loss": 2.3473, "step": 42600 }, { "epoch": 0.14951109241930396, "grad_norm": 0.6370200514793396, "learning_rate": 4.252759962149091e-05, "loss": 2.3404, "step": 42660 }, { "epoch": 0.14972137524971085, "grad_norm": 0.653773844242096, "learning_rate": 4.251708547997056e-05, "loss": 2.3368, "step": 42720 }, { "epoch": 0.14993165808011777, "grad_norm": 0.6160172820091248, "learning_rate": 4.250657133845022e-05, "loss": 2.3275, "step": 42780 }, { "epoch": 0.15014194091052466, "grad_norm": 0.5883677005767822, "learning_rate": 4.249605719692987e-05, "loss": 2.324, "step": 42840 }, { "epoch": 0.15035222374093155, "grad_norm": 0.6456954479217529, "learning_rate": 4.2485543055409524e-05, "loss": 2.3437, "step": 42900 }, { "epoch": 0.15056250657133846, "grad_norm": 0.6041179299354553, "learning_rate": 4.247502891388918e-05, "loss": 2.3269, "step": 42960 }, { "epoch": 0.15077278940174535, "grad_norm": 0.5934765338897705, "learning_rate": 4.246451477236884e-05, "loss": 2.3242, "step": 43020 }, { "epoch": 0.15098307223215224, "grad_norm": 0.6217716336250305, "learning_rate": 4.245400063084849e-05, "loss": 2.3276, "step": 43080 }, { "epoch": 0.15119335506255915, "grad_norm": 0.58519446849823, "learning_rate": 4.244348648932815e-05, "loss": 2.3267, "step": 43140 }, { "epoch": 
0.15140363789296604, "grad_norm": 0.6344335675239563, "learning_rate": 4.24329723478078e-05, "loss": 2.3307, "step": 43200 }, { "epoch": 0.15161392072337293, "grad_norm": 0.6420978903770447, "learning_rate": 4.2422458206287455e-05, "loss": 2.332, "step": 43260 }, { "epoch": 0.15182420355377985, "grad_norm": 0.6562826037406921, "learning_rate": 4.2411944064767114e-05, "loss": 2.3135, "step": 43320 }, { "epoch": 0.15203448638418673, "grad_norm": 0.6492423415184021, "learning_rate": 4.240142992324677e-05, "loss": 2.3133, "step": 43380 }, { "epoch": 0.15224476921459362, "grad_norm": 0.6478837132453918, "learning_rate": 4.2390915781726424e-05, "loss": 2.3247, "step": 43440 }, { "epoch": 0.15245505204500054, "grad_norm": 0.5964265465736389, "learning_rate": 4.238040164020608e-05, "loss": 2.3222, "step": 43500 }, { "epoch": 0.15266533487540743, "grad_norm": 0.6163307428359985, "learning_rate": 4.2369887498685735e-05, "loss": 2.3225, "step": 43560 }, { "epoch": 0.15287561770581432, "grad_norm": 0.6812646389007568, "learning_rate": 4.235937335716539e-05, "loss": 2.3266, "step": 43620 }, { "epoch": 0.15308590053622123, "grad_norm": 0.6045469045639038, "learning_rate": 4.2348859215645046e-05, "loss": 2.3195, "step": 43680 }, { "epoch": 0.15329618336662812, "grad_norm": 0.6138345003128052, "learning_rate": 4.2338345074124704e-05, "loss": 2.3475, "step": 43740 }, { "epoch": 0.153506466197035, "grad_norm": 0.6312504410743713, "learning_rate": 4.2327830932604356e-05, "loss": 2.3244, "step": 43800 }, { "epoch": 0.1537167490274419, "grad_norm": 0.5887283086776733, "learning_rate": 4.231731679108401e-05, "loss": 2.3253, "step": 43860 }, { "epoch": 0.1539270318578488, "grad_norm": 0.644916296005249, "learning_rate": 4.230680264956367e-05, "loss": 2.3534, "step": 43920 }, { "epoch": 0.1541373146882557, "grad_norm": 0.6598151922225952, "learning_rate": 4.229628850804332e-05, "loss": 2.3247, "step": 43980 }, { "epoch": 0.1543475975186626, "grad_norm": 0.6058512330055237, "learning_rate": 4.228577436652297e-05, "loss": 2.3278, "step": 44040 }, { "epoch": 0.1545578803490695, "grad_norm": 0.7185304760932922, "learning_rate": 4.2275260225002636e-05, "loss": 2.3231, "step": 44100 }, { "epoch": 0.1547681631794764, "grad_norm": 0.6237541437149048, "learning_rate": 4.226474608348229e-05, "loss": 2.3131, "step": 44160 }, { "epoch": 0.15497844600988328, "grad_norm": 0.6727291941642761, "learning_rate": 4.225423194196194e-05, "loss": 2.3176, "step": 44220 }, { "epoch": 0.1551887288402902, "grad_norm": 0.6596560478210449, "learning_rate": 4.22437178004416e-05, "loss": 2.324, "step": 44280 }, { "epoch": 0.15539901167069708, "grad_norm": 0.6178886890411377, "learning_rate": 4.223320365892125e-05, "loss": 2.3304, "step": 44340 }, { "epoch": 0.15560929450110397, "grad_norm": 0.6122422814369202, "learning_rate": 4.2222864753092914e-05, "loss": 2.3411, "step": 44400 }, { "epoch": 0.1558195773315109, "grad_norm": 0.6111167669296265, "learning_rate": 4.2212350611572566e-05, "loss": 2.327, "step": 44460 }, { "epoch": 0.15602986016191778, "grad_norm": 0.6797990798950195, "learning_rate": 4.220183647005222e-05, "loss": 2.3295, "step": 44520 }, { "epoch": 0.15624014299232467, "grad_norm": 0.5918498039245605, "learning_rate": 4.219132232853188e-05, "loss": 2.3297, "step": 44580 }, { "epoch": 0.15645042582273158, "grad_norm": 0.6331505179405212, "learning_rate": 4.2180808187011535e-05, "loss": 2.3268, "step": 44640 }, { "epoch": 0.15666070865313847, "grad_norm": 0.5968581438064575, "learning_rate": 4.217029404549119e-05, "loss": 2.3499, 
"step": 44700 }, { "epoch": 0.15687099148354536, "grad_norm": 0.7291754484176636, "learning_rate": 4.215977990397084e-05, "loss": 2.3382, "step": 44760 }, { "epoch": 0.15708127431395227, "grad_norm": 0.62226802110672, "learning_rate": 4.21492657624505e-05, "loss": 2.3379, "step": 44820 }, { "epoch": 0.15729155714435916, "grad_norm": 0.6174754500389099, "learning_rate": 4.2138751620930156e-05, "loss": 2.3331, "step": 44880 }, { "epoch": 0.15750183997476605, "grad_norm": 0.6238417625427246, "learning_rate": 4.212823747940981e-05, "loss": 2.3259, "step": 44940 }, { "epoch": 0.15771212280517297, "grad_norm": 0.6212115287780762, "learning_rate": 4.211772333788947e-05, "loss": 2.3301, "step": 45000 }, { "epoch": 0.15792240563557985, "grad_norm": 0.6135154962539673, "learning_rate": 4.210720919636912e-05, "loss": 2.3235, "step": 45060 }, { "epoch": 0.15813268846598674, "grad_norm": 0.6578890085220337, "learning_rate": 4.209669505484877e-05, "loss": 2.3273, "step": 45120 }, { "epoch": 0.15834297129639366, "grad_norm": 0.6077913045883179, "learning_rate": 4.208618091332843e-05, "loss": 2.3394, "step": 45180 }, { "epoch": 0.15855325412680055, "grad_norm": 0.6092454791069031, "learning_rate": 4.207566677180809e-05, "loss": 2.332, "step": 45240 }, { "epoch": 0.15876353695720744, "grad_norm": 0.6163653135299683, "learning_rate": 4.206515263028774e-05, "loss": 2.3237, "step": 45300 }, { "epoch": 0.15897381978761435, "grad_norm": 0.5905401706695557, "learning_rate": 4.205463848876739e-05, "loss": 2.3199, "step": 45360 }, { "epoch": 0.15918410261802124, "grad_norm": 0.6775098443031311, "learning_rate": 4.204412434724705e-05, "loss": 2.323, "step": 45420 }, { "epoch": 0.15939438544842813, "grad_norm": 0.5804296135902405, "learning_rate": 4.20336102057267e-05, "loss": 2.3197, "step": 45480 }, { "epoch": 0.15960466827883504, "grad_norm": 0.5796772241592407, "learning_rate": 4.2023096064206354e-05, "loss": 2.3177, "step": 45540 }, { "epoch": 0.15981495110924193, "grad_norm": 0.6753705739974976, "learning_rate": 4.201258192268602e-05, "loss": 2.3467, "step": 45600 }, { "epoch": 0.16002523393964882, "grad_norm": 0.580149233341217, "learning_rate": 4.200206778116567e-05, "loss": 2.3185, "step": 45660 }, { "epoch": 0.16023551677005574, "grad_norm": 0.5798102617263794, "learning_rate": 4.1991553639645324e-05, "loss": 2.3195, "step": 45720 }, { "epoch": 0.16044579960046262, "grad_norm": 0.6332999467849731, "learning_rate": 4.198103949812498e-05, "loss": 2.3237, "step": 45780 }, { "epoch": 0.1606560824308695, "grad_norm": 0.5849390029907227, "learning_rate": 4.1970525356604634e-05, "loss": 2.3137, "step": 45840 }, { "epoch": 0.16086636526127643, "grad_norm": 0.5851735472679138, "learning_rate": 4.1960011215084286e-05, "loss": 2.3095, "step": 45900 }, { "epoch": 0.16107664809168332, "grad_norm": 0.6217033267021179, "learning_rate": 4.194949707356395e-05, "loss": 2.343, "step": 45960 }, { "epoch": 0.1612869309220902, "grad_norm": 0.6043165922164917, "learning_rate": 4.19389829320436e-05, "loss": 2.3256, "step": 46020 }, { "epoch": 0.16149721375249712, "grad_norm": 0.608424186706543, "learning_rate": 4.1928468790523255e-05, "loss": 2.3435, "step": 46080 }, { "epoch": 0.161707496582904, "grad_norm": 0.6008337736129761, "learning_rate": 4.1917954649002914e-05, "loss": 2.3228, "step": 46140 }, { "epoch": 0.1619177794133109, "grad_norm": 0.5890358090400696, "learning_rate": 4.1907440507482566e-05, "loss": 2.3246, "step": 46200 }, { "epoch": 0.1621280622437178, "grad_norm": 0.5866249799728394, "learning_rate": 
4.189692636596222e-05, "loss": 2.3048, "step": 46260 }, { "epoch": 0.1623383450741247, "grad_norm": 0.6539163589477539, "learning_rate": 4.1886412224441876e-05, "loss": 2.3154, "step": 46320 }, { "epoch": 0.1625486279045316, "grad_norm": 0.6630322933197021, "learning_rate": 4.1875898082921535e-05, "loss": 2.332, "step": 46380 }, { "epoch": 0.1627589107349385, "grad_norm": 0.6159778237342834, "learning_rate": 4.186538394140119e-05, "loss": 2.3144, "step": 46440 }, { "epoch": 0.1629691935653454, "grad_norm": 0.6124069690704346, "learning_rate": 4.185504503557285e-05, "loss": 2.3312, "step": 46500 }, { "epoch": 0.16317947639575228, "grad_norm": 0.5926678776741028, "learning_rate": 4.18445308940525e-05, "loss": 2.3286, "step": 46560 }, { "epoch": 0.16338975922615917, "grad_norm": 0.6444182395935059, "learning_rate": 4.1834016752532155e-05, "loss": 2.3426, "step": 46620 }, { "epoch": 0.1636000420565661, "grad_norm": 0.5816701650619507, "learning_rate": 4.182350261101181e-05, "loss": 2.3237, "step": 46680 }, { "epoch": 0.16381032488697297, "grad_norm": 0.6558911800384521, "learning_rate": 4.1812988469491465e-05, "loss": 2.3219, "step": 46740 }, { "epoch": 0.16402060771737986, "grad_norm": 0.5754097700119019, "learning_rate": 4.1802474327971124e-05, "loss": 2.3159, "step": 46800 }, { "epoch": 0.16423089054778678, "grad_norm": 0.6297584176063538, "learning_rate": 4.179196018645078e-05, "loss": 2.3223, "step": 46860 }, { "epoch": 0.16444117337819367, "grad_norm": 0.6096706390380859, "learning_rate": 4.1781446044930434e-05, "loss": 2.324, "step": 46920 }, { "epoch": 0.16465145620860056, "grad_norm": 0.5916000008583069, "learning_rate": 4.1770931903410086e-05, "loss": 2.3375, "step": 46980 }, { "epoch": 0.16486173903900747, "grad_norm": 0.5777989029884338, "learning_rate": 4.1760417761889745e-05, "loss": 2.3196, "step": 47040 }, { "epoch": 0.16507202186941436, "grad_norm": 0.6046702861785889, "learning_rate": 4.17499036203694e-05, "loss": 2.329, "step": 47100 }, { "epoch": 0.16528230469982125, "grad_norm": 0.5970363020896912, "learning_rate": 4.1739389478849056e-05, "loss": 2.3165, "step": 47160 }, { "epoch": 0.16549258753022816, "grad_norm": 0.5796226263046265, "learning_rate": 4.172887533732871e-05, "loss": 2.3139, "step": 47220 }, { "epoch": 0.16570287036063505, "grad_norm": 0.6245923042297363, "learning_rate": 4.1718361195808366e-05, "loss": 2.3177, "step": 47280 }, { "epoch": 0.16591315319104194, "grad_norm": 0.6365753412246704, "learning_rate": 4.170784705428802e-05, "loss": 2.3105, "step": 47340 }, { "epoch": 0.16612343602144886, "grad_norm": 0.6340218186378479, "learning_rate": 4.169733291276767e-05, "loss": 2.3289, "step": 47400 }, { "epoch": 0.16633371885185574, "grad_norm": 0.6164852976799011, "learning_rate": 4.1686818771247335e-05, "loss": 2.3347, "step": 47460 }, { "epoch": 0.16654400168226263, "grad_norm": 0.6079014539718628, "learning_rate": 4.167630462972699e-05, "loss": 2.3058, "step": 47520 }, { "epoch": 0.16675428451266955, "grad_norm": 0.6427564024925232, "learning_rate": 4.166579048820664e-05, "loss": 2.2973, "step": 47580 }, { "epoch": 0.16696456734307644, "grad_norm": 0.6605886220932007, "learning_rate": 4.16552763466863e-05, "loss": 2.3225, "step": 47640 }, { "epoch": 0.16717485017348332, "grad_norm": 0.6974831223487854, "learning_rate": 4.164476220516595e-05, "loss": 2.3205, "step": 47700 }, { "epoch": 0.16738513300389024, "grad_norm": 0.6362879872322083, "learning_rate": 4.16342480636456e-05, "loss": 2.3157, "step": 47760 }, { "epoch": 0.16759541583429713, "grad_norm": 
0.5881654024124146, "learning_rate": 4.162373392212526e-05, "loss": 2.3122, "step": 47820 }, { "epoch": 0.16780569866470402, "grad_norm": 0.6076338291168213, "learning_rate": 4.161321978060492e-05, "loss": 2.3195, "step": 47880 }, { "epoch": 0.16801598149511093, "grad_norm": 0.6087121367454529, "learning_rate": 4.160270563908457e-05, "loss": 2.3136, "step": 47940 }, { "epoch": 0.16822626432551782, "grad_norm": 0.5977998971939087, "learning_rate": 4.159219149756422e-05, "loss": 2.3206, "step": 48000 }, { "epoch": 0.1684365471559247, "grad_norm": 0.638401985168457, "learning_rate": 4.158167735604388e-05, "loss": 2.3223, "step": 48060 }, { "epoch": 0.16864682998633163, "grad_norm": 0.5896310806274414, "learning_rate": 4.157116321452353e-05, "loss": 2.3258, "step": 48120 }, { "epoch": 0.1688571128167385, "grad_norm": 0.6041130423545837, "learning_rate": 4.156064907300319e-05, "loss": 2.3068, "step": 48180 }, { "epoch": 0.1690673956471454, "grad_norm": 0.6105507016181946, "learning_rate": 4.155013493148285e-05, "loss": 2.3208, "step": 48240 }, { "epoch": 0.16927767847755232, "grad_norm": 0.6303457021713257, "learning_rate": 4.15396207899625e-05, "loss": 2.3028, "step": 48300 }, { "epoch": 0.1694879613079592, "grad_norm": 0.6402522325515747, "learning_rate": 4.1529106648442154e-05, "loss": 2.3328, "step": 48360 }, { "epoch": 0.1696982441383661, "grad_norm": 0.622160017490387, "learning_rate": 4.151859250692181e-05, "loss": 2.314, "step": 48420 }, { "epoch": 0.169908526968773, "grad_norm": 0.5710818767547607, "learning_rate": 4.1508078365401465e-05, "loss": 2.3165, "step": 48480 }, { "epoch": 0.1701188097991799, "grad_norm": 0.6494461297988892, "learning_rate": 4.1497564223881124e-05, "loss": 2.3201, "step": 48540 }, { "epoch": 0.1703290926295868, "grad_norm": 0.586834728717804, "learning_rate": 4.1487050082360775e-05, "loss": 2.3251, "step": 48600 }, { "epoch": 0.1705393754599937, "grad_norm": 0.6685080528259277, "learning_rate": 4.1476535940840434e-05, "loss": 2.3327, "step": 48660 }, { "epoch": 0.1707496582904006, "grad_norm": 0.5974341034889221, "learning_rate": 4.1466021799320086e-05, "loss": 2.3246, "step": 48720 }, { "epoch": 0.17095994112080748, "grad_norm": 0.5994948744773865, "learning_rate": 4.1455507657799745e-05, "loss": 2.3205, "step": 48780 }, { "epoch": 0.1711702239512144, "grad_norm": 0.6069355607032776, "learning_rate": 4.1444993516279397e-05, "loss": 2.3273, "step": 48840 }, { "epoch": 0.17138050678162128, "grad_norm": 0.6142827272415161, "learning_rate": 4.1434479374759055e-05, "loss": 2.3084, "step": 48900 }, { "epoch": 0.17159078961202817, "grad_norm": 0.6005485653877258, "learning_rate": 4.142396523323871e-05, "loss": 2.3108, "step": 48960 }, { "epoch": 0.1718010724424351, "grad_norm": 0.6493135690689087, "learning_rate": 4.1413451091718366e-05, "loss": 2.3054, "step": 49020 }, { "epoch": 0.17201135527284198, "grad_norm": 0.628297746181488, "learning_rate": 4.140293695019802e-05, "loss": 2.3217, "step": 49080 }, { "epoch": 0.17222163810324886, "grad_norm": 0.5488650798797607, "learning_rate": 4.139242280867767e-05, "loss": 2.3233, "step": 49140 }, { "epoch": 0.17243192093365578, "grad_norm": 0.6183356642723083, "learning_rate": 4.138190866715733e-05, "loss": 2.3258, "step": 49200 }, { "epoch": 0.17264220376406267, "grad_norm": 0.6453129649162292, "learning_rate": 4.1371569761328985e-05, "loss": 2.3094, "step": 49260 }, { "epoch": 0.17285248659446956, "grad_norm": 0.638854444026947, "learning_rate": 4.1361055619808644e-05, "loss": 2.3394, "step": 49320 }, { "epoch": 
0.17306276942487647, "grad_norm": 0.6387530565261841, "learning_rate": 4.13505414782883e-05, "loss": 2.3375, "step": 49380 }, { "epoch": 0.17327305225528336, "grad_norm": 0.6037068963050842, "learning_rate": 4.1340027336767955e-05, "loss": 2.3175, "step": 49440 }, { "epoch": 0.17348333508569025, "grad_norm": 0.6161598563194275, "learning_rate": 4.132951319524761e-05, "loss": 2.3104, "step": 49500 }, { "epoch": 0.17369361791609714, "grad_norm": 0.5830373167991638, "learning_rate": 4.1318999053727265e-05, "loss": 2.3251, "step": 49560 }, { "epoch": 0.17390390074650405, "grad_norm": 0.6282650828361511, "learning_rate": 4.130848491220692e-05, "loss": 2.3182, "step": 49620 }, { "epoch": 0.17411418357691094, "grad_norm": 0.6157172918319702, "learning_rate": 4.1297970770686576e-05, "loss": 2.323, "step": 49680 }, { "epoch": 0.17432446640731783, "grad_norm": 0.5994360446929932, "learning_rate": 4.1287456629166234e-05, "loss": 2.3281, "step": 49740 }, { "epoch": 0.17453474923772475, "grad_norm": 0.6262275576591492, "learning_rate": 4.1276942487645886e-05, "loss": 2.3086, "step": 49800 }, { "epoch": 0.17474503206813163, "grad_norm": 0.672465443611145, "learning_rate": 4.126642834612554e-05, "loss": 2.2974, "step": 49860 }, { "epoch": 0.17495531489853852, "grad_norm": 0.586807370185852, "learning_rate": 4.12559142046052e-05, "loss": 2.3197, "step": 49920 }, { "epoch": 0.17516559772894544, "grad_norm": 0.6669567823410034, "learning_rate": 4.124540006308485e-05, "loss": 2.3177, "step": 49980 }, { "epoch": 0.17537588055935233, "grad_norm": 0.5670884847640991, "learning_rate": 4.123488592156451e-05, "loss": 2.3208, "step": 50040 }, { "epoch": 0.17558616338975921, "grad_norm": 0.5929347276687622, "learning_rate": 4.1224371780044166e-05, "loss": 2.324, "step": 50100 }, { "epoch": 0.17579644622016613, "grad_norm": 0.5971206426620483, "learning_rate": 4.121385763852382e-05, "loss": 2.3158, "step": 50160 }, { "epoch": 0.17600672905057302, "grad_norm": 0.5975629687309265, "learning_rate": 4.120334349700347e-05, "loss": 2.3187, "step": 50220 }, { "epoch": 0.1762170118809799, "grad_norm": 0.5721208453178406, "learning_rate": 4.119282935548313e-05, "loss": 2.3269, "step": 50280 }, { "epoch": 0.17642729471138682, "grad_norm": 0.593453586101532, "learning_rate": 4.118231521396278e-05, "loss": 2.3168, "step": 50340 }, { "epoch": 0.1766375775417937, "grad_norm": 0.6251164078712463, "learning_rate": 4.117180107244244e-05, "loss": 2.334, "step": 50400 }, { "epoch": 0.1768478603722006, "grad_norm": 0.616355836391449, "learning_rate": 4.116128693092209e-05, "loss": 2.3135, "step": 50460 }, { "epoch": 0.17705814320260752, "grad_norm": 0.6078146696090698, "learning_rate": 4.115077278940175e-05, "loss": 2.3236, "step": 50520 }, { "epoch": 0.1772684260330144, "grad_norm": 0.5824019908905029, "learning_rate": 4.11402586478814e-05, "loss": 2.3015, "step": 50580 }, { "epoch": 0.1774787088634213, "grad_norm": 0.6472707986831665, "learning_rate": 4.1129744506361053e-05, "loss": 2.2933, "step": 50640 }, { "epoch": 0.1776889916938282, "grad_norm": 0.599815845489502, "learning_rate": 4.111923036484071e-05, "loss": 2.3065, "step": 50700 }, { "epoch": 0.1778992745242351, "grad_norm": 0.591781735420227, "learning_rate": 4.110871622332037e-05, "loss": 2.3104, "step": 50760 }, { "epoch": 0.17810955735464198, "grad_norm": 0.5977340340614319, "learning_rate": 4.109820208180002e-05, "loss": 2.3213, "step": 50820 }, { "epoch": 0.1783198401850489, "grad_norm": 0.6299940347671509, "learning_rate": 4.108768794027968e-05, "loss": 2.3093, "step": 
50880 }, { "epoch": 0.1785301230154558, "grad_norm": 0.6001994013786316, "learning_rate": 4.107717379875933e-05, "loss": 2.314, "step": 50940 }, { "epoch": 0.17874040584586268, "grad_norm": 0.6062608361244202, "learning_rate": 4.1066659657238985e-05, "loss": 2.3098, "step": 51000 }, { "epoch": 0.1789506886762696, "grad_norm": 0.6256763935089111, "learning_rate": 4.1056145515718644e-05, "loss": 2.3324, "step": 51060 }, { "epoch": 0.17916097150667648, "grad_norm": 0.5938847064971924, "learning_rate": 4.10456313741983e-05, "loss": 2.3, "step": 51120 }, { "epoch": 0.17937125433708337, "grad_norm": 0.5945448875427246, "learning_rate": 4.1035117232677954e-05, "loss": 2.3263, "step": 51180 }, { "epoch": 0.17958153716749028, "grad_norm": 0.5838569402694702, "learning_rate": 4.1024603091157606e-05, "loss": 2.3236, "step": 51240 }, { "epoch": 0.17979181999789717, "grad_norm": 0.6150086522102356, "learning_rate": 4.1014088949637265e-05, "loss": 2.3205, "step": 51300 }, { "epoch": 0.18000210282830406, "grad_norm": 0.6472041606903076, "learning_rate": 4.100357480811692e-05, "loss": 2.3125, "step": 51360 }, { "epoch": 0.18021238565871098, "grad_norm": 0.6316036581993103, "learning_rate": 4.0993060666596575e-05, "loss": 2.3159, "step": 51420 }, { "epoch": 0.18042266848911787, "grad_norm": 0.5957314968109131, "learning_rate": 4.0982546525076234e-05, "loss": 2.3227, "step": 51480 }, { "epoch": 0.18063295131952475, "grad_norm": 0.5779405236244202, "learning_rate": 4.0972032383555886e-05, "loss": 2.314, "step": 51540 }, { "epoch": 0.18084323414993167, "grad_norm": 0.5753687024116516, "learning_rate": 4.096151824203554e-05, "loss": 2.3037, "step": 51600 }, { "epoch": 0.18105351698033856, "grad_norm": 0.6188888549804688, "learning_rate": 4.0951004100515197e-05, "loss": 2.3152, "step": 51660 }, { "epoch": 0.18126379981074545, "grad_norm": 0.5780879259109497, "learning_rate": 4.094048995899485e-05, "loss": 2.3216, "step": 51720 }, { "epoch": 0.18147408264115236, "grad_norm": 0.6299008727073669, "learning_rate": 4.09299758174745e-05, "loss": 2.3047, "step": 51780 }, { "epoch": 0.18168436547155925, "grad_norm": 0.575741708278656, "learning_rate": 4.0919461675954166e-05, "loss": 2.305, "step": 51840 }, { "epoch": 0.18189464830196614, "grad_norm": 0.6241801977157593, "learning_rate": 4.090894753443382e-05, "loss": 2.2994, "step": 51900 }, { "epoch": 0.18210493113237305, "grad_norm": 0.5695961713790894, "learning_rate": 4.089843339291347e-05, "loss": 2.3188, "step": 51960 }, { "epoch": 0.18231521396277994, "grad_norm": 0.6108229160308838, "learning_rate": 4.088791925139313e-05, "loss": 2.3052, "step": 52020 }, { "epoch": 0.18252549679318683, "grad_norm": 0.6097207069396973, "learning_rate": 4.087740510987278e-05, "loss": 2.3067, "step": 52080 }, { "epoch": 0.18273577962359375, "grad_norm": 0.5715197324752808, "learning_rate": 4.086689096835243e-05, "loss": 2.307, "step": 52140 }, { "epoch": 0.18294606245400064, "grad_norm": 0.5958813428878784, "learning_rate": 4.085637682683209e-05, "loss": 2.3099, "step": 52200 }, { "epoch": 0.18315634528440752, "grad_norm": 0.6055279970169067, "learning_rate": 4.084586268531175e-05, "loss": 2.3367, "step": 52260 }, { "epoch": 0.1833666281148144, "grad_norm": 0.6448001861572266, "learning_rate": 4.08353485437914e-05, "loss": 2.3191, "step": 52320 }, { "epoch": 0.18357691094522133, "grad_norm": 0.5830652713775635, "learning_rate": 4.082483440227105e-05, "loss": 2.3125, "step": 52380 }, { "epoch": 0.18378719377562822, "grad_norm": 0.645237922668457, "learning_rate": 
4.081432026075071e-05, "loss": 2.3147, "step": 52440 }, { "epoch": 0.1839974766060351, "grad_norm": 0.6656805276870728, "learning_rate": 4.0803806119230364e-05, "loss": 2.2889, "step": 52500 }, { "epoch": 0.18420775943644202, "grad_norm": 0.6012744307518005, "learning_rate": 4.079329197771002e-05, "loss": 2.3095, "step": 52560 }, { "epoch": 0.1844180422668489, "grad_norm": 0.6312623023986816, "learning_rate": 4.0782953071881686e-05, "loss": 2.3014, "step": 52620 }, { "epoch": 0.1846283250972558, "grad_norm": 0.6151764392852783, "learning_rate": 4.077243893036134e-05, "loss": 2.3117, "step": 52680 }, { "epoch": 0.1848386079276627, "grad_norm": 0.6274310350418091, "learning_rate": 4.0761924788841e-05, "loss": 2.3017, "step": 52740 }, { "epoch": 0.1850488907580696, "grad_norm": 0.6389456987380981, "learning_rate": 4.075141064732065e-05, "loss": 2.2992, "step": 52800 }, { "epoch": 0.1852591735884765, "grad_norm": 0.6331002116203308, "learning_rate": 4.07408965058003e-05, "loss": 2.3032, "step": 52860 }, { "epoch": 0.1854694564188834, "grad_norm": 0.5969839692115784, "learning_rate": 4.073038236427996e-05, "loss": 2.3023, "step": 52920 }, { "epoch": 0.1856797392492903, "grad_norm": 0.6058233976364136, "learning_rate": 4.071986822275962e-05, "loss": 2.3079, "step": 52980 }, { "epoch": 0.18589002207969718, "grad_norm": 0.600603461265564, "learning_rate": 4.070935408123927e-05, "loss": 2.3255, "step": 53040 }, { "epoch": 0.1861003049101041, "grad_norm": 0.5847122073173523, "learning_rate": 4.069883993971892e-05, "loss": 2.2967, "step": 53100 }, { "epoch": 0.18631058774051099, "grad_norm": 0.6589469313621521, "learning_rate": 4.068832579819858e-05, "loss": 2.3096, "step": 53160 }, { "epoch": 0.18652087057091787, "grad_norm": 0.5959056615829468, "learning_rate": 4.067781165667823e-05, "loss": 2.3039, "step": 53220 }, { "epoch": 0.1867311534013248, "grad_norm": 0.6054456830024719, "learning_rate": 4.0667297515157884e-05, "loss": 2.3166, "step": 53280 }, { "epoch": 0.18694143623173168, "grad_norm": 0.670484721660614, "learning_rate": 4.065678337363755e-05, "loss": 2.3216, "step": 53340 }, { "epoch": 0.18715171906213857, "grad_norm": 0.6493387222290039, "learning_rate": 4.06462692321172e-05, "loss": 2.3181, "step": 53400 }, { "epoch": 0.18736200189254548, "grad_norm": 0.5590341687202454, "learning_rate": 4.0635755090596853e-05, "loss": 2.3218, "step": 53460 }, { "epoch": 0.18757228472295237, "grad_norm": 0.5755470395088196, "learning_rate": 4.062524094907651e-05, "loss": 2.3121, "step": 53520 }, { "epoch": 0.18778256755335926, "grad_norm": 0.5840538144111633, "learning_rate": 4.0614726807556164e-05, "loss": 2.2996, "step": 53580 }, { "epoch": 0.18799285038376617, "grad_norm": 0.5699300765991211, "learning_rate": 4.0604212666035816e-05, "loss": 2.3193, "step": 53640 }, { "epoch": 0.18820313321417306, "grad_norm": 0.6303991675376892, "learning_rate": 4.0593698524515475e-05, "loss": 2.3158, "step": 53700 }, { "epoch": 0.18841341604457995, "grad_norm": 0.5751924514770508, "learning_rate": 4.058318438299513e-05, "loss": 2.2999, "step": 53760 }, { "epoch": 0.18862369887498687, "grad_norm": 0.5767543911933899, "learning_rate": 4.0572670241474785e-05, "loss": 2.3222, "step": 53820 }, { "epoch": 0.18883398170539376, "grad_norm": 0.6244462728500366, "learning_rate": 4.056215609995444e-05, "loss": 2.3067, "step": 53880 }, { "epoch": 0.18904426453580064, "grad_norm": 0.6316798329353333, "learning_rate": 4.05518171941261e-05, "loss": 2.3029, "step": 53940 }, { "epoch": 0.18925454736620756, "grad_norm": 
0.6083167791366577, "learning_rate": 4.054130305260575e-05, "loss": 2.3053, "step": 54000 }, { "epoch": 0.18946483019661445, "grad_norm": 0.6451045870780945, "learning_rate": 4.053078891108541e-05, "loss": 2.3087, "step": 54060 }, { "epoch": 0.18967511302702134, "grad_norm": 0.558189868927002, "learning_rate": 4.052027476956506e-05, "loss": 2.309, "step": 54120 }, { "epoch": 0.18988539585742825, "grad_norm": 0.5685776472091675, "learning_rate": 4.050976062804472e-05, "loss": 2.2987, "step": 54180 }, { "epoch": 0.19009567868783514, "grad_norm": 0.5583335757255554, "learning_rate": 4.049924648652438e-05, "loss": 2.2946, "step": 54240 }, { "epoch": 0.19030596151824203, "grad_norm": 0.6071650981903076, "learning_rate": 4.048873234500403e-05, "loss": 2.3021, "step": 54300 }, { "epoch": 0.19051624434864894, "grad_norm": 0.5754538178443909, "learning_rate": 4.0478218203483684e-05, "loss": 2.3088, "step": 54360 }, { "epoch": 0.19072652717905583, "grad_norm": 0.5936737060546875, "learning_rate": 4.046770406196334e-05, "loss": 2.3125, "step": 54420 }, { "epoch": 0.19093681000946272, "grad_norm": 0.609310507774353, "learning_rate": 4.0457189920443e-05, "loss": 2.3399, "step": 54480 }, { "epoch": 0.19114709283986964, "grad_norm": 0.60650235414505, "learning_rate": 4.0446675778922654e-05, "loss": 2.3021, "step": 54540 }, { "epoch": 0.19135737567027652, "grad_norm": 0.5556688904762268, "learning_rate": 4.0436161637402306e-05, "loss": 2.3061, "step": 54600 }, { "epoch": 0.1915676585006834, "grad_norm": 0.668526291847229, "learning_rate": 4.0425647495881964e-05, "loss": 2.3005, "step": 54660 }, { "epoch": 0.19177794133109033, "grad_norm": 0.5854858160018921, "learning_rate": 4.0415133354361616e-05, "loss": 2.3084, "step": 54720 }, { "epoch": 0.19198822416149722, "grad_norm": 0.5923261642456055, "learning_rate": 4.0404619212841275e-05, "loss": 2.3094, "step": 54780 }, { "epoch": 0.1921985069919041, "grad_norm": 0.5517012476921082, "learning_rate": 4.0394105071320934e-05, "loss": 2.2968, "step": 54840 }, { "epoch": 0.19240878982231102, "grad_norm": 0.5993329286575317, "learning_rate": 4.0383590929800585e-05, "loss": 2.3174, "step": 54900 }, { "epoch": 0.1926190726527179, "grad_norm": 0.5918632745742798, "learning_rate": 4.037307678828024e-05, "loss": 2.3063, "step": 54960 }, { "epoch": 0.1928293554831248, "grad_norm": 0.5919117331504822, "learning_rate": 4.0362562646759896e-05, "loss": 2.2953, "step": 55020 }, { "epoch": 0.19303963831353171, "grad_norm": 0.6217793226242065, "learning_rate": 4.035204850523955e-05, "loss": 2.3082, "step": 55080 }, { "epoch": 0.1932499211439386, "grad_norm": 0.6075329780578613, "learning_rate": 4.03415343637192e-05, "loss": 2.3098, "step": 55140 }, { "epoch": 0.1934602039743455, "grad_norm": 0.5899872183799744, "learning_rate": 4.0331020222198865e-05, "loss": 2.312, "step": 55200 }, { "epoch": 0.19367048680475238, "grad_norm": 0.5910099744796753, "learning_rate": 4.032050608067852e-05, "loss": 2.3145, "step": 55260 }, { "epoch": 0.1938807696351593, "grad_norm": 0.6759595274925232, "learning_rate": 4.030999193915817e-05, "loss": 2.3064, "step": 55320 }, { "epoch": 0.19409105246556618, "grad_norm": 0.5908769369125366, "learning_rate": 4.029947779763783e-05, "loss": 2.3159, "step": 55380 }, { "epoch": 0.19430133529597307, "grad_norm": 0.5796085596084595, "learning_rate": 4.028896365611748e-05, "loss": 2.2936, "step": 55440 }, { "epoch": 0.19451161812638, "grad_norm": 0.623555064201355, "learning_rate": 4.027844951459713e-05, "loss": 2.3031, "step": 55500 }, { "epoch": 
0.19472190095678688, "grad_norm": 0.610206663608551, "learning_rate": 4.026793537307679e-05, "loss": 2.3181, "step": 55560 }, { "epoch": 0.19493218378719376, "grad_norm": 0.5862197875976562, "learning_rate": 4.025742123155645e-05, "loss": 2.2862, "step": 55620 }, { "epoch": 0.19514246661760068, "grad_norm": 0.5683301091194153, "learning_rate": 4.02469070900361e-05, "loss": 2.3057, "step": 55680 }, { "epoch": 0.19535274944800757, "grad_norm": 0.5699648261070251, "learning_rate": 4.023639294851575e-05, "loss": 2.311, "step": 55740 }, { "epoch": 0.19556303227841446, "grad_norm": 0.5977410674095154, "learning_rate": 4.022587880699541e-05, "loss": 2.3056, "step": 55800 }, { "epoch": 0.19577331510882137, "grad_norm": 0.5807138681411743, "learning_rate": 4.021536466547506e-05, "loss": 2.3131, "step": 55860 }, { "epoch": 0.19598359793922826, "grad_norm": 0.5496934056282043, "learning_rate": 4.020485052395472e-05, "loss": 2.3164, "step": 55920 }, { "epoch": 0.19619388076963515, "grad_norm": 0.6061669588088989, "learning_rate": 4.019433638243438e-05, "loss": 2.3044, "step": 55980 }, { "epoch": 0.19640416360004206, "grad_norm": 0.5988995432853699, "learning_rate": 4.018382224091403e-05, "loss": 2.2983, "step": 56040 }, { "epoch": 0.19661444643044895, "grad_norm": 0.6225556135177612, "learning_rate": 4.0173308099393684e-05, "loss": 2.293, "step": 56100 }, { "epoch": 0.19682472926085584, "grad_norm": 0.5846222639083862, "learning_rate": 4.016279395787334e-05, "loss": 2.2929, "step": 56160 }, { "epoch": 0.19703501209126276, "grad_norm": 0.5997016429901123, "learning_rate": 4.0152279816352995e-05, "loss": 2.3001, "step": 56220 }, { "epoch": 0.19724529492166964, "grad_norm": 0.5757994055747986, "learning_rate": 4.0141765674832653e-05, "loss": 2.3129, "step": 56280 }, { "epoch": 0.19745557775207653, "grad_norm": 0.5928000211715698, "learning_rate": 4.0131251533312305e-05, "loss": 2.3106, "step": 56340 }, { "epoch": 0.19766586058248345, "grad_norm": 0.6106668710708618, "learning_rate": 4.0120737391791964e-05, "loss": 2.3057, "step": 56400 }, { "epoch": 0.19787614341289034, "grad_norm": 0.5892142057418823, "learning_rate": 4.0110223250271616e-05, "loss": 2.2884, "step": 56460 }, { "epoch": 0.19808642624329723, "grad_norm": 0.6026231646537781, "learning_rate": 4.009970910875127e-05, "loss": 2.31, "step": 56520 }, { "epoch": 0.19829670907370414, "grad_norm": 0.6028878092765808, "learning_rate": 4.0089194967230926e-05, "loss": 2.2865, "step": 56580 }, { "epoch": 0.19850699190411103, "grad_norm": 0.5761397480964661, "learning_rate": 4.0078680825710585e-05, "loss": 2.3016, "step": 56640 }, { "epoch": 0.19871727473451792, "grad_norm": 0.6288372278213501, "learning_rate": 4.006816668419024e-05, "loss": 2.3027, "step": 56700 }, { "epoch": 0.19892755756492483, "grad_norm": 0.5907147526741028, "learning_rate": 4.0057652542669896e-05, "loss": 2.3144, "step": 56760 }, { "epoch": 0.19913784039533172, "grad_norm": 0.5984941720962524, "learning_rate": 4.004713840114955e-05, "loss": 2.3066, "step": 56820 }, { "epoch": 0.1993481232257386, "grad_norm": 0.5911430716514587, "learning_rate": 4.00366242596292e-05, "loss": 2.2991, "step": 56880 }, { "epoch": 0.19955840605614553, "grad_norm": 0.6023363471031189, "learning_rate": 4.002611011810886e-05, "loss": 2.3113, "step": 56940 }, { "epoch": 0.19976868888655241, "grad_norm": 0.5403707027435303, "learning_rate": 4.001559597658852e-05, "loss": 2.313, "step": 57000 }, { "epoch": 0.1999789717169593, "grad_norm": 0.5953503847122192, "learning_rate": 4.000508183506817e-05, "loss": 
2.3128, "step": 57060 }, { "epoch": 0.20018925454736622, "grad_norm": 0.5829181671142578, "learning_rate": 3.999456769354783e-05, "loss": 2.3079, "step": 57120 }, { "epoch": 0.2003995373777731, "grad_norm": 0.6059377789497375, "learning_rate": 3.998405355202748e-05, "loss": 2.3212, "step": 57180 }, { "epoch": 0.20060982020818, "grad_norm": 0.6386002898216248, "learning_rate": 3.997353941050713e-05, "loss": 2.318, "step": 57240 }, { "epoch": 0.2008201030385869, "grad_norm": 0.5913571119308472, "learning_rate": 3.996302526898679e-05, "loss": 2.3003, "step": 57300 }, { "epoch": 0.2010303858689938, "grad_norm": 0.571596086025238, "learning_rate": 3.995251112746645e-05, "loss": 2.3026, "step": 57360 }, { "epoch": 0.2012406686994007, "grad_norm": 0.6805840730667114, "learning_rate": 3.99419969859461e-05, "loss": 2.3095, "step": 57420 }, { "epoch": 0.2014509515298076, "grad_norm": 0.601517379283905, "learning_rate": 3.993148284442575e-05, "loss": 2.2977, "step": 57480 }, { "epoch": 0.2016612343602145, "grad_norm": 0.604802131652832, "learning_rate": 3.992096870290541e-05, "loss": 2.2887, "step": 57540 }, { "epoch": 0.20187151719062138, "grad_norm": 0.6532741189002991, "learning_rate": 3.991045456138506e-05, "loss": 2.2981, "step": 57600 }, { "epoch": 0.2020818000210283, "grad_norm": 0.5688276290893555, "learning_rate": 3.9899940419864715e-05, "loss": 2.3068, "step": 57660 }, { "epoch": 0.20229208285143518, "grad_norm": 0.6363274455070496, "learning_rate": 3.988942627834438e-05, "loss": 2.305, "step": 57720 }, { "epoch": 0.20250236568184207, "grad_norm": 0.5459544062614441, "learning_rate": 3.987891213682403e-05, "loss": 2.3052, "step": 57780 }, { "epoch": 0.202712648512249, "grad_norm": 0.6446651816368103, "learning_rate": 3.9868397995303684e-05, "loss": 2.3085, "step": 57840 }, { "epoch": 0.20292293134265588, "grad_norm": 0.6298580169677734, "learning_rate": 3.985788385378334e-05, "loss": 2.3129, "step": 57900 }, { "epoch": 0.20313321417306276, "grad_norm": 0.6669092774391174, "learning_rate": 3.9847369712262995e-05, "loss": 2.2961, "step": 57960 }, { "epoch": 0.20334349700346965, "grad_norm": 0.5807875990867615, "learning_rate": 3.983703080643466e-05, "loss": 2.3062, "step": 58020 }, { "epoch": 0.20355377983387657, "grad_norm": 0.6042791604995728, "learning_rate": 3.982651666491431e-05, "loss": 2.3205, "step": 58080 }, { "epoch": 0.20376406266428346, "grad_norm": 0.581607460975647, "learning_rate": 3.981600252339397e-05, "loss": 2.3042, "step": 58140 }, { "epoch": 0.20397434549469035, "grad_norm": 0.5742453336715698, "learning_rate": 3.980548838187362e-05, "loss": 2.3127, "step": 58200 }, { "epoch": 0.20418462832509726, "grad_norm": 0.5976558923721313, "learning_rate": 3.979497424035328e-05, "loss": 2.3015, "step": 58260 }, { "epoch": 0.20439491115550415, "grad_norm": 0.6173827052116394, "learning_rate": 3.978446009883293e-05, "loss": 2.2928, "step": 58320 }, { "epoch": 0.20460519398591104, "grad_norm": 0.5937358736991882, "learning_rate": 3.977394595731258e-05, "loss": 2.3008, "step": 58380 }, { "epoch": 0.20481547681631795, "grad_norm": 0.6002318859100342, "learning_rate": 3.976343181579224e-05, "loss": 2.3133, "step": 58440 }, { "epoch": 0.20502575964672484, "grad_norm": 0.6029914021492004, "learning_rate": 3.97529176742719e-05, "loss": 2.2874, "step": 58500 }, { "epoch": 0.20523604247713173, "grad_norm": 0.6260485649108887, "learning_rate": 3.974240353275155e-05, "loss": 2.2919, "step": 58560 }, { "epoch": 0.20544632530753865, "grad_norm": 0.5648069977760315, "learning_rate": 
3.973188939123121e-05, "loss": 2.3182, "step": 58620 }, { "epoch": 0.20565660813794553, "grad_norm": 0.6081042289733887, "learning_rate": 3.972137524971086e-05, "loss": 2.2971, "step": 58680 }, { "epoch": 0.20586689096835242, "grad_norm": 0.5963698029518127, "learning_rate": 3.9710861108190515e-05, "loss": 2.311, "step": 58740 }, { "epoch": 0.20607717379875934, "grad_norm": 0.6485723257064819, "learning_rate": 3.9700346966670174e-05, "loss": 2.3095, "step": 58800 }, { "epoch": 0.20628745662916623, "grad_norm": 0.613882839679718, "learning_rate": 3.968983282514983e-05, "loss": 2.3224, "step": 58860 }, { "epoch": 0.20649773945957312, "grad_norm": 0.5611439347267151, "learning_rate": 3.9679318683629484e-05, "loss": 2.3028, "step": 58920 }, { "epoch": 0.20670802228998003, "grad_norm": 0.5237680673599243, "learning_rate": 3.9668804542109136e-05, "loss": 2.2831, "step": 58980 }, { "epoch": 0.20691830512038692, "grad_norm": 0.5781338214874268, "learning_rate": 3.9658290400588795e-05, "loss": 2.2963, "step": 59040 }, { "epoch": 0.2071285879507938, "grad_norm": 0.5803892016410828, "learning_rate": 3.964777625906845e-05, "loss": 2.3121, "step": 59100 }, { "epoch": 0.20733887078120072, "grad_norm": 0.5665882229804993, "learning_rate": 3.96372621175481e-05, "loss": 2.3089, "step": 59160 }, { "epoch": 0.2075491536116076, "grad_norm": 0.5831686854362488, "learning_rate": 3.9626747976027764e-05, "loss": 2.2884, "step": 59220 }, { "epoch": 0.2077594364420145, "grad_norm": 0.6503099203109741, "learning_rate": 3.9616233834507416e-05, "loss": 2.2925, "step": 59280 }, { "epoch": 0.20796971927242142, "grad_norm": 0.5764198303222656, "learning_rate": 3.960571969298707e-05, "loss": 2.2878, "step": 59340 }, { "epoch": 0.2081800021028283, "grad_norm": 0.5917394161224365, "learning_rate": 3.9595205551466726e-05, "loss": 2.3005, "step": 59400 }, { "epoch": 0.2083902849332352, "grad_norm": 0.6093656420707703, "learning_rate": 3.958469140994638e-05, "loss": 2.3097, "step": 59460 }, { "epoch": 0.2086005677636421, "grad_norm": 0.6051207184791565, "learning_rate": 3.957417726842603e-05, "loss": 2.3081, "step": 59520 }, { "epoch": 0.208810850594049, "grad_norm": 0.6025633811950684, "learning_rate": 3.9563663126905696e-05, "loss": 2.3096, "step": 59580 }, { "epoch": 0.20902113342445588, "grad_norm": 0.581791341304779, "learning_rate": 3.955314898538535e-05, "loss": 2.2989, "step": 59640 }, { "epoch": 0.2092314162548628, "grad_norm": 0.6260595917701721, "learning_rate": 3.9542634843865e-05, "loss": 2.3093, "step": 59700 }, { "epoch": 0.2094416990852697, "grad_norm": 0.5583322644233704, "learning_rate": 3.953212070234466e-05, "loss": 2.2863, "step": 59760 }, { "epoch": 0.20965198191567658, "grad_norm": 0.590339183807373, "learning_rate": 3.952160656082431e-05, "loss": 2.2983, "step": 59820 }, { "epoch": 0.2098622647460835, "grad_norm": 0.5544622540473938, "learning_rate": 3.951109241930396e-05, "loss": 2.2918, "step": 59880 }, { "epoch": 0.21007254757649038, "grad_norm": 0.5708463788032532, "learning_rate": 3.950057827778362e-05, "loss": 2.292, "step": 59940 }, { "epoch": 0.21028283040689727, "grad_norm": 0.617348313331604, "learning_rate": 3.949006413626328e-05, "loss": 2.308, "step": 60000 }, { "epoch": 0.21049311323730419, "grad_norm": 0.5817039608955383, "learning_rate": 3.947954999474293e-05, "loss": 2.3057, "step": 60060 }, { "epoch": 0.21070339606771107, "grad_norm": 0.5671586990356445, "learning_rate": 3.9469211088914595e-05, "loss": 2.3052, "step": 60120 }, { "epoch": 0.21091367889811796, "grad_norm": 
0.5275577902793884, "learning_rate": 3.945869694739425e-05, "loss": 2.3053, "step": 60180 }, { "epoch": 0.21112396172852488, "grad_norm": 0.5590606927871704, "learning_rate": 3.94481828058739e-05, "loss": 2.2958, "step": 60240 }, { "epoch": 0.21133424455893177, "grad_norm": 0.5458212494850159, "learning_rate": 3.943766866435356e-05, "loss": 2.2908, "step": 60300 }, { "epoch": 0.21154452738933865, "grad_norm": 0.6047812700271606, "learning_rate": 3.9427154522833216e-05, "loss": 2.2973, "step": 60360 }, { "epoch": 0.21175481021974557, "grad_norm": 0.5924034714698792, "learning_rate": 3.941664038131287e-05, "loss": 2.2874, "step": 60420 }, { "epoch": 0.21196509305015246, "grad_norm": 0.6144199371337891, "learning_rate": 3.940612623979253e-05, "loss": 2.3011, "step": 60480 }, { "epoch": 0.21217537588055935, "grad_norm": 0.5948887467384338, "learning_rate": 3.939561209827218e-05, "loss": 2.2952, "step": 60540 }, { "epoch": 0.21238565871096626, "grad_norm": 0.5655609369277954, "learning_rate": 3.9385273192443836e-05, "loss": 2.2903, "step": 60600 }, { "epoch": 0.21259594154137315, "grad_norm": 0.5578904747962952, "learning_rate": 3.9374759050923494e-05, "loss": 2.2985, "step": 60660 }, { "epoch": 0.21280622437178004, "grad_norm": 0.5913036465644836, "learning_rate": 3.9364244909403146e-05, "loss": 2.2939, "step": 60720 }, { "epoch": 0.21301650720218693, "grad_norm": 0.5955153107643127, "learning_rate": 3.93537307678828e-05, "loss": 2.2823, "step": 60780 }, { "epoch": 0.21322679003259384, "grad_norm": 0.5599097013473511, "learning_rate": 3.9343216626362464e-05, "loss": 2.293, "step": 60840 }, { "epoch": 0.21343707286300073, "grad_norm": 0.5825154185295105, "learning_rate": 3.9332702484842116e-05, "loss": 2.3027, "step": 60900 }, { "epoch": 0.21364735569340762, "grad_norm": 0.6176333427429199, "learning_rate": 3.932218834332177e-05, "loss": 2.2903, "step": 60960 }, { "epoch": 0.21385763852381454, "grad_norm": 0.6115411520004272, "learning_rate": 3.9311674201801426e-05, "loss": 2.2938, "step": 61020 }, { "epoch": 0.21406792135422142, "grad_norm": 0.6089323163032532, "learning_rate": 3.930116006028108e-05, "loss": 2.2794, "step": 61080 }, { "epoch": 0.2142782041846283, "grad_norm": 0.5954441428184509, "learning_rate": 3.929064591876073e-05, "loss": 2.2942, "step": 61140 }, { "epoch": 0.21448848701503523, "grad_norm": 0.5842456221580505, "learning_rate": 3.9280131777240395e-05, "loss": 2.3082, "step": 61200 }, { "epoch": 0.21469876984544212, "grad_norm": 0.5549436211585999, "learning_rate": 3.926961763572005e-05, "loss": 2.2954, "step": 61260 }, { "epoch": 0.214909052675849, "grad_norm": 0.6503235101699829, "learning_rate": 3.92591034941997e-05, "loss": 2.2934, "step": 61320 }, { "epoch": 0.21511933550625592, "grad_norm": 0.5893281102180481, "learning_rate": 3.924858935267936e-05, "loss": 2.3044, "step": 61380 }, { "epoch": 0.2153296183366628, "grad_norm": 0.6054825186729431, "learning_rate": 3.923807521115901e-05, "loss": 2.3034, "step": 61440 }, { "epoch": 0.2155399011670697, "grad_norm": 0.7815125584602356, "learning_rate": 3.922756106963867e-05, "loss": 2.3021, "step": 61500 }, { "epoch": 0.2157501839974766, "grad_norm": 0.6160147190093994, "learning_rate": 3.921704692811832e-05, "loss": 2.2933, "step": 61560 }, { "epoch": 0.2159604668278835, "grad_norm": 0.6206346154212952, "learning_rate": 3.920653278659798e-05, "loss": 2.2961, "step": 61620 }, { "epoch": 0.2161707496582904, "grad_norm": 0.5747255682945251, "learning_rate": 3.919601864507763e-05, "loss": 2.2893, "step": 61680 }, { "epoch": 
0.2163810324886973, "grad_norm": 0.5848297476768494, "learning_rate": 3.918550450355728e-05, "loss": 2.2864, "step": 61740 }, { "epoch": 0.2165913153191042, "grad_norm": 0.6018233895301819, "learning_rate": 3.917499036203694e-05, "loss": 2.2894, "step": 61800 }, { "epoch": 0.21680159814951108, "grad_norm": 0.5732107162475586, "learning_rate": 3.91644762205166e-05, "loss": 2.2931, "step": 61860 }, { "epoch": 0.217011880979918, "grad_norm": 0.6022079586982727, "learning_rate": 3.915396207899625e-05, "loss": 2.2948, "step": 61920 }, { "epoch": 0.2172221638103249, "grad_norm": 0.596991777420044, "learning_rate": 3.914344793747591e-05, "loss": 2.2946, "step": 61980 }, { "epoch": 0.21743244664073177, "grad_norm": 0.5973013043403625, "learning_rate": 3.913293379595556e-05, "loss": 2.2917, "step": 62040 }, { "epoch": 0.2176427294711387, "grad_norm": 0.6209733486175537, "learning_rate": 3.9122419654435214e-05, "loss": 2.2975, "step": 62100 }, { "epoch": 0.21785301230154558, "grad_norm": 0.5817129015922546, "learning_rate": 3.911190551291487e-05, "loss": 2.311, "step": 62160 }, { "epoch": 0.21806329513195247, "grad_norm": 0.6193563342094421, "learning_rate": 3.910139137139453e-05, "loss": 2.294, "step": 62220 }, { "epoch": 0.21827357796235938, "grad_norm": 0.5551738142967224, "learning_rate": 3.9090877229874184e-05, "loss": 2.2894, "step": 62280 }, { "epoch": 0.21848386079276627, "grad_norm": 0.6071308255195618, "learning_rate": 3.9080363088353835e-05, "loss": 2.299, "step": 62340 }, { "epoch": 0.21869414362317316, "grad_norm": 0.5778204798698425, "learning_rate": 3.9069848946833494e-05, "loss": 2.2934, "step": 62400 }, { "epoch": 0.21890442645358008, "grad_norm": 0.5507710576057434, "learning_rate": 3.9059334805313146e-05, "loss": 2.3035, "step": 62460 }, { "epoch": 0.21911470928398696, "grad_norm": 0.6067638993263245, "learning_rate": 3.90488206637928e-05, "loss": 2.3043, "step": 62520 }, { "epoch": 0.21932499211439385, "grad_norm": 0.5624009966850281, "learning_rate": 3.9038306522272463e-05, "loss": 2.28, "step": 62580 }, { "epoch": 0.21953527494480077, "grad_norm": 0.6132268905639648, "learning_rate": 3.9027792380752115e-05, "loss": 2.2813, "step": 62640 }, { "epoch": 0.21974555777520766, "grad_norm": 0.5769537091255188, "learning_rate": 3.901727823923177e-05, "loss": 2.2959, "step": 62700 }, { "epoch": 0.21995584060561454, "grad_norm": 0.5875435471534729, "learning_rate": 3.9006764097711426e-05, "loss": 2.3173, "step": 62760 }, { "epoch": 0.22016612343602146, "grad_norm": 0.6600687503814697, "learning_rate": 3.899624995619108e-05, "loss": 2.279, "step": 62820 }, { "epoch": 0.22037640626642835, "grad_norm": 0.5903270244598389, "learning_rate": 3.898573581467073e-05, "loss": 2.2965, "step": 62880 }, { "epoch": 0.22058668909683524, "grad_norm": 0.6876611113548279, "learning_rate": 3.897522167315039e-05, "loss": 2.3025, "step": 62940 }, { "epoch": 0.22079697192724215, "grad_norm": 0.5623339414596558, "learning_rate": 3.896470753163005e-05, "loss": 2.3011, "step": 63000 }, { "epoch": 0.22100725475764904, "grad_norm": 0.627585232257843, "learning_rate": 3.89541933901097e-05, "loss": 2.2827, "step": 63060 }, { "epoch": 0.22121753758805593, "grad_norm": 0.5718169808387756, "learning_rate": 3.894367924858936e-05, "loss": 2.281, "step": 63120 }, { "epoch": 0.22142782041846285, "grad_norm": 0.608727216720581, "learning_rate": 3.893316510706901e-05, "loss": 2.3046, "step": 63180 }, { "epoch": 0.22163810324886973, "grad_norm": 0.5940541625022888, "learning_rate": 3.892265096554866e-05, "loss": 2.2994, 
"step": 63240 }, { "epoch": 0.22184838607927662, "grad_norm": 0.6359009742736816, "learning_rate": 3.891213682402832e-05, "loss": 2.2937, "step": 63300 }, { "epoch": 0.22205866890968354, "grad_norm": 0.6548600792884827, "learning_rate": 3.890162268250798e-05, "loss": 2.2911, "step": 63360 }, { "epoch": 0.22226895174009043, "grad_norm": 0.6042421460151672, "learning_rate": 3.889110854098763e-05, "loss": 2.2918, "step": 63420 }, { "epoch": 0.22247923457049731, "grad_norm": 0.597143292427063, "learning_rate": 3.888059439946728e-05, "loss": 2.3005, "step": 63480 }, { "epoch": 0.22268951740090423, "grad_norm": 0.624727189540863, "learning_rate": 3.887008025794694e-05, "loss": 2.2891, "step": 63540 }, { "epoch": 0.22289980023131112, "grad_norm": 0.6133010387420654, "learning_rate": 3.885956611642659e-05, "loss": 2.2943, "step": 63600 }, { "epoch": 0.223110083061718, "grad_norm": 0.6171077489852905, "learning_rate": 3.884905197490625e-05, "loss": 2.2894, "step": 63660 }, { "epoch": 0.2233203658921249, "grad_norm": 0.6293420791625977, "learning_rate": 3.883853783338591e-05, "loss": 2.2883, "step": 63720 }, { "epoch": 0.2235306487225318, "grad_norm": 0.6117975115776062, "learning_rate": 3.882802369186556e-05, "loss": 2.2944, "step": 63780 }, { "epoch": 0.2237409315529387, "grad_norm": 0.5496307015419006, "learning_rate": 3.8817509550345214e-05, "loss": 2.2912, "step": 63840 }, { "epoch": 0.2239512143833456, "grad_norm": 0.7082515954971313, "learning_rate": 3.880699540882487e-05, "loss": 2.2942, "step": 63900 }, { "epoch": 0.2241614972137525, "grad_norm": 0.5779778957366943, "learning_rate": 3.8796481267304525e-05, "loss": 2.29, "step": 63960 }, { "epoch": 0.2243717800441594, "grad_norm": 0.5756336450576782, "learning_rate": 3.878596712578418e-05, "loss": 2.2815, "step": 64020 }, { "epoch": 0.22458206287456628, "grad_norm": 0.5937342643737793, "learning_rate": 3.8775452984263835e-05, "loss": 2.2985, "step": 64080 }, { "epoch": 0.2247923457049732, "grad_norm": 0.5614263415336609, "learning_rate": 3.8764938842743494e-05, "loss": 2.2983, "step": 64140 }, { "epoch": 0.22500262853538008, "grad_norm": 0.5805337429046631, "learning_rate": 3.8754424701223146e-05, "loss": 2.295, "step": 64200 }, { "epoch": 0.22521291136578697, "grad_norm": 0.6046919822692871, "learning_rate": 3.87439105597028e-05, "loss": 2.3119, "step": 64260 }, { "epoch": 0.2254231941961939, "grad_norm": 0.5776734948158264, "learning_rate": 3.8733396418182456e-05, "loss": 2.296, "step": 64320 }, { "epoch": 0.22563347702660078, "grad_norm": 0.5678257942199707, "learning_rate": 3.8722882276662115e-05, "loss": 2.2921, "step": 64380 }, { "epoch": 0.22584375985700766, "grad_norm": 0.5991799831390381, "learning_rate": 3.871236813514177e-05, "loss": 2.3095, "step": 64440 }, { "epoch": 0.22605404268741458, "grad_norm": 0.6048367619514465, "learning_rate": 3.8701853993621426e-05, "loss": 2.3009, "step": 64500 }, { "epoch": 0.22626432551782147, "grad_norm": 0.5879917740821838, "learning_rate": 3.869133985210108e-05, "loss": 2.2973, "step": 64560 }, { "epoch": 0.22647460834822836, "grad_norm": 0.601833701133728, "learning_rate": 3.868082571058073e-05, "loss": 2.2898, "step": 64620 }, { "epoch": 0.22668489117863527, "grad_norm": 0.6042653918266296, "learning_rate": 3.867031156906039e-05, "loss": 2.2839, "step": 64680 }, { "epoch": 0.22689517400904216, "grad_norm": 0.5772842764854431, "learning_rate": 3.8659972663232045e-05, "loss": 2.2911, "step": 64740 }, { "epoch": 0.22710545683944905, "grad_norm": 0.5636753439903259, "learning_rate": 
3.8649458521711704e-05, "loss": 2.2955, "step": 64800 }, { "epoch": 0.22731573966985597, "grad_norm": 0.5744955539703369, "learning_rate": 3.863894438019136e-05, "loss": 2.2872, "step": 64860 }, { "epoch": 0.22752602250026285, "grad_norm": 0.575678288936615, "learning_rate": 3.8628430238671014e-05, "loss": 2.2935, "step": 64920 }, { "epoch": 0.22773630533066974, "grad_norm": 0.6447136402130127, "learning_rate": 3.8617916097150666e-05, "loss": 2.2983, "step": 64980 }, { "epoch": 0.22794658816107666, "grad_norm": 0.5995298624038696, "learning_rate": 3.8607401955630325e-05, "loss": 2.2867, "step": 65040 }, { "epoch": 0.22815687099148355, "grad_norm": 0.609114944934845, "learning_rate": 3.859688781410998e-05, "loss": 2.294, "step": 65100 }, { "epoch": 0.22836715382189043, "grad_norm": 0.615568220615387, "learning_rate": 3.8586373672589635e-05, "loss": 2.3041, "step": 65160 }, { "epoch": 0.22857743665229735, "grad_norm": 0.6183356046676636, "learning_rate": 3.8575859531069294e-05, "loss": 2.2961, "step": 65220 }, { "epoch": 0.22878771948270424, "grad_norm": 0.589805006980896, "learning_rate": 3.8565345389548946e-05, "loss": 2.2756, "step": 65280 }, { "epoch": 0.22899800231311113, "grad_norm": 0.6104287505149841, "learning_rate": 3.85548312480286e-05, "loss": 2.2903, "step": 65340 }, { "epoch": 0.22920828514351804, "grad_norm": 0.6121728420257568, "learning_rate": 3.854431710650826e-05, "loss": 2.288, "step": 65400 }, { "epoch": 0.22941856797392493, "grad_norm": 0.5765408277511597, "learning_rate": 3.853380296498791e-05, "loss": 2.2867, "step": 65460 }, { "epoch": 0.22962885080433182, "grad_norm": 0.6292064785957336, "learning_rate": 3.852328882346757e-05, "loss": 2.2856, "step": 65520 }, { "epoch": 0.22983913363473873, "grad_norm": 0.5729442834854126, "learning_rate": 3.851277468194722e-05, "loss": 2.2967, "step": 65580 }, { "epoch": 0.23004941646514562, "grad_norm": 0.5671252608299255, "learning_rate": 3.850226054042688e-05, "loss": 2.2795, "step": 65640 }, { "epoch": 0.2302596992955525, "grad_norm": 0.6085238456726074, "learning_rate": 3.849174639890653e-05, "loss": 2.2816, "step": 65700 }, { "epoch": 0.23046998212595943, "grad_norm": 0.5640025734901428, "learning_rate": 3.848123225738619e-05, "loss": 2.282, "step": 65760 }, { "epoch": 0.23068026495636632, "grad_norm": 0.5420010685920715, "learning_rate": 3.847071811586584e-05, "loss": 2.2999, "step": 65820 }, { "epoch": 0.2308905477867732, "grad_norm": 0.5791419148445129, "learning_rate": 3.84602039743455e-05, "loss": 2.288, "step": 65880 }, { "epoch": 0.23110083061718012, "grad_norm": 0.679742693901062, "learning_rate": 3.844968983282515e-05, "loss": 2.2958, "step": 65940 }, { "epoch": 0.231311113447587, "grad_norm": 0.6004040241241455, "learning_rate": 3.843917569130481e-05, "loss": 2.3013, "step": 66000 }, { "epoch": 0.2315213962779939, "grad_norm": 0.6382125616073608, "learning_rate": 3.842866154978446e-05, "loss": 2.2935, "step": 66060 }, { "epoch": 0.2317316791084008, "grad_norm": 0.5905836820602417, "learning_rate": 3.841814740826411e-05, "loss": 2.2975, "step": 66120 }, { "epoch": 0.2319419619388077, "grad_norm": 0.598331868648529, "learning_rate": 3.840763326674377e-05, "loss": 2.2866, "step": 66180 }, { "epoch": 0.2321522447692146, "grad_norm": 0.5576470494270325, "learning_rate": 3.839711912522343e-05, "loss": 2.2794, "step": 66240 }, { "epoch": 0.2323625275996215, "grad_norm": 0.6076599955558777, "learning_rate": 3.838660498370308e-05, "loss": 2.2855, "step": 66300 }, { "epoch": 0.2325728104300284, "grad_norm": 
0.5614954829216003, "learning_rate": 3.837609084218274e-05, "loss": 2.2823, "step": 66360 }, { "epoch": 0.23278309326043528, "grad_norm": 0.5770753622055054, "learning_rate": 3.836557670066239e-05, "loss": 2.3076, "step": 66420 }, { "epoch": 0.23299337609084217, "grad_norm": 0.558489978313446, "learning_rate": 3.8355062559142045e-05, "loss": 2.2918, "step": 66480 }, { "epoch": 0.23320365892124909, "grad_norm": 0.6074410080909729, "learning_rate": 3.8344548417621704e-05, "loss": 2.2886, "step": 66540 }, { "epoch": 0.23341394175165597, "grad_norm": 0.5700733065605164, "learning_rate": 3.833403427610136e-05, "loss": 2.2817, "step": 66600 }, { "epoch": 0.23362422458206286, "grad_norm": 0.5876428484916687, "learning_rate": 3.8323520134581014e-05, "loss": 2.2925, "step": 66660 }, { "epoch": 0.23383450741246978, "grad_norm": 0.5912514925003052, "learning_rate": 3.8313005993060666e-05, "loss": 2.2865, "step": 66720 }, { "epoch": 0.23404479024287667, "grad_norm": 0.5970344543457031, "learning_rate": 3.830266708723233e-05, "loss": 2.3007, "step": 66780 }, { "epoch": 0.23425507307328355, "grad_norm": 0.615527331829071, "learning_rate": 3.829215294571198e-05, "loss": 2.2843, "step": 66840 }, { "epoch": 0.23446535590369047, "grad_norm": 0.6198700070381165, "learning_rate": 3.828163880419164e-05, "loss": 2.2807, "step": 66900 }, { "epoch": 0.23467563873409736, "grad_norm": 0.5815827250480652, "learning_rate": 3.827112466267129e-05, "loss": 2.2841, "step": 66960 }, { "epoch": 0.23488592156450425, "grad_norm": 0.5822238922119141, "learning_rate": 3.826061052115095e-05, "loss": 2.2937, "step": 67020 }, { "epoch": 0.23509620439491116, "grad_norm": 0.700834333896637, "learning_rate": 3.825009637963061e-05, "loss": 2.2888, "step": 67080 }, { "epoch": 0.23530648722531805, "grad_norm": 0.6379315853118896, "learning_rate": 3.823958223811026e-05, "loss": 2.2833, "step": 67140 }, { "epoch": 0.23551677005572494, "grad_norm": 0.566112220287323, "learning_rate": 3.8229068096589913e-05, "loss": 2.2859, "step": 67200 }, { "epoch": 0.23572705288613185, "grad_norm": 0.5224472284317017, "learning_rate": 3.821855395506957e-05, "loss": 2.2963, "step": 67260 }, { "epoch": 0.23593733571653874, "grad_norm": 0.6125534772872925, "learning_rate": 3.8208039813549224e-05, "loss": 2.2776, "step": 67320 }, { "epoch": 0.23614761854694563, "grad_norm": 0.5589282512664795, "learning_rate": 3.819752567202888e-05, "loss": 2.2687, "step": 67380 }, { "epoch": 0.23635790137735255, "grad_norm": 0.6050557494163513, "learning_rate": 3.8187011530508535e-05, "loss": 2.2958, "step": 67440 }, { "epoch": 0.23656818420775944, "grad_norm": 0.5693143010139465, "learning_rate": 3.817649738898819e-05, "loss": 2.281, "step": 67500 }, { "epoch": 0.23677846703816632, "grad_norm": 0.6809577941894531, "learning_rate": 3.8165983247467845e-05, "loss": 2.3013, "step": 67560 }, { "epoch": 0.23698874986857324, "grad_norm": 0.5879551768302917, "learning_rate": 3.81554691059475e-05, "loss": 2.2664, "step": 67620 }, { "epoch": 0.23719903269898013, "grad_norm": 0.5711112022399902, "learning_rate": 3.8144954964427156e-05, "loss": 2.2878, "step": 67680 }, { "epoch": 0.23740931552938702, "grad_norm": 0.5781254172325134, "learning_rate": 3.8134440822906814e-05, "loss": 2.2977, "step": 67740 }, { "epoch": 0.23761959835979393, "grad_norm": 0.6531150341033936, "learning_rate": 3.8123926681386466e-05, "loss": 2.2855, "step": 67800 }, { "epoch": 0.23782988119020082, "grad_norm": 0.5889065265655518, "learning_rate": 3.8113412539866125e-05, "loss": 2.3085, "step": 67860 }, { 
"epoch": 0.2380401640206077, "grad_norm": 0.5385099649429321, "learning_rate": 3.810289839834578e-05, "loss": 2.2844, "step": 67920 }, { "epoch": 0.23825044685101462, "grad_norm": 0.5548147559165955, "learning_rate": 3.809238425682543e-05, "loss": 2.2847, "step": 67980 }, { "epoch": 0.2384607296814215, "grad_norm": 0.6021646857261658, "learning_rate": 3.808187011530509e-05, "loss": 2.2768, "step": 68040 }, { "epoch": 0.2386710125118284, "grad_norm": 0.592646598815918, "learning_rate": 3.8071355973784746e-05, "loss": 2.2908, "step": 68100 }, { "epoch": 0.23888129534223532, "grad_norm": 0.6466455459594727, "learning_rate": 3.80608418322644e-05, "loss": 2.2919, "step": 68160 }, { "epoch": 0.2390915781726422, "grad_norm": 0.5685292482376099, "learning_rate": 3.805032769074405e-05, "loss": 2.2784, "step": 68220 }, { "epoch": 0.2393018610030491, "grad_norm": 0.564929187297821, "learning_rate": 3.803981354922371e-05, "loss": 2.2802, "step": 68280 }, { "epoch": 0.239512143833456, "grad_norm": 0.609919011592865, "learning_rate": 3.802929940770336e-05, "loss": 2.2781, "step": 68340 }, { "epoch": 0.2397224266638629, "grad_norm": 0.5926073789596558, "learning_rate": 3.801878526618302e-05, "loss": 2.2773, "step": 68400 }, { "epoch": 0.23993270949426979, "grad_norm": 0.5856402516365051, "learning_rate": 3.800827112466268e-05, "loss": 2.286, "step": 68460 }, { "epoch": 0.2401429923246767, "grad_norm": 0.5757060050964355, "learning_rate": 3.799775698314233e-05, "loss": 2.2801, "step": 68520 }, { "epoch": 0.2403532751550836, "grad_norm": 0.626157820224762, "learning_rate": 3.798724284162198e-05, "loss": 2.2915, "step": 68580 }, { "epoch": 0.24056355798549048, "grad_norm": 0.5402255654335022, "learning_rate": 3.797672870010164e-05, "loss": 2.2782, "step": 68640 }, { "epoch": 0.2407738408158974, "grad_norm": 0.5578147768974304, "learning_rate": 3.796621455858129e-05, "loss": 2.2887, "step": 68700 }, { "epoch": 0.24098412364630428, "grad_norm": 0.6032596826553345, "learning_rate": 3.7955700417060944e-05, "loss": 2.2862, "step": 68760 }, { "epoch": 0.24119440647671117, "grad_norm": 0.5816863775253296, "learning_rate": 3.794518627554061e-05, "loss": 2.269, "step": 68820 }, { "epoch": 0.2414046893071181, "grad_norm": 0.5900723338127136, "learning_rate": 3.793467213402026e-05, "loss": 2.2907, "step": 68880 }, { "epoch": 0.24161497213752497, "grad_norm": 0.5583692789077759, "learning_rate": 3.792433322819192e-05, "loss": 2.29, "step": 68940 }, { "epoch": 0.24182525496793186, "grad_norm": 0.5381126999855042, "learning_rate": 3.791381908667158e-05, "loss": 2.2981, "step": 69000 }, { "epoch": 0.24203553779833878, "grad_norm": 0.5909293293952942, "learning_rate": 3.790330494515123e-05, "loss": 2.2989, "step": 69060 }, { "epoch": 0.24224582062874567, "grad_norm": 0.6078698635101318, "learning_rate": 3.789279080363089e-05, "loss": 2.296, "step": 69120 }, { "epoch": 0.24245610345915256, "grad_norm": 0.5708657503128052, "learning_rate": 3.788227666211054e-05, "loss": 2.2714, "step": 69180 }, { "epoch": 0.24266638628955944, "grad_norm": 0.5961434245109558, "learning_rate": 3.78717625205902e-05, "loss": 2.2737, "step": 69240 }, { "epoch": 0.24287666911996636, "grad_norm": 0.5856456160545349, "learning_rate": 3.786124837906985e-05, "loss": 2.2953, "step": 69300 }, { "epoch": 0.24308695195037325, "grad_norm": 0.5971105098724365, "learning_rate": 3.785073423754951e-05, "loss": 2.2791, "step": 69360 }, { "epoch": 0.24329723478078014, "grad_norm": 0.5927890539169312, "learning_rate": 3.784022009602916e-05, "loss": 2.2845, 
"step": 69420 }, { "epoch": 0.24350751761118705, "grad_norm": 0.5798436403274536, "learning_rate": 3.782970595450881e-05, "loss": 2.2845, "step": 69480 }, { "epoch": 0.24371780044159394, "grad_norm": 0.574324905872345, "learning_rate": 3.781919181298847e-05, "loss": 2.2817, "step": 69540 }, { "epoch": 0.24392808327200083, "grad_norm": 0.5704073905944824, "learning_rate": 3.780867767146813e-05, "loss": 2.2779, "step": 69600 }, { "epoch": 0.24413836610240774, "grad_norm": 0.5989373922348022, "learning_rate": 3.779816352994778e-05, "loss": 2.299, "step": 69660 }, { "epoch": 0.24434864893281463, "grad_norm": 0.5521675944328308, "learning_rate": 3.778764938842744e-05, "loss": 2.2701, "step": 69720 }, { "epoch": 0.24455893176322152, "grad_norm": 0.6123749017715454, "learning_rate": 3.777713524690709e-05, "loss": 2.2782, "step": 69780 }, { "epoch": 0.24476921459362844, "grad_norm": 0.5558038353919983, "learning_rate": 3.7766621105386744e-05, "loss": 2.2697, "step": 69840 }, { "epoch": 0.24497949742403533, "grad_norm": 0.5577847957611084, "learning_rate": 3.77561069638664e-05, "loss": 2.2813, "step": 69900 }, { "epoch": 0.2451897802544422, "grad_norm": 0.5782567262649536, "learning_rate": 3.774559282234606e-05, "loss": 2.2702, "step": 69960 }, { "epoch": 0.24540006308484913, "grad_norm": 0.6026614904403687, "learning_rate": 3.7735078680825713e-05, "loss": 2.2734, "step": 70020 }, { "epoch": 0.24561034591525602, "grad_norm": 0.5850802659988403, "learning_rate": 3.7724564539305365e-05, "loss": 2.2959, "step": 70080 }, { "epoch": 0.2458206287456629, "grad_norm": 0.6082926988601685, "learning_rate": 3.7714050397785024e-05, "loss": 2.2909, "step": 70140 }, { "epoch": 0.24603091157606982, "grad_norm": 0.6162857413291931, "learning_rate": 3.7703536256264676e-05, "loss": 2.293, "step": 70200 }, { "epoch": 0.2462411944064767, "grad_norm": 0.5475439429283142, "learning_rate": 3.769302211474433e-05, "loss": 2.2745, "step": 70260 }, { "epoch": 0.2464514772368836, "grad_norm": 0.6155129671096802, "learning_rate": 3.768250797322399e-05, "loss": 2.2821, "step": 70320 }, { "epoch": 0.24666176006729051, "grad_norm": 0.5935275554656982, "learning_rate": 3.7671993831703645e-05, "loss": 2.2783, "step": 70380 }, { "epoch": 0.2468720428976974, "grad_norm": 0.5896009802818298, "learning_rate": 3.76614796901833e-05, "loss": 2.2785, "step": 70440 }, { "epoch": 0.2470823257281043, "grad_norm": 0.6749953627586365, "learning_rate": 3.7650965548662956e-05, "loss": 2.2916, "step": 70500 }, { "epoch": 0.2472926085585112, "grad_norm": 0.6071747541427612, "learning_rate": 3.764045140714261e-05, "loss": 2.2915, "step": 70560 }, { "epoch": 0.2475028913889181, "grad_norm": 0.5885292291641235, "learning_rate": 3.762993726562226e-05, "loss": 2.2668, "step": 70620 }, { "epoch": 0.24771317421932498, "grad_norm": 0.5685102343559265, "learning_rate": 3.761942312410192e-05, "loss": 2.2716, "step": 70680 }, { "epoch": 0.2479234570497319, "grad_norm": 0.6011393666267395, "learning_rate": 3.760890898258158e-05, "loss": 2.2737, "step": 70740 }, { "epoch": 0.2481337398801388, "grad_norm": 0.5806678533554077, "learning_rate": 3.759839484106123e-05, "loss": 2.2944, "step": 70800 }, { "epoch": 0.24834402271054568, "grad_norm": 0.6233952641487122, "learning_rate": 3.758788069954088e-05, "loss": 2.2818, "step": 70860 }, { "epoch": 0.2485543055409526, "grad_norm": 0.6003621816635132, "learning_rate": 3.757736655802054e-05, "loss": 2.2701, "step": 70920 }, { "epoch": 0.24876458837135948, "grad_norm": 0.5553110241889954, "learning_rate": 
3.7567027652192196e-05, "loss": 2.2701, "step": 70980 }, { "epoch": 0.24897487120176637, "grad_norm": 0.5705835223197937, "learning_rate": 3.7556513510671855e-05, "loss": 2.2765, "step": 71040 }, { "epoch": 0.24918515403217328, "grad_norm": 0.5779468417167664, "learning_rate": 3.7545999369151514e-05, "loss": 2.2657, "step": 71100 }, { "epoch": 0.24939543686258017, "grad_norm": 0.6075929403305054, "learning_rate": 3.7535485227631166e-05, "loss": 2.2864, "step": 71160 }, { "epoch": 0.24960571969298706, "grad_norm": 0.5834728479385376, "learning_rate": 3.7524971086110824e-05, "loss": 2.2949, "step": 71220 }, { "epoch": 0.24981600252339398, "grad_norm": 0.5804185271263123, "learning_rate": 3.7514456944590476e-05, "loss": 2.2847, "step": 71280 }, { "epoch": 0.25002628535380084, "grad_norm": 0.595839262008667, "learning_rate": 3.750394280307013e-05, "loss": 2.292, "step": 71340 }, { "epoch": 0.25023656818420775, "grad_norm": 0.6016972661018372, "learning_rate": 3.749342866154979e-05, "loss": 2.2834, "step": 71400 }, { "epoch": 0.25044685101461467, "grad_norm": 0.6161638498306274, "learning_rate": 3.7482914520029445e-05, "loss": 2.2901, "step": 71460 }, { "epoch": 0.25065713384502153, "grad_norm": 0.5625993609428406, "learning_rate": 3.74724003785091e-05, "loss": 2.2812, "step": 71520 }, { "epoch": 0.25086741667542845, "grad_norm": 0.6411647796630859, "learning_rate": 3.746188623698875e-05, "loss": 2.2818, "step": 71580 }, { "epoch": 0.25107769950583536, "grad_norm": 0.5416374802589417, "learning_rate": 3.745137209546841e-05, "loss": 2.2785, "step": 71640 }, { "epoch": 0.2512879823362422, "grad_norm": 0.6235836744308472, "learning_rate": 3.744085795394806e-05, "loss": 2.2747, "step": 71700 }, { "epoch": 0.25149826516664914, "grad_norm": 0.60355144739151, "learning_rate": 3.743034381242771e-05, "loss": 2.2697, "step": 71760 }, { "epoch": 0.25170854799705605, "grad_norm": 0.5651815533638, "learning_rate": 3.741982967090738e-05, "loss": 2.2981, "step": 71820 }, { "epoch": 0.2519188308274629, "grad_norm": 0.552401602268219, "learning_rate": 3.740931552938703e-05, "loss": 2.2984, "step": 71880 }, { "epoch": 0.25212911365786983, "grad_norm": 0.5791208148002625, "learning_rate": 3.739880138786668e-05, "loss": 2.2848, "step": 71940 }, { "epoch": 0.25233939648827675, "grad_norm": 0.5935322046279907, "learning_rate": 3.738828724634634e-05, "loss": 2.3134, "step": 72000 }, { "epoch": 0.2525496793186836, "grad_norm": 0.5474521517753601, "learning_rate": 3.737777310482599e-05, "loss": 2.2772, "step": 72060 }, { "epoch": 0.2527599621490905, "grad_norm": 0.5810613036155701, "learning_rate": 3.736725896330564e-05, "loss": 2.288, "step": 72120 }, { "epoch": 0.25297024497949744, "grad_norm": 0.5361061096191406, "learning_rate": 3.735674482178531e-05, "loss": 2.2766, "step": 72180 }, { "epoch": 0.2531805278099043, "grad_norm": 0.5803009867668152, "learning_rate": 3.734640591595696e-05, "loss": 2.2782, "step": 72240 }, { "epoch": 0.2533908106403112, "grad_norm": 0.5926686525344849, "learning_rate": 3.733589177443662e-05, "loss": 2.2903, "step": 72300 }, { "epoch": 0.25360109347071813, "grad_norm": 0.5496491193771362, "learning_rate": 3.7325377632916276e-05, "loss": 2.2587, "step": 72360 }, { "epoch": 0.253811376301125, "grad_norm": 0.6137828826904297, "learning_rate": 3.731486349139593e-05, "loss": 2.2849, "step": 72420 }, { "epoch": 0.2540216591315319, "grad_norm": 0.5940037965774536, "learning_rate": 3.730434934987558e-05, "loss": 2.2847, "step": 72480 }, { "epoch": 0.2542319419619388, "grad_norm": 
0.5658112168312073, "learning_rate": 3.729383520835524e-05, "loss": 2.2715, "step": 72540 }, { "epoch": 0.2544422247923457, "grad_norm": 0.6902830004692078, "learning_rate": 3.728332106683489e-05, "loss": 2.2756, "step": 72600 }, { "epoch": 0.2546525076227526, "grad_norm": 0.6103239059448242, "learning_rate": 3.727280692531455e-05, "loss": 2.288, "step": 72660 }, { "epoch": 0.2548627904531595, "grad_norm": 0.5635161995887756, "learning_rate": 3.726229278379421e-05, "loss": 2.2926, "step": 72720 }, { "epoch": 0.2550730732835664, "grad_norm": 0.6250701546669006, "learning_rate": 3.725177864227386e-05, "loss": 2.267, "step": 72780 }, { "epoch": 0.2552833561139733, "grad_norm": 0.6795318722724915, "learning_rate": 3.724126450075351e-05, "loss": 2.2794, "step": 72840 }, { "epoch": 0.2554936389443802, "grad_norm": 0.5607568025588989, "learning_rate": 3.723075035923317e-05, "loss": 2.2719, "step": 72900 }, { "epoch": 0.25570392177478707, "grad_norm": 0.5337287187576294, "learning_rate": 3.722023621771282e-05, "loss": 2.2686, "step": 72960 }, { "epoch": 0.255914204605194, "grad_norm": 0.6685793995857239, "learning_rate": 3.720972207619248e-05, "loss": 2.2956, "step": 73020 }, { "epoch": 0.2561244874356009, "grad_norm": 0.6170177459716797, "learning_rate": 3.719920793467214e-05, "loss": 2.2859, "step": 73080 }, { "epoch": 0.25633477026600776, "grad_norm": 0.5965674519538879, "learning_rate": 3.718869379315179e-05, "loss": 2.284, "step": 73140 }, { "epoch": 0.2565450530964147, "grad_norm": 0.5725794434547424, "learning_rate": 3.7178179651631444e-05, "loss": 2.2957, "step": 73200 }, { "epoch": 0.2567553359268216, "grad_norm": 0.6222959756851196, "learning_rate": 3.71676655101111e-05, "loss": 2.2802, "step": 73260 }, { "epoch": 0.25696561875722845, "grad_norm": 0.5478679537773132, "learning_rate": 3.715715136859076e-05, "loss": 2.2772, "step": 73320 }, { "epoch": 0.25717590158763537, "grad_norm": 0.5699739456176758, "learning_rate": 3.714663722707041e-05, "loss": 2.2794, "step": 73380 }, { "epoch": 0.2573861844180423, "grad_norm": 0.5548140406608582, "learning_rate": 3.7136123085550065e-05, "loss": 2.2788, "step": 73440 }, { "epoch": 0.25759646724844915, "grad_norm": 0.5604926943778992, "learning_rate": 3.7125608944029723e-05, "loss": 2.285, "step": 73500 }, { "epoch": 0.25780675007885606, "grad_norm": 0.6234787106513977, "learning_rate": 3.7115094802509375e-05, "loss": 2.2875, "step": 73560 }, { "epoch": 0.258017032909263, "grad_norm": 0.5991029143333435, "learning_rate": 3.710458066098903e-05, "loss": 2.2752, "step": 73620 }, { "epoch": 0.25822731573966984, "grad_norm": 0.628025472164154, "learning_rate": 3.709406651946869e-05, "loss": 2.2855, "step": 73680 }, { "epoch": 0.25843759857007675, "grad_norm": 0.5676313638687134, "learning_rate": 3.7083552377948345e-05, "loss": 2.284, "step": 73740 }, { "epoch": 0.25864788140048367, "grad_norm": 0.5893223285675049, "learning_rate": 3.7073038236427996e-05, "loss": 2.2686, "step": 73800 }, { "epoch": 0.25885816423089053, "grad_norm": 0.5592080354690552, "learning_rate": 3.7062524094907655e-05, "loss": 2.2821, "step": 73860 }, { "epoch": 0.25906844706129745, "grad_norm": 0.578217625617981, "learning_rate": 3.705200995338731e-05, "loss": 2.2657, "step": 73920 }, { "epoch": 0.25927872989170436, "grad_norm": 0.5730153918266296, "learning_rate": 3.704149581186696e-05, "loss": 2.2831, "step": 73980 }, { "epoch": 0.2594890127221112, "grad_norm": 0.5621018409729004, "learning_rate": 3.703098167034662e-05, "loss": 2.2782, "step": 74040 }, { "epoch": 
0.25969929555251814, "grad_norm": 0.5932819843292236, "learning_rate": 3.7020467528826276e-05, "loss": 2.2905, "step": 74100 }, { "epoch": 0.25990957838292505, "grad_norm": 0.6077775359153748, "learning_rate": 3.700995338730593e-05, "loss": 2.2759, "step": 74160 }, { "epoch": 0.2601198612133319, "grad_norm": 0.6019622087478638, "learning_rate": 3.699943924578558e-05, "loss": 2.2719, "step": 74220 }, { "epoch": 0.26033014404373883, "grad_norm": 0.5670755505561829, "learning_rate": 3.698892510426524e-05, "loss": 2.2659, "step": 74280 }, { "epoch": 0.26054042687414575, "grad_norm": 0.6045207977294922, "learning_rate": 3.697841096274489e-05, "loss": 2.2675, "step": 74340 }, { "epoch": 0.2607507097045526, "grad_norm": 0.5820584893226624, "learning_rate": 3.696789682122455e-05, "loss": 2.2751, "step": 74400 }, { "epoch": 0.2609609925349595, "grad_norm": 0.6069955825805664, "learning_rate": 3.695738267970421e-05, "loss": 2.286, "step": 74460 }, { "epoch": 0.26117127536536644, "grad_norm": 0.5582893490791321, "learning_rate": 3.694686853818386e-05, "loss": 2.2748, "step": 74520 }, { "epoch": 0.2613815581957733, "grad_norm": 0.7658640146255493, "learning_rate": 3.693635439666351e-05, "loss": 2.2724, "step": 74580 }, { "epoch": 0.2615918410261802, "grad_norm": 0.5628880262374878, "learning_rate": 3.692584025514317e-05, "loss": 2.2802, "step": 74640 }, { "epoch": 0.26180212385658713, "grad_norm": 0.5615341067314148, "learning_rate": 3.691532611362282e-05, "loss": 2.2764, "step": 74700 }, { "epoch": 0.262012406686994, "grad_norm": 0.5396032333374023, "learning_rate": 3.690481197210248e-05, "loss": 2.2799, "step": 74760 }, { "epoch": 0.2622226895174009, "grad_norm": 0.6700000166893005, "learning_rate": 3.689429783058214e-05, "loss": 2.2818, "step": 74820 }, { "epoch": 0.2624329723478078, "grad_norm": 0.5692336559295654, "learning_rate": 3.688378368906179e-05, "loss": 2.2745, "step": 74880 }, { "epoch": 0.2626432551782147, "grad_norm": 0.5630716681480408, "learning_rate": 3.687326954754144e-05, "loss": 2.2765, "step": 74940 }, { "epoch": 0.2628535380086216, "grad_norm": 0.5756769180297852, "learning_rate": 3.68627554060211e-05, "loss": 2.2772, "step": 75000 }, { "epoch": 0.2630638208390285, "grad_norm": 0.5566816329956055, "learning_rate": 3.6852241264500754e-05, "loss": 2.2685, "step": 75060 }, { "epoch": 0.2632741036694354, "grad_norm": 0.6051713228225708, "learning_rate": 3.684172712298041e-05, "loss": 2.2603, "step": 75120 }, { "epoch": 0.2634843864998423, "grad_norm": 0.6107643842697144, "learning_rate": 3.6831212981460064e-05, "loss": 2.2592, "step": 75180 }, { "epoch": 0.2636946693302492, "grad_norm": 0.5542864203453064, "learning_rate": 3.682069883993972e-05, "loss": 2.273, "step": 75240 }, { "epoch": 0.26390495216065607, "grad_norm": 0.5695670247077942, "learning_rate": 3.6810184698419375e-05, "loss": 2.2682, "step": 75300 }, { "epoch": 0.264115234991063, "grad_norm": 0.5360891222953796, "learning_rate": 3.679967055689903e-05, "loss": 2.2794, "step": 75360 }, { "epoch": 0.2643255178214699, "grad_norm": 0.5911653637886047, "learning_rate": 3.6789156415378686e-05, "loss": 2.2841, "step": 75420 }, { "epoch": 0.26453580065187676, "grad_norm": 0.5800861716270447, "learning_rate": 3.6778642273858344e-05, "loss": 2.2822, "step": 75480 }, { "epoch": 0.2647460834822837, "grad_norm": 0.5655189156532288, "learning_rate": 3.6768128132337996e-05, "loss": 2.2963, "step": 75540 }, { "epoch": 0.2649563663126906, "grad_norm": 0.5841854810714722, "learning_rate": 3.6757613990817655e-05, "loss": 2.2742, "step": 
75600 }, { "epoch": 0.26516664914309745, "grad_norm": 0.5612586736679077, "learning_rate": 3.674709984929731e-05, "loss": 2.2644, "step": 75660 }, { "epoch": 0.26537693197350437, "grad_norm": 0.5804318785667419, "learning_rate": 3.673658570777696e-05, "loss": 2.2799, "step": 75720 }, { "epoch": 0.2655872148039113, "grad_norm": 0.6640058159828186, "learning_rate": 3.672607156625662e-05, "loss": 2.2752, "step": 75780 }, { "epoch": 0.26579749763431815, "grad_norm": 0.5463032126426697, "learning_rate": 3.6715557424736276e-05, "loss": 2.2671, "step": 75840 }, { "epoch": 0.26600778046472506, "grad_norm": 0.5558159351348877, "learning_rate": 3.670504328321593e-05, "loss": 2.2825, "step": 75900 }, { "epoch": 0.266218063295132, "grad_norm": 0.6093794703483582, "learning_rate": 3.669470437738759e-05, "loss": 2.2823, "step": 75960 }, { "epoch": 0.26642834612553884, "grad_norm": 0.6040659546852112, "learning_rate": 3.6684190235867244e-05, "loss": 2.2763, "step": 76020 }, { "epoch": 0.26663862895594576, "grad_norm": 0.5455958247184753, "learning_rate": 3.6673676094346896e-05, "loss": 2.2805, "step": 76080 }, { "epoch": 0.26684891178635267, "grad_norm": 0.5874274969100952, "learning_rate": 3.6663161952826554e-05, "loss": 2.2779, "step": 76140 }, { "epoch": 0.26705919461675953, "grad_norm": 0.5681596398353577, "learning_rate": 3.6652647811306206e-05, "loss": 2.2686, "step": 76200 }, { "epoch": 0.26726947744716645, "grad_norm": 0.5700655579566956, "learning_rate": 3.6642133669785865e-05, "loss": 2.277, "step": 76260 }, { "epoch": 0.2674797602775733, "grad_norm": 0.5863887071609497, "learning_rate": 3.6631619528265523e-05, "loss": 2.2858, "step": 76320 }, { "epoch": 0.2676900431079802, "grad_norm": 0.5644739270210266, "learning_rate": 3.6621105386745175e-05, "loss": 2.2728, "step": 76380 }, { "epoch": 0.26790032593838714, "grad_norm": 0.6302114129066467, "learning_rate": 3.661059124522483e-05, "loss": 2.2855, "step": 76440 }, { "epoch": 0.268110608768794, "grad_norm": 0.5792679786682129, "learning_rate": 3.6600077103704486e-05, "loss": 2.2767, "step": 76500 }, { "epoch": 0.2683208915992009, "grad_norm": 0.585135817527771, "learning_rate": 3.658956296218414e-05, "loss": 2.2628, "step": 76560 }, { "epoch": 0.26853117442960783, "grad_norm": 0.6189137697219849, "learning_rate": 3.6579048820663796e-05, "loss": 2.2751, "step": 76620 }, { "epoch": 0.2687414572600147, "grad_norm": 0.5395883321762085, "learning_rate": 3.656853467914345e-05, "loss": 2.2943, "step": 76680 }, { "epoch": 0.2689517400904216, "grad_norm": 0.5798149704933167, "learning_rate": 3.655802053762311e-05, "loss": 2.2818, "step": 76740 }, { "epoch": 0.2691620229208285, "grad_norm": 0.5785930752754211, "learning_rate": 3.654750639610276e-05, "loss": 2.277, "step": 76800 }, { "epoch": 0.2693723057512354, "grad_norm": 0.5721600651741028, "learning_rate": 3.653699225458241e-05, "loss": 2.2691, "step": 76860 }, { "epoch": 0.2695825885816423, "grad_norm": 0.5851025581359863, "learning_rate": 3.652647811306207e-05, "loss": 2.2701, "step": 76920 }, { "epoch": 0.2697928714120492, "grad_norm": 0.6369785070419312, "learning_rate": 3.651596397154173e-05, "loss": 2.2694, "step": 76980 }, { "epoch": 0.2700031542424561, "grad_norm": 0.5723124146461487, "learning_rate": 3.650544983002138e-05, "loss": 2.277, "step": 77040 }, { "epoch": 0.270213437072863, "grad_norm": 0.537835955619812, "learning_rate": 3.649493568850104e-05, "loss": 2.2772, "step": 77100 }, { "epoch": 0.2704237199032699, "grad_norm": 0.574788510799408, "learning_rate": 3.648442154698069e-05, 
"loss": 2.2613, "step": 77160 }, { "epoch": 0.27063400273367677, "grad_norm": 0.5730419158935547, "learning_rate": 3.647390740546034e-05, "loss": 2.2719, "step": 77220 }, { "epoch": 0.2708442855640837, "grad_norm": 0.5613835453987122, "learning_rate": 3.646339326394e-05, "loss": 2.2883, "step": 77280 }, { "epoch": 0.2710545683944906, "grad_norm": 0.5720829963684082, "learning_rate": 3.645287912241966e-05, "loss": 2.2798, "step": 77340 }, { "epoch": 0.27126485122489746, "grad_norm": 0.5370036959648132, "learning_rate": 3.644236498089931e-05, "loss": 2.248, "step": 77400 }, { "epoch": 0.2714751340553044, "grad_norm": 0.6213563084602356, "learning_rate": 3.643185083937897e-05, "loss": 2.269, "step": 77460 }, { "epoch": 0.2716854168857113, "grad_norm": 0.5907732844352722, "learning_rate": 3.642133669785862e-05, "loss": 2.2824, "step": 77520 }, { "epoch": 0.27189569971611816, "grad_norm": 0.5852854251861572, "learning_rate": 3.6410822556338274e-05, "loss": 2.2667, "step": 77580 }, { "epoch": 0.27210598254652507, "grad_norm": 0.6155591607093811, "learning_rate": 3.640030841481793e-05, "loss": 2.2729, "step": 77640 }, { "epoch": 0.272316265376932, "grad_norm": 0.6252135038375854, "learning_rate": 3.638979427329759e-05, "loss": 2.2559, "step": 77700 }, { "epoch": 0.27252654820733885, "grad_norm": 0.6540042161941528, "learning_rate": 3.637928013177724e-05, "loss": 2.2865, "step": 77760 }, { "epoch": 0.27273683103774576, "grad_norm": 0.5478835701942444, "learning_rate": 3.6368765990256895e-05, "loss": 2.2831, "step": 77820 }, { "epoch": 0.2729471138681527, "grad_norm": 0.5724520683288574, "learning_rate": 3.6358251848736554e-05, "loss": 2.2792, "step": 77880 }, { "epoch": 0.27315739669855954, "grad_norm": 0.5985337495803833, "learning_rate": 3.6347737707216206e-05, "loss": 2.2833, "step": 77940 }, { "epoch": 0.27336767952896646, "grad_norm": 0.589272677898407, "learning_rate": 3.633722356569586e-05, "loss": 2.2795, "step": 78000 }, { "epoch": 0.2735779623593734, "grad_norm": 0.5734955072402954, "learning_rate": 3.632670942417552e-05, "loss": 2.2781, "step": 78060 }, { "epoch": 0.27378824518978023, "grad_norm": 0.5893633961677551, "learning_rate": 3.6316195282655175e-05, "loss": 2.2795, "step": 78120 }, { "epoch": 0.27399852802018715, "grad_norm": 0.6065170168876648, "learning_rate": 3.630568114113483e-05, "loss": 2.2781, "step": 78180 }, { "epoch": 0.27420881085059406, "grad_norm": 0.6370767951011658, "learning_rate": 3.6295166999614486e-05, "loss": 2.2815, "step": 78240 }, { "epoch": 0.2744190936810009, "grad_norm": 0.6043940782546997, "learning_rate": 3.628465285809414e-05, "loss": 2.2856, "step": 78300 }, { "epoch": 0.27462937651140784, "grad_norm": 0.5856173634529114, "learning_rate": 3.627413871657379e-05, "loss": 2.2742, "step": 78360 }, { "epoch": 0.27483965934181476, "grad_norm": 0.5545368194580078, "learning_rate": 3.626362457505345e-05, "loss": 2.2841, "step": 78420 }, { "epoch": 0.2750499421722216, "grad_norm": 0.5876592993736267, "learning_rate": 3.625328566922511e-05, "loss": 2.2842, "step": 78480 }, { "epoch": 0.27526022500262853, "grad_norm": 0.5744016766548157, "learning_rate": 3.6242771527704764e-05, "loss": 2.2797, "step": 78540 }, { "epoch": 0.27547050783303545, "grad_norm": 0.6173490285873413, "learning_rate": 3.623225738618442e-05, "loss": 2.2653, "step": 78600 }, { "epoch": 0.2756807906634423, "grad_norm": 0.5842175483703613, "learning_rate": 3.6221743244664074e-05, "loss": 2.2698, "step": 78660 }, { "epoch": 0.2758910734938492, "grad_norm": 0.6094750761985779, "learning_rate": 
3.6211229103143726e-05, "loss": 2.281, "step": 78720 }, { "epoch": 0.27610135632425614, "grad_norm": 0.5415861010551453, "learning_rate": 3.6200714961623385e-05, "loss": 2.2665, "step": 78780 }, { "epoch": 0.276311639154663, "grad_norm": 0.58269864320755, "learning_rate": 3.6190200820103044e-05, "loss": 2.2607, "step": 78840 }, { "epoch": 0.2765219219850699, "grad_norm": 0.568885087966919, "learning_rate": 3.6179686678582696e-05, "loss": 2.2534, "step": 78900 }, { "epoch": 0.27673220481547683, "grad_norm": 0.5955713987350464, "learning_rate": 3.6169172537062354e-05, "loss": 2.2671, "step": 78960 }, { "epoch": 0.2769424876458837, "grad_norm": 0.5937778949737549, "learning_rate": 3.6158658395542006e-05, "loss": 2.2728, "step": 79020 }, { "epoch": 0.2771527704762906, "grad_norm": 0.5668724179267883, "learning_rate": 3.614814425402166e-05, "loss": 2.2773, "step": 79080 }, { "epoch": 0.2773630533066975, "grad_norm": 0.5798020362854004, "learning_rate": 3.613763011250132e-05, "loss": 2.2796, "step": 79140 }, { "epoch": 0.2775733361371044, "grad_norm": 0.5528576374053955, "learning_rate": 3.6127115970980975e-05, "loss": 2.2819, "step": 79200 }, { "epoch": 0.2777836189675113, "grad_norm": 0.8906698226928711, "learning_rate": 3.611660182946063e-05, "loss": 2.2767, "step": 79260 }, { "epoch": 0.2779939017979182, "grad_norm": 0.5634118318557739, "learning_rate": 3.610608768794028e-05, "loss": 2.291, "step": 79320 }, { "epoch": 0.2782041846283251, "grad_norm": 0.615436851978302, "learning_rate": 3.609557354641994e-05, "loss": 2.2881, "step": 79380 }, { "epoch": 0.278414467458732, "grad_norm": 0.6048182249069214, "learning_rate": 3.608505940489959e-05, "loss": 2.2766, "step": 79440 }, { "epoch": 0.2786247502891389, "grad_norm": 0.5954188704490662, "learning_rate": 3.607454526337924e-05, "loss": 2.2794, "step": 79500 }, { "epoch": 0.27883503311954577, "grad_norm": 0.6068901419639587, "learning_rate": 3.606403112185891e-05, "loss": 2.2886, "step": 79560 }, { "epoch": 0.2790453159499527, "grad_norm": 0.5282168388366699, "learning_rate": 3.605351698033856e-05, "loss": 2.281, "step": 79620 }, { "epoch": 0.2792555987803596, "grad_norm": 0.5673637986183167, "learning_rate": 3.604300283881821e-05, "loss": 2.2773, "step": 79680 }, { "epoch": 0.27946588161076646, "grad_norm": 0.5759354829788208, "learning_rate": 3.603248869729787e-05, "loss": 2.2663, "step": 79740 }, { "epoch": 0.2796761644411734, "grad_norm": 0.580322265625, "learning_rate": 3.602197455577752e-05, "loss": 2.2748, "step": 79800 }, { "epoch": 0.2798864472715803, "grad_norm": 0.5620171427726746, "learning_rate": 3.601146041425717e-05, "loss": 2.2677, "step": 79860 }, { "epoch": 0.28009673010198716, "grad_norm": 0.5752658247947693, "learning_rate": 3.600094627273683e-05, "loss": 2.277, "step": 79920 }, { "epoch": 0.2803070129323941, "grad_norm": 0.6061094403266907, "learning_rate": 3.599043213121649e-05, "loss": 2.2687, "step": 79980 }, { "epoch": 0.280517295762801, "grad_norm": 0.5554862022399902, "learning_rate": 3.597991798969614e-05, "loss": 2.2693, "step": 80040 }, { "epoch": 0.28072757859320785, "grad_norm": 0.5555817484855652, "learning_rate": 3.59694038481758e-05, "loss": 2.2764, "step": 80100 }, { "epoch": 0.28093786142361477, "grad_norm": 0.5457671880722046, "learning_rate": 3.595888970665545e-05, "loss": 2.2667, "step": 80160 }, { "epoch": 0.2811481442540217, "grad_norm": 0.5533132553100586, "learning_rate": 3.5948375565135105e-05, "loss": 2.2753, "step": 80220 }, { "epoch": 0.28135842708442854, "grad_norm": 0.6642926931381226, 
"learning_rate": 3.5937861423614764e-05, "loss": 2.2584, "step": 80280 }, { "epoch": 0.28156870991483546, "grad_norm": 0.5732621550559998, "learning_rate": 3.592734728209442e-05, "loss": 2.2761, "step": 80340 }, { "epoch": 0.2817789927452424, "grad_norm": 0.5810340046882629, "learning_rate": 3.5916833140574074e-05, "loss": 2.2622, "step": 80400 }, { "epoch": 0.28198927557564923, "grad_norm": 0.5583139061927795, "learning_rate": 3.5906318999053726e-05, "loss": 2.2803, "step": 80460 }, { "epoch": 0.28219955840605615, "grad_norm": 0.5932782888412476, "learning_rate": 3.5895804857533385e-05, "loss": 2.2638, "step": 80520 }, { "epoch": 0.28240984123646307, "grad_norm": 0.606884241104126, "learning_rate": 3.5885290716013037e-05, "loss": 2.2761, "step": 80580 }, { "epoch": 0.2826201240668699, "grad_norm": 0.6091174483299255, "learning_rate": 3.5874776574492695e-05, "loss": 2.2745, "step": 80640 }, { "epoch": 0.28283040689727684, "grad_norm": 0.568898618221283, "learning_rate": 3.5864262432972354e-05, "loss": 2.2727, "step": 80700 }, { "epoch": 0.28304068972768376, "grad_norm": 0.5533196926116943, "learning_rate": 3.5853748291452006e-05, "loss": 2.2476, "step": 80760 }, { "epoch": 0.2832509725580906, "grad_norm": 0.5467692017555237, "learning_rate": 3.584323414993166e-05, "loss": 2.2744, "step": 80820 }, { "epoch": 0.28346125538849754, "grad_norm": 0.5872514843940735, "learning_rate": 3.5832720008411316e-05, "loss": 2.2623, "step": 80880 }, { "epoch": 0.28367153821890445, "grad_norm": 0.6108036637306213, "learning_rate": 3.582220586689097e-05, "loss": 2.2543, "step": 80940 }, { "epoch": 0.2838818210493113, "grad_norm": 0.5775202512741089, "learning_rate": 3.581169172537063e-05, "loss": 2.2559, "step": 81000 }, { "epoch": 0.2840921038797182, "grad_norm": 0.5871195793151855, "learning_rate": 3.580117758385028e-05, "loss": 2.2459, "step": 81060 }, { "epoch": 0.28430238671012514, "grad_norm": 0.5602725744247437, "learning_rate": 3.579066344232994e-05, "loss": 2.2584, "step": 81120 }, { "epoch": 0.284512669540532, "grad_norm": 0.5468248128890991, "learning_rate": 3.578014930080959e-05, "loss": 2.2736, "step": 81180 }, { "epoch": 0.2847229523709389, "grad_norm": 0.5603057146072388, "learning_rate": 3.576963515928924e-05, "loss": 2.2622, "step": 81240 }, { "epoch": 0.28493323520134584, "grad_norm": 0.5584391355514526, "learning_rate": 3.57591210177689e-05, "loss": 2.2554, "step": 81300 }, { "epoch": 0.2851435180317527, "grad_norm": 0.6015111804008484, "learning_rate": 3.574860687624856e-05, "loss": 2.2682, "step": 81360 }, { "epoch": 0.2853538008621596, "grad_norm": 0.5918145775794983, "learning_rate": 3.573809273472821e-05, "loss": 2.272, "step": 81420 }, { "epoch": 0.28556408369256653, "grad_norm": 0.6344346404075623, "learning_rate": 3.572757859320787e-05, "loss": 2.2662, "step": 81480 }, { "epoch": 0.2857743665229734, "grad_norm": 0.5758938193321228, "learning_rate": 3.571706445168752e-05, "loss": 2.2616, "step": 81540 }, { "epoch": 0.2859846493533803, "grad_norm": 0.6150460839271545, "learning_rate": 3.570655031016717e-05, "loss": 2.2674, "step": 81600 }, { "epoch": 0.2861949321837872, "grad_norm": 0.6692140102386475, "learning_rate": 3.569603616864683e-05, "loss": 2.2647, "step": 81660 }, { "epoch": 0.2864052150141941, "grad_norm": 0.5545239448547363, "learning_rate": 3.568552202712649e-05, "loss": 2.2596, "step": 81720 }, { "epoch": 0.286615497844601, "grad_norm": 0.6395848393440247, "learning_rate": 3.567500788560614e-05, "loss": 2.2774, "step": 81780 }, { "epoch": 0.2868257806750079, 
"grad_norm": 0.5730613470077515, "learning_rate": 3.5664493744085794e-05, "loss": 2.2655, "step": 81840 }, { "epoch": 0.2870360635054148, "grad_norm": 0.5833149552345276, "learning_rate": 3.565397960256545e-05, "loss": 2.2705, "step": 81900 }, { "epoch": 0.2872463463358217, "grad_norm": 0.5819433331489563, "learning_rate": 3.5643465461045105e-05, "loss": 2.2533, "step": 81960 }, { "epoch": 0.28745662916622855, "grad_norm": 0.5378984808921814, "learning_rate": 3.563295131952476e-05, "loss": 2.2749, "step": 82020 }, { "epoch": 0.28766691199663547, "grad_norm": 0.643327534198761, "learning_rate": 3.562243717800442e-05, "loss": 2.2694, "step": 82080 }, { "epoch": 0.2878771948270424, "grad_norm": 0.5695509910583496, "learning_rate": 3.5611923036484074e-05, "loss": 2.2715, "step": 82140 }, { "epoch": 0.28808747765744924, "grad_norm": 0.5622131824493408, "learning_rate": 3.5601408894963726e-05, "loss": 2.2797, "step": 82200 }, { "epoch": 0.28829776048785616, "grad_norm": 0.58394855260849, "learning_rate": 3.5590894753443384e-05, "loss": 2.264, "step": 82260 }, { "epoch": 0.2885080433182631, "grad_norm": 0.5865631699562073, "learning_rate": 3.5580380611923036e-05, "loss": 2.2757, "step": 82320 }, { "epoch": 0.28871832614866993, "grad_norm": 0.5562753677368164, "learning_rate": 3.556986647040269e-05, "loss": 2.2632, "step": 82380 }, { "epoch": 0.28892860897907685, "grad_norm": 0.5615153312683105, "learning_rate": 3.5559352328882354e-05, "loss": 2.2613, "step": 82440 }, { "epoch": 0.28913889180948377, "grad_norm": 0.5816681385040283, "learning_rate": 3.5548838187362006e-05, "loss": 2.2741, "step": 82500 }, { "epoch": 0.2893491746398906, "grad_norm": 0.6041331887245178, "learning_rate": 3.553832404584166e-05, "loss": 2.2675, "step": 82560 }, { "epoch": 0.28955945747029754, "grad_norm": 0.5498755574226379, "learning_rate": 3.552798514001332e-05, "loss": 2.2622, "step": 82620 }, { "epoch": 0.28976974030070446, "grad_norm": 0.5720410943031311, "learning_rate": 3.551747099849297e-05, "loss": 2.2657, "step": 82680 }, { "epoch": 0.2899800231311113, "grad_norm": 0.5457740426063538, "learning_rate": 3.550713209266464e-05, "loss": 2.2648, "step": 82740 }, { "epoch": 0.29019030596151824, "grad_norm": 0.6136612892150879, "learning_rate": 3.549661795114429e-05, "loss": 2.2572, "step": 82800 }, { "epoch": 0.29040058879192515, "grad_norm": 0.5554794073104858, "learning_rate": 3.548610380962394e-05, "loss": 2.262, "step": 82860 }, { "epoch": 0.290610871622332, "grad_norm": 0.5288531184196472, "learning_rate": 3.5475589668103606e-05, "loss": 2.2772, "step": 82920 }, { "epoch": 0.29082115445273893, "grad_norm": 0.5184205174446106, "learning_rate": 3.546507552658326e-05, "loss": 2.2646, "step": 82980 }, { "epoch": 0.29103143728314584, "grad_norm": 0.5476679801940918, "learning_rate": 3.545456138506291e-05, "loss": 2.283, "step": 83040 }, { "epoch": 0.2912417201135527, "grad_norm": 0.602214515209198, "learning_rate": 3.544404724354257e-05, "loss": 2.253, "step": 83100 }, { "epoch": 0.2914520029439596, "grad_norm": 0.5712595582008362, "learning_rate": 3.543353310202222e-05, "loss": 2.2706, "step": 83160 }, { "epoch": 0.29166228577436654, "grad_norm": 0.6233317852020264, "learning_rate": 3.542301896050187e-05, "loss": 2.2605, "step": 83220 }, { "epoch": 0.2918725686047734, "grad_norm": 0.5277979969978333, "learning_rate": 3.541250481898153e-05, "loss": 2.2423, "step": 83280 }, { "epoch": 0.2920828514351803, "grad_norm": 0.564591109752655, "learning_rate": 3.540199067746119e-05, "loss": 2.2628, "step": 83340 }, { "epoch": 
0.29229313426558723, "grad_norm": 0.5598502159118652, "learning_rate": 3.539147653594084e-05, "loss": 2.2807, "step": 83400 }, { "epoch": 0.2925034170959941, "grad_norm": 0.5494130849838257, "learning_rate": 3.5380962394420494e-05, "loss": 2.2698, "step": 83460 }, { "epoch": 0.292713699926401, "grad_norm": 0.5903727412223816, "learning_rate": 3.537044825290015e-05, "loss": 2.2692, "step": 83520 }, { "epoch": 0.2929239827568079, "grad_norm": 0.5893160104751587, "learning_rate": 3.5359934111379804e-05, "loss": 2.2593, "step": 83580 }, { "epoch": 0.2931342655872148, "grad_norm": 0.5820626616477966, "learning_rate": 3.534941996985946e-05, "loss": 2.2657, "step": 83640 }, { "epoch": 0.2933445484176217, "grad_norm": 0.6435462236404419, "learning_rate": 3.533890582833912e-05, "loss": 2.2636, "step": 83700 }, { "epoch": 0.2935548312480286, "grad_norm": 0.5747917890548706, "learning_rate": 3.5328391686818774e-05, "loss": 2.2617, "step": 83760 }, { "epoch": 0.2937651140784355, "grad_norm": 0.5375548005104065, "learning_rate": 3.5317877545298425e-05, "loss": 2.2723, "step": 83820 }, { "epoch": 0.2939753969088424, "grad_norm": 0.5366535186767578, "learning_rate": 3.5307363403778084e-05, "loss": 2.2755, "step": 83880 }, { "epoch": 0.2941856797392493, "grad_norm": 0.641517698764801, "learning_rate": 3.5296849262257736e-05, "loss": 2.271, "step": 83940 }, { "epoch": 0.29439596256965617, "grad_norm": 0.5670305490493774, "learning_rate": 3.5286335120737395e-05, "loss": 2.2707, "step": 84000 }, { "epoch": 0.2946062454000631, "grad_norm": 0.5636521577835083, "learning_rate": 3.527582097921705e-05, "loss": 2.2622, "step": 84060 }, { "epoch": 0.29481652823047, "grad_norm": 0.5666058659553528, "learning_rate": 3.5265306837696705e-05, "loss": 2.256, "step": 84120 }, { "epoch": 0.29502681106087686, "grad_norm": 0.621285617351532, "learning_rate": 3.525479269617636e-05, "loss": 2.2726, "step": 84180 }, { "epoch": 0.2952370938912838, "grad_norm": 0.5541791319847107, "learning_rate": 3.5244278554656016e-05, "loss": 2.2598, "step": 84240 }, { "epoch": 0.2954473767216907, "grad_norm": 0.5623100996017456, "learning_rate": 3.523376441313567e-05, "loss": 2.2616, "step": 84300 }, { "epoch": 0.29565765955209755, "grad_norm": 0.617722749710083, "learning_rate": 3.5223250271615326e-05, "loss": 2.2695, "step": 84360 }, { "epoch": 0.29586794238250447, "grad_norm": 0.5718526840209961, "learning_rate": 3.521273613009498e-05, "loss": 2.2756, "step": 84420 }, { "epoch": 0.2960782252129114, "grad_norm": 0.5954556465148926, "learning_rate": 3.520222198857464e-05, "loss": 2.2605, "step": 84480 }, { "epoch": 0.29628850804331824, "grad_norm": 0.5449030995368958, "learning_rate": 3.519170784705429e-05, "loss": 2.2795, "step": 84540 }, { "epoch": 0.29649879087372516, "grad_norm": 0.6317994594573975, "learning_rate": 3.518119370553394e-05, "loss": 2.2734, "step": 84600 }, { "epoch": 0.2967090737041321, "grad_norm": 0.5306956768035889, "learning_rate": 3.51706795640136e-05, "loss": 2.2734, "step": 84660 }, { "epoch": 0.29691935653453894, "grad_norm": 0.5752288103103638, "learning_rate": 3.516016542249326e-05, "loss": 2.2721, "step": 84720 }, { "epoch": 0.29712963936494585, "grad_norm": 0.5798787474632263, "learning_rate": 3.514965128097291e-05, "loss": 2.263, "step": 84780 }, { "epoch": 0.29733992219535277, "grad_norm": 0.6193447113037109, "learning_rate": 3.513913713945257e-05, "loss": 2.2682, "step": 84840 }, { "epoch": 0.29755020502575963, "grad_norm": 0.606647789478302, "learning_rate": 3.512862299793222e-05, "loss": 2.2723, "step": 
84900 }, { "epoch": 0.29776048785616654, "grad_norm": 0.5479643940925598, "learning_rate": 3.5118284092103884e-05, "loss": 2.2642, "step": 84960 }, { "epoch": 0.29797077068657346, "grad_norm": 0.5397103428840637, "learning_rate": 3.5107769950583536e-05, "loss": 2.2639, "step": 85020 }, { "epoch": 0.2981810535169803, "grad_norm": 0.5763701796531677, "learning_rate": 3.509725580906319e-05, "loss": 2.282, "step": 85080 }, { "epoch": 0.29839133634738724, "grad_norm": 0.5760320425033569, "learning_rate": 3.508674166754285e-05, "loss": 2.2701, "step": 85140 }, { "epoch": 0.29860161917779415, "grad_norm": 0.5381782054901123, "learning_rate": 3.5076227526022505e-05, "loss": 2.2619, "step": 85200 }, { "epoch": 0.298811902008201, "grad_norm": 0.5738365054130554, "learning_rate": 3.506571338450216e-05, "loss": 2.2596, "step": 85260 }, { "epoch": 0.29902218483860793, "grad_norm": 0.5890007019042969, "learning_rate": 3.505519924298181e-05, "loss": 2.2801, "step": 85320 }, { "epoch": 0.29923246766901485, "grad_norm": 0.5713682174682617, "learning_rate": 3.504468510146147e-05, "loss": 2.2637, "step": 85380 }, { "epoch": 0.2994427504994217, "grad_norm": 0.5621296167373657, "learning_rate": 3.503417095994112e-05, "loss": 2.2639, "step": 85440 }, { "epoch": 0.2996530333298286, "grad_norm": 0.5415980815887451, "learning_rate": 3.502365681842078e-05, "loss": 2.2729, "step": 85500 }, { "epoch": 0.29986331616023554, "grad_norm": 0.6131845116615295, "learning_rate": 3.501314267690044e-05, "loss": 2.2626, "step": 85560 }, { "epoch": 0.3000735989906424, "grad_norm": 0.5747252702713013, "learning_rate": 3.500262853538009e-05, "loss": 2.2589, "step": 85620 }, { "epoch": 0.3002838818210493, "grad_norm": 0.5767868161201477, "learning_rate": 3.499211439385974e-05, "loss": 2.2655, "step": 85680 }, { "epoch": 0.30049416465145623, "grad_norm": 0.563033938407898, "learning_rate": 3.49816002523394e-05, "loss": 2.2744, "step": 85740 }, { "epoch": 0.3007044474818631, "grad_norm": 0.503409743309021, "learning_rate": 3.497108611081905e-05, "loss": 2.2667, "step": 85800 }, { "epoch": 0.30091473031227, "grad_norm": 0.5511951446533203, "learning_rate": 3.496057196929871e-05, "loss": 2.2694, "step": 85860 }, { "epoch": 0.3011250131426769, "grad_norm": 0.5811784267425537, "learning_rate": 3.495005782777836e-05, "loss": 2.2629, "step": 85920 }, { "epoch": 0.3013352959730838, "grad_norm": 0.5567927956581116, "learning_rate": 3.493954368625802e-05, "loss": 2.2606, "step": 85980 }, { "epoch": 0.3015455788034907, "grad_norm": 0.567691445350647, "learning_rate": 3.492902954473767e-05, "loss": 2.263, "step": 86040 }, { "epoch": 0.3017558616338976, "grad_norm": 0.5785837769508362, "learning_rate": 3.4918515403217325e-05, "loss": 2.2551, "step": 86100 }, { "epoch": 0.3019661444643045, "grad_norm": 0.6118089556694031, "learning_rate": 3.490800126169698e-05, "loss": 2.2685, "step": 86160 }, { "epoch": 0.3021764272947114, "grad_norm": 0.5302484631538391, "learning_rate": 3.489748712017664e-05, "loss": 2.2555, "step": 86220 }, { "epoch": 0.3023867101251183, "grad_norm": 0.6238006949424744, "learning_rate": 3.4886972978656294e-05, "loss": 2.2602, "step": 86280 }, { "epoch": 0.30259699295552517, "grad_norm": 0.5831438302993774, "learning_rate": 3.487645883713595e-05, "loss": 2.2522, "step": 86340 }, { "epoch": 0.3028072757859321, "grad_norm": 0.5737531781196594, "learning_rate": 3.4865944695615604e-05, "loss": 2.2572, "step": 86400 }, { "epoch": 0.303017558616339, "grad_norm": 0.6034284234046936, "learning_rate": 3.4855430554095256e-05, "loss": 
2.2708, "step": 86460 }, { "epoch": 0.30322784144674586, "grad_norm": 0.6044401526451111, "learning_rate": 3.4844916412574915e-05, "loss": 2.2659, "step": 86520 }, { "epoch": 0.3034381242771528, "grad_norm": 0.5900658965110779, "learning_rate": 3.4834402271054574e-05, "loss": 2.2673, "step": 86580 }, { "epoch": 0.3036484071075597, "grad_norm": 0.5390185117721558, "learning_rate": 3.4823888129534225e-05, "loss": 2.2505, "step": 86640 }, { "epoch": 0.30385868993796655, "grad_norm": 0.5748520493507385, "learning_rate": 3.4813373988013884e-05, "loss": 2.2634, "step": 86700 }, { "epoch": 0.30406897276837347, "grad_norm": 0.5243687033653259, "learning_rate": 3.4802859846493536e-05, "loss": 2.2798, "step": 86760 }, { "epoch": 0.3042792555987804, "grad_norm": 0.5927646160125732, "learning_rate": 3.479234570497319e-05, "loss": 2.2792, "step": 86820 }, { "epoch": 0.30448953842918725, "grad_norm": 0.5824649333953857, "learning_rate": 3.4781831563452847e-05, "loss": 2.2633, "step": 86880 }, { "epoch": 0.30469982125959416, "grad_norm": 0.5818813443183899, "learning_rate": 3.4771317421932505e-05, "loss": 2.271, "step": 86940 }, { "epoch": 0.3049101040900011, "grad_norm": 0.5972533822059631, "learning_rate": 3.476080328041216e-05, "loss": 2.282, "step": 87000 }, { "epoch": 0.30512038692040794, "grad_norm": 0.5627744793891907, "learning_rate": 3.475028913889181e-05, "loss": 2.2621, "step": 87060 }, { "epoch": 0.30533066975081485, "grad_norm": 0.5838892459869385, "learning_rate": 3.473977499737147e-05, "loss": 2.2751, "step": 87120 }, { "epoch": 0.30554095258122177, "grad_norm": 0.5534815788269043, "learning_rate": 3.472926085585112e-05, "loss": 2.2741, "step": 87180 }, { "epoch": 0.30575123541162863, "grad_norm": 0.5433311462402344, "learning_rate": 3.471874671433077e-05, "loss": 2.2716, "step": 87240 }, { "epoch": 0.30596151824203555, "grad_norm": 0.5968735218048096, "learning_rate": 3.470823257281044e-05, "loss": 2.2503, "step": 87300 }, { "epoch": 0.30617180107244246, "grad_norm": 0.5322306752204895, "learning_rate": 3.469771843129009e-05, "loss": 2.2795, "step": 87360 }, { "epoch": 0.3063820839028493, "grad_norm": 0.6331864595413208, "learning_rate": 3.468720428976974e-05, "loss": 2.2611, "step": 87420 }, { "epoch": 0.30659236673325624, "grad_norm": 0.5504761338233948, "learning_rate": 3.46766901482494e-05, "loss": 2.2672, "step": 87480 }, { "epoch": 0.30680264956366315, "grad_norm": 0.6416230201721191, "learning_rate": 3.466617600672905e-05, "loss": 2.2727, "step": 87540 }, { "epoch": 0.30701293239407, "grad_norm": 0.639982283115387, "learning_rate": 3.46556618652087e-05, "loss": 2.2618, "step": 87600 }, { "epoch": 0.30722321522447693, "grad_norm": 0.6075161099433899, "learning_rate": 3.464514772368836e-05, "loss": 2.251, "step": 87660 }, { "epoch": 0.3074334980548838, "grad_norm": 0.5911880135536194, "learning_rate": 3.463463358216802e-05, "loss": 2.2673, "step": 87720 }, { "epoch": 0.3076437808852907, "grad_norm": 0.5482556223869324, "learning_rate": 3.462411944064767e-05, "loss": 2.2676, "step": 87780 }, { "epoch": 0.3078540637156976, "grad_norm": 0.5883685946464539, "learning_rate": 3.4613605299127324e-05, "loss": 2.2726, "step": 87840 }, { "epoch": 0.3080643465461045, "grad_norm": 0.5492207407951355, "learning_rate": 3.460309115760698e-05, "loss": 2.2536, "step": 87900 }, { "epoch": 0.3082746293765114, "grad_norm": 0.5360828042030334, "learning_rate": 3.4592577016086635e-05, "loss": 2.2705, "step": 87960 }, { "epoch": 0.3084849122069183, "grad_norm": 0.5943337082862854, "learning_rate": 
3.4582062874566293e-05, "loss": 2.2732, "step": 88020 }, { "epoch": 0.3086951950373252, "grad_norm": 0.5922872424125671, "learning_rate": 3.457154873304595e-05, "loss": 2.2563, "step": 88080 }, { "epoch": 0.3089054778677321, "grad_norm": 0.5607509613037109, "learning_rate": 3.456120982721761e-05, "loss": 2.2701, "step": 88140 }, { "epoch": 0.309115760698139, "grad_norm": 0.6036087870597839, "learning_rate": 3.455069568569727e-05, "loss": 2.2575, "step": 88200 }, { "epoch": 0.30932604352854587, "grad_norm": 0.5599225163459778, "learning_rate": 3.454018154417692e-05, "loss": 2.2664, "step": 88260 }, { "epoch": 0.3095363263589528, "grad_norm": 0.5826290249824524, "learning_rate": 3.452966740265657e-05, "loss": 2.2645, "step": 88320 }, { "epoch": 0.3097466091893597, "grad_norm": 0.5541812777519226, "learning_rate": 3.451915326113623e-05, "loss": 2.256, "step": 88380 }, { "epoch": 0.30995689201976656, "grad_norm": 0.5463770627975464, "learning_rate": 3.450863911961589e-05, "loss": 2.2494, "step": 88440 }, { "epoch": 0.3101671748501735, "grad_norm": 0.554961085319519, "learning_rate": 3.449812497809554e-05, "loss": 2.2609, "step": 88500 }, { "epoch": 0.3103774576805804, "grad_norm": 0.5289806127548218, "learning_rate": 3.448761083657519e-05, "loss": 2.2712, "step": 88560 }, { "epoch": 0.31058774051098725, "grad_norm": 0.5908351540565491, "learning_rate": 3.447709669505485e-05, "loss": 2.2589, "step": 88620 }, { "epoch": 0.31079802334139417, "grad_norm": 0.5531824231147766, "learning_rate": 3.4466582553534503e-05, "loss": 2.2567, "step": 88680 }, { "epoch": 0.3110083061718011, "grad_norm": 0.6163316369056702, "learning_rate": 3.4456068412014155e-05, "loss": 2.2585, "step": 88740 }, { "epoch": 0.31121858900220795, "grad_norm": 0.527815580368042, "learning_rate": 3.444555427049382e-05, "loss": 2.27, "step": 88800 }, { "epoch": 0.31142887183261486, "grad_norm": 0.5704740285873413, "learning_rate": 3.443504012897347e-05, "loss": 2.2537, "step": 88860 }, { "epoch": 0.3116391546630218, "grad_norm": 0.5804399847984314, "learning_rate": 3.4424525987453125e-05, "loss": 2.2601, "step": 88920 }, { "epoch": 0.31184943749342864, "grad_norm": 0.5516076683998108, "learning_rate": 3.441401184593278e-05, "loss": 2.2565, "step": 88980 }, { "epoch": 0.31205972032383555, "grad_norm": 0.5606161952018738, "learning_rate": 3.4403497704412435e-05, "loss": 2.2563, "step": 89040 }, { "epoch": 0.31227000315424247, "grad_norm": 0.5703347325325012, "learning_rate": 3.439298356289209e-05, "loss": 2.2505, "step": 89100 }, { "epoch": 0.31248028598464933, "grad_norm": 0.5731121301651001, "learning_rate": 3.438246942137175e-05, "loss": 2.2589, "step": 89160 }, { "epoch": 0.31269056881505625, "grad_norm": 0.5524592995643616, "learning_rate": 3.4371955279851404e-05, "loss": 2.2535, "step": 89220 }, { "epoch": 0.31290085164546316, "grad_norm": 0.5376356244087219, "learning_rate": 3.4361441138331056e-05, "loss": 2.2718, "step": 89280 }, { "epoch": 0.31311113447587, "grad_norm": 0.5426408052444458, "learning_rate": 3.4350926996810715e-05, "loss": 2.2534, "step": 89340 }, { "epoch": 0.31332141730627694, "grad_norm": 0.5779808759689331, "learning_rate": 3.434041285529037e-05, "loss": 2.2609, "step": 89400 }, { "epoch": 0.31353170013668386, "grad_norm": 0.5719578862190247, "learning_rate": 3.432989871377002e-05, "loss": 2.2715, "step": 89460 }, { "epoch": 0.3137419829670907, "grad_norm": 0.5462282299995422, "learning_rate": 3.431938457224968e-05, "loss": 2.2761, "step": 89520 }, { "epoch": 0.31395226579749763, "grad_norm": 
0.5807678699493408, "learning_rate": 3.4308870430729336e-05, "loss": 2.2539, "step": 89580 }, { "epoch": 0.31416254862790455, "grad_norm": 0.5386305451393127, "learning_rate": 3.429835628920899e-05, "loss": 2.2604, "step": 89640 }, { "epoch": 0.3143728314583114, "grad_norm": 0.5434944033622742, "learning_rate": 3.428784214768864e-05, "loss": 2.2533, "step": 89700 }, { "epoch": 0.3145831142887183, "grad_norm": 0.5666850209236145, "learning_rate": 3.42773280061683e-05, "loss": 2.2525, "step": 89760 }, { "epoch": 0.31479339711912524, "grad_norm": 0.5859403014183044, "learning_rate": 3.426681386464795e-05, "loss": 2.2715, "step": 89820 }, { "epoch": 0.3150036799495321, "grad_norm": 0.6053626537322998, "learning_rate": 3.425629972312761e-05, "loss": 2.2643, "step": 89880 }, { "epoch": 0.315213962779939, "grad_norm": 0.6361802816390991, "learning_rate": 3.424578558160727e-05, "loss": 2.268, "step": 89940 }, { "epoch": 0.31542424561034593, "grad_norm": 0.6449795961380005, "learning_rate": 3.423527144008692e-05, "loss": 2.2574, "step": 90000 }, { "epoch": 0.3156345284407528, "grad_norm": 0.57269287109375, "learning_rate": 3.422475729856657e-05, "loss": 2.2697, "step": 90060 }, { "epoch": 0.3158448112711597, "grad_norm": 0.5491210222244263, "learning_rate": 3.421424315704623e-05, "loss": 2.2598, "step": 90120 }, { "epoch": 0.3160550941015666, "grad_norm": 0.5665245652198792, "learning_rate": 3.420372901552588e-05, "loss": 2.2456, "step": 90180 }, { "epoch": 0.3162653769319735, "grad_norm": 0.5336186289787292, "learning_rate": 3.419321487400554e-05, "loss": 2.2659, "step": 90240 }, { "epoch": 0.3164756597623804, "grad_norm": 0.539472222328186, "learning_rate": 3.418270073248519e-05, "loss": 2.2555, "step": 90300 }, { "epoch": 0.3166859425927873, "grad_norm": 0.6023342609405518, "learning_rate": 3.417218659096485e-05, "loss": 2.2755, "step": 90360 }, { "epoch": 0.3168962254231942, "grad_norm": 0.5498049855232239, "learning_rate": 3.41616724494445e-05, "loss": 2.2514, "step": 90420 }, { "epoch": 0.3171065082536011, "grad_norm": 0.5622275471687317, "learning_rate": 3.4151158307924155e-05, "loss": 2.2569, "step": 90480 }, { "epoch": 0.317316791084008, "grad_norm": 0.5502556562423706, "learning_rate": 3.4140644166403814e-05, "loss": 2.2784, "step": 90540 }, { "epoch": 0.31752707391441487, "grad_norm": 0.5545588731765747, "learning_rate": 3.413013002488347e-05, "loss": 2.2701, "step": 90600 }, { "epoch": 0.3177373567448218, "grad_norm": 0.5292103886604309, "learning_rate": 3.4119615883363124e-05, "loss": 2.268, "step": 90660 }, { "epoch": 0.3179476395752287, "grad_norm": 0.6121399402618408, "learning_rate": 3.410910174184278e-05, "loss": 2.2552, "step": 90720 }, { "epoch": 0.31815792240563556, "grad_norm": 0.6200298070907593, "learning_rate": 3.4098587600322435e-05, "loss": 2.2734, "step": 90780 }, { "epoch": 0.3183682052360425, "grad_norm": 0.5391733646392822, "learning_rate": 3.408807345880209e-05, "loss": 2.2668, "step": 90840 }, { "epoch": 0.3185784880664494, "grad_norm": 0.5934666395187378, "learning_rate": 3.4077559317281745e-05, "loss": 2.2687, "step": 90900 }, { "epoch": 0.31878877089685626, "grad_norm": 0.5021581053733826, "learning_rate": 3.4067045175761404e-05, "loss": 2.2582, "step": 90960 }, { "epoch": 0.31899905372726317, "grad_norm": 0.7432253956794739, "learning_rate": 3.4056531034241056e-05, "loss": 2.2719, "step": 91020 }, { "epoch": 0.3192093365576701, "grad_norm": 0.5567831993103027, "learning_rate": 3.4046016892720715e-05, "loss": 2.2607, "step": 91080 }, { "epoch": 
0.31941961938807695, "grad_norm": 0.5400500893592834, "learning_rate": 3.4035502751200366e-05, "loss": 2.2592, "step": 91140 }, { "epoch": 0.31962990221848386, "grad_norm": 0.5561904907226562, "learning_rate": 3.402498860968002e-05, "loss": 2.2624, "step": 91200 }, { "epoch": 0.3198401850488908, "grad_norm": 0.6148996949195862, "learning_rate": 3.401447446815968e-05, "loss": 2.2486, "step": 91260 }, { "epoch": 0.32005046787929764, "grad_norm": 0.5738281607627869, "learning_rate": 3.4003960326639336e-05, "loss": 2.2401, "step": 91320 }, { "epoch": 0.32026075070970456, "grad_norm": 0.5140102505683899, "learning_rate": 3.399344618511899e-05, "loss": 2.2603, "step": 91380 }, { "epoch": 0.32047103354011147, "grad_norm": 0.6118866205215454, "learning_rate": 3.398293204359864e-05, "loss": 2.2582, "step": 91440 }, { "epoch": 0.32068131637051833, "grad_norm": 0.5432943105697632, "learning_rate": 3.39724179020783e-05, "loss": 2.2531, "step": 91500 }, { "epoch": 0.32089159920092525, "grad_norm": 0.5328633785247803, "learning_rate": 3.396190376055795e-05, "loss": 2.2455, "step": 91560 }, { "epoch": 0.32110188203133216, "grad_norm": 0.5897437930107117, "learning_rate": 3.39513896190376e-05, "loss": 2.2739, "step": 91620 }, { "epoch": 0.321312164861739, "grad_norm": 0.5548875331878662, "learning_rate": 3.394087547751727e-05, "loss": 2.2699, "step": 91680 }, { "epoch": 0.32152244769214594, "grad_norm": 0.5671025514602661, "learning_rate": 3.393036133599692e-05, "loss": 2.2655, "step": 91740 }, { "epoch": 0.32173273052255286, "grad_norm": 0.5613976716995239, "learning_rate": 3.391984719447657e-05, "loss": 2.265, "step": 91800 }, { "epoch": 0.3219430133529597, "grad_norm": 0.5736813545227051, "learning_rate": 3.390933305295623e-05, "loss": 2.2543, "step": 91860 }, { "epoch": 0.32215329618336663, "grad_norm": 0.5219939947128296, "learning_rate": 3.389881891143588e-05, "loss": 2.2588, "step": 91920 }, { "epoch": 0.32236357901377355, "grad_norm": 0.5467992424964905, "learning_rate": 3.3888304769915534e-05, "loss": 2.2552, "step": 91980 }, { "epoch": 0.3225738618441804, "grad_norm": 0.572704017162323, "learning_rate": 3.387779062839519e-05, "loss": 2.2651, "step": 92040 }, { "epoch": 0.3227841446745873, "grad_norm": 0.5442659258842468, "learning_rate": 3.386727648687485e-05, "loss": 2.2542, "step": 92100 }, { "epoch": 0.32299442750499424, "grad_norm": 0.6716965436935425, "learning_rate": 3.38567623453545e-05, "loss": 2.2431, "step": 92160 }, { "epoch": 0.3232047103354011, "grad_norm": 0.5147510170936584, "learning_rate": 3.3846248203834155e-05, "loss": 2.2577, "step": 92220 }, { "epoch": 0.323414993165808, "grad_norm": 0.523707389831543, "learning_rate": 3.3835734062313813e-05, "loss": 2.2588, "step": 92280 }, { "epoch": 0.32362527599621493, "grad_norm": 0.5979717373847961, "learning_rate": 3.3825219920793465e-05, "loss": 2.2678, "step": 92340 }, { "epoch": 0.3238355588266218, "grad_norm": 0.5593013763427734, "learning_rate": 3.381488101496513e-05, "loss": 2.2686, "step": 92400 }, { "epoch": 0.3240458416570287, "grad_norm": 0.5766727924346924, "learning_rate": 3.380436687344479e-05, "loss": 2.2619, "step": 92460 }, { "epoch": 0.3242561244874356, "grad_norm": 0.5491504669189453, "learning_rate": 3.379385273192444e-05, "loss": 2.2583, "step": 92520 }, { "epoch": 0.3244664073178425, "grad_norm": 0.570665717124939, "learning_rate": 3.37833385904041e-05, "loss": 2.2549, "step": 92580 }, { "epoch": 0.3246766901482494, "grad_norm": 0.7060606479644775, "learning_rate": 3.377282444888375e-05, "loss": 2.2543, "step": 
92640 }, { "epoch": 0.3248869729786563, "grad_norm": 0.5789251327514648, "learning_rate": 3.37623103073634e-05, "loss": 2.2744, "step": 92700 }, { "epoch": 0.3250972558090632, "grad_norm": 0.6093466281890869, "learning_rate": 3.375179616584306e-05, "loss": 2.2537, "step": 92760 }, { "epoch": 0.3253075386394701, "grad_norm": 0.6546159982681274, "learning_rate": 3.374128202432272e-05, "loss": 2.2615, "step": 92820 }, { "epoch": 0.325517821469877, "grad_norm": 0.5653020739555359, "learning_rate": 3.373076788280237e-05, "loss": 2.2522, "step": 92880 }, { "epoch": 0.32572810430028387, "grad_norm": 0.5708128213882446, "learning_rate": 3.372025374128202e-05, "loss": 2.2478, "step": 92940 }, { "epoch": 0.3259383871306908, "grad_norm": 0.5512128472328186, "learning_rate": 3.370973959976168e-05, "loss": 2.2609, "step": 93000 }, { "epoch": 0.3261486699610977, "grad_norm": 0.5367293953895569, "learning_rate": 3.3699225458241334e-05, "loss": 2.2584, "step": 93060 }, { "epoch": 0.32635895279150456, "grad_norm": 0.5934156775474548, "learning_rate": 3.3688711316720986e-05, "loss": 2.2551, "step": 93120 }, { "epoch": 0.3265692356219115, "grad_norm": 0.5676745772361755, "learning_rate": 3.367819717520065e-05, "loss": 2.2589, "step": 93180 }, { "epoch": 0.32677951845231834, "grad_norm": 0.5735729932785034, "learning_rate": 3.36676830336803e-05, "loss": 2.2476, "step": 93240 }, { "epoch": 0.32698980128272526, "grad_norm": 0.5471116304397583, "learning_rate": 3.3657168892159955e-05, "loss": 2.2497, "step": 93300 }, { "epoch": 0.3272000841131322, "grad_norm": 0.5850885510444641, "learning_rate": 3.3646654750639614e-05, "loss": 2.2593, "step": 93360 }, { "epoch": 0.32741036694353903, "grad_norm": 0.5297808647155762, "learning_rate": 3.3636140609119266e-05, "loss": 2.2455, "step": 93420 }, { "epoch": 0.32762064977394595, "grad_norm": 0.5714202523231506, "learning_rate": 3.362562646759892e-05, "loss": 2.2594, "step": 93480 }, { "epoch": 0.32783093260435286, "grad_norm": 0.6715126633644104, "learning_rate": 3.3615112326078576e-05, "loss": 2.2555, "step": 93540 }, { "epoch": 0.3280412154347597, "grad_norm": 0.5289738178253174, "learning_rate": 3.3604598184558235e-05, "loss": 2.2483, "step": 93600 }, { "epoch": 0.32825149826516664, "grad_norm": 0.5812007784843445, "learning_rate": 3.359408404303789e-05, "loss": 2.2677, "step": 93660 }, { "epoch": 0.32846178109557356, "grad_norm": 0.5916142463684082, "learning_rate": 3.3583569901517545e-05, "loss": 2.2611, "step": 93720 }, { "epoch": 0.3286720639259804, "grad_norm": 0.575390100479126, "learning_rate": 3.35730557599972e-05, "loss": 2.2667, "step": 93780 }, { "epoch": 0.32888234675638733, "grad_norm": 0.559893012046814, "learning_rate": 3.356254161847685e-05, "loss": 2.2486, "step": 93840 }, { "epoch": 0.32909262958679425, "grad_norm": 0.5065498948097229, "learning_rate": 3.355202747695651e-05, "loss": 2.2562, "step": 93900 }, { "epoch": 0.3293029124172011, "grad_norm": 0.6010339856147766, "learning_rate": 3.3541513335436166e-05, "loss": 2.2599, "step": 93960 }, { "epoch": 0.329513195247608, "grad_norm": 0.5235238671302795, "learning_rate": 3.353099919391582e-05, "loss": 2.2639, "step": 94020 }, { "epoch": 0.32972347807801494, "grad_norm": 0.48841866850852966, "learning_rate": 3.352048505239547e-05, "loss": 2.2623, "step": 94080 }, { "epoch": 0.3299337609084218, "grad_norm": 0.5267378687858582, "learning_rate": 3.350997091087513e-05, "loss": 2.2622, "step": 94140 }, { "epoch": 0.3301440437388287, "grad_norm": 0.6126842498779297, "learning_rate": 3.349945676935478e-05, 
"loss": 2.266, "step": 94200 }, { "epoch": 0.33035432656923563, "grad_norm": 0.5331740379333496, "learning_rate": 3.348894262783444e-05, "loss": 2.2528, "step": 94260 }, { "epoch": 0.3305646093996425, "grad_norm": 0.5459609031677246, "learning_rate": 3.34784284863141e-05, "loss": 2.2501, "step": 94320 }, { "epoch": 0.3307748922300494, "grad_norm": 0.5753666162490845, "learning_rate": 3.346791434479375e-05, "loss": 2.241, "step": 94380 }, { "epoch": 0.3309851750604563, "grad_norm": 0.5683814287185669, "learning_rate": 3.34574002032734e-05, "loss": 2.2516, "step": 94440 }, { "epoch": 0.3311954578908632, "grad_norm": 0.5722650289535522, "learning_rate": 3.3447061297445066e-05, "loss": 2.2594, "step": 94500 }, { "epoch": 0.3314057407212701, "grad_norm": 0.5353830456733704, "learning_rate": 3.343654715592472e-05, "loss": 2.2654, "step": 94560 }, { "epoch": 0.331616023551677, "grad_norm": 0.5557458400726318, "learning_rate": 3.3426033014404376e-05, "loss": 2.2348, "step": 94620 }, { "epoch": 0.3318263063820839, "grad_norm": 0.5775477886199951, "learning_rate": 3.3415518872884035e-05, "loss": 2.2588, "step": 94680 }, { "epoch": 0.3320365892124908, "grad_norm": 0.6219456195831299, "learning_rate": 3.340500473136369e-05, "loss": 2.2453, "step": 94740 }, { "epoch": 0.3322468720428977, "grad_norm": 0.5657910108566284, "learning_rate": 3.339449058984334e-05, "loss": 2.2507, "step": 94800 }, { "epoch": 0.3324571548733046, "grad_norm": 0.5465739369392395, "learning_rate": 3.3383976448323e-05, "loss": 2.2523, "step": 94860 }, { "epoch": 0.3326674377037115, "grad_norm": 0.5367757081985474, "learning_rate": 3.337346230680265e-05, "loss": 2.2658, "step": 94920 }, { "epoch": 0.3328777205341184, "grad_norm": 0.5744025707244873, "learning_rate": 3.33629481652823e-05, "loss": 2.2767, "step": 94980 }, { "epoch": 0.33308800336452526, "grad_norm": 0.5318028330802917, "learning_rate": 3.335243402376197e-05, "loss": 2.2766, "step": 95040 }, { "epoch": 0.3332982861949322, "grad_norm": 0.5595847368240356, "learning_rate": 3.334191988224162e-05, "loss": 2.2378, "step": 95100 }, { "epoch": 0.3335085690253391, "grad_norm": 0.5904885530471802, "learning_rate": 3.333140574072127e-05, "loss": 2.2638, "step": 95160 }, { "epoch": 0.33371885185574596, "grad_norm": 0.5781721472740173, "learning_rate": 3.332089159920093e-05, "loss": 2.2609, "step": 95220 }, { "epoch": 0.3339291346861529, "grad_norm": 0.5953025221824646, "learning_rate": 3.331037745768058e-05, "loss": 2.2427, "step": 95280 }, { "epoch": 0.3341394175165598, "grad_norm": 0.5774268507957458, "learning_rate": 3.329986331616023e-05, "loss": 2.2708, "step": 95340 }, { "epoch": 0.33434970034696665, "grad_norm": 0.6212142705917358, "learning_rate": 3.328934917463989e-05, "loss": 2.2473, "step": 95400 }, { "epoch": 0.33455998317737357, "grad_norm": 0.6178370714187622, "learning_rate": 3.327883503311955e-05, "loss": 2.2389, "step": 95460 }, { "epoch": 0.3347702660077805, "grad_norm": 0.630793035030365, "learning_rate": 3.32683208915992e-05, "loss": 2.2598, "step": 95520 }, { "epoch": 0.33498054883818734, "grad_norm": 0.6330223083496094, "learning_rate": 3.3257806750078854e-05, "loss": 2.2453, "step": 95580 }, { "epoch": 0.33519083166859426, "grad_norm": 0.6235046982765198, "learning_rate": 3.324729260855851e-05, "loss": 2.2458, "step": 95640 }, { "epoch": 0.3354011144990012, "grad_norm": 0.5611142516136169, "learning_rate": 3.3236778467038165e-05, "loss": 2.2608, "step": 95700 }, { "epoch": 0.33561139732940803, "grad_norm": 0.5523935556411743, "learning_rate": 
3.322626432551782e-05, "loss": 2.2592, "step": 95760 }, { "epoch": 0.33582168015981495, "grad_norm": 0.5253689289093018, "learning_rate": 3.321575018399748e-05, "loss": 2.2541, "step": 95820 }, { "epoch": 0.33603196299022187, "grad_norm": 0.578079104423523, "learning_rate": 3.3205236042477134e-05, "loss": 2.2629, "step": 95880 }, { "epoch": 0.3362422458206287, "grad_norm": 0.5487265586853027, "learning_rate": 3.3194721900956786e-05, "loss": 2.2482, "step": 95940 }, { "epoch": 0.33645252865103564, "grad_norm": 0.5192381739616394, "learning_rate": 3.3184207759436444e-05, "loss": 2.2556, "step": 96000 }, { "epoch": 0.33666281148144256, "grad_norm": 0.6124681234359741, "learning_rate": 3.3173693617916096e-05, "loss": 2.2545, "step": 96060 }, { "epoch": 0.3368730943118494, "grad_norm": 0.5543643832206726, "learning_rate": 3.3163179476395755e-05, "loss": 2.2566, "step": 96120 }, { "epoch": 0.33708337714225634, "grad_norm": 0.5779359936714172, "learning_rate": 3.315266533487541e-05, "loss": 2.2612, "step": 96180 }, { "epoch": 0.33729365997266325, "grad_norm": 0.5384775400161743, "learning_rate": 3.3142151193355066e-05, "loss": 2.258, "step": 96240 }, { "epoch": 0.3375039428030701, "grad_norm": 0.6284984946250916, "learning_rate": 3.313163705183472e-05, "loss": 2.2617, "step": 96300 }, { "epoch": 0.337714225633477, "grad_norm": 0.5733190774917603, "learning_rate": 3.312112291031437e-05, "loss": 2.2411, "step": 96360 }, { "epoch": 0.33792450846388394, "grad_norm": 0.5528522729873657, "learning_rate": 3.311060876879403e-05, "loss": 2.2512, "step": 96420 }, { "epoch": 0.3381347912942908, "grad_norm": 0.6423466205596924, "learning_rate": 3.310009462727369e-05, "loss": 2.2564, "step": 96480 }, { "epoch": 0.3383450741246977, "grad_norm": 0.5352001786231995, "learning_rate": 3.308975572144535e-05, "loss": 2.2525, "step": 96540 }, { "epoch": 0.33855535695510464, "grad_norm": 0.5893850922584534, "learning_rate": 3.3079241579925e-05, "loss": 2.2474, "step": 96600 }, { "epoch": 0.3387656397855115, "grad_norm": 0.5544783473014832, "learning_rate": 3.3068727438404654e-05, "loss": 2.2595, "step": 96660 }, { "epoch": 0.3389759226159184, "grad_norm": 0.5462645292282104, "learning_rate": 3.305821329688431e-05, "loss": 2.2581, "step": 96720 }, { "epoch": 0.33918620544632533, "grad_norm": 0.5372522473335266, "learning_rate": 3.3047699155363965e-05, "loss": 2.2553, "step": 96780 }, { "epoch": 0.3393964882767322, "grad_norm": 0.5661049485206604, "learning_rate": 3.303718501384362e-05, "loss": 2.2475, "step": 96840 }, { "epoch": 0.3396067711071391, "grad_norm": 0.5360636711120605, "learning_rate": 3.3026670872323276e-05, "loss": 2.2632, "step": 96900 }, { "epoch": 0.339817053937546, "grad_norm": 0.5690281987190247, "learning_rate": 3.3016156730802934e-05, "loss": 2.2427, "step": 96960 }, { "epoch": 0.3400273367679529, "grad_norm": 0.5777586102485657, "learning_rate": 3.3005642589282586e-05, "loss": 2.2635, "step": 97020 }, { "epoch": 0.3402376195983598, "grad_norm": 0.570685625076294, "learning_rate": 3.299512844776224e-05, "loss": 2.2486, "step": 97080 }, { "epoch": 0.3404479024287667, "grad_norm": 0.5590705275535583, "learning_rate": 3.29846143062419e-05, "loss": 2.259, "step": 97140 }, { "epoch": 0.3406581852591736, "grad_norm": 0.5950242877006531, "learning_rate": 3.297410016472155e-05, "loss": 2.2606, "step": 97200 }, { "epoch": 0.3408684680895805, "grad_norm": 0.5764995813369751, "learning_rate": 3.296358602320121e-05, "loss": 2.2652, "step": 97260 }, { "epoch": 0.3410787509199874, "grad_norm": 
0.5674739480018616, "learning_rate": 3.2953071881680866e-05, "loss": 2.2421, "step": 97320 }, { "epoch": 0.34128903375039427, "grad_norm": 0.5347117781639099, "learning_rate": 3.294255774016052e-05, "loss": 2.2452, "step": 97380 }, { "epoch": 0.3414993165808012, "grad_norm": 0.5449021458625793, "learning_rate": 3.293204359864017e-05, "loss": 2.2509, "step": 97440 }, { "epoch": 0.3417095994112081, "grad_norm": 0.5273362994194031, "learning_rate": 3.292152945711983e-05, "loss": 2.2523, "step": 97500 }, { "epoch": 0.34191988224161496, "grad_norm": 0.5723702907562256, "learning_rate": 3.291101531559948e-05, "loss": 2.248, "step": 97560 }, { "epoch": 0.3421301650720219, "grad_norm": 0.5732927918434143, "learning_rate": 3.290050117407914e-05, "loss": 2.2491, "step": 97620 }, { "epoch": 0.3423404479024288, "grad_norm": 0.5697137117385864, "learning_rate": 3.28899870325588e-05, "loss": 2.2542, "step": 97680 }, { "epoch": 0.34255073073283565, "grad_norm": 0.5517762899398804, "learning_rate": 3.287947289103845e-05, "loss": 2.258, "step": 97740 }, { "epoch": 0.34276101356324257, "grad_norm": 0.5736147165298462, "learning_rate": 3.28689587495181e-05, "loss": 2.2695, "step": 97800 }, { "epoch": 0.3429712963936495, "grad_norm": 0.5735359191894531, "learning_rate": 3.285844460799776e-05, "loss": 2.2411, "step": 97860 }, { "epoch": 0.34318157922405634, "grad_norm": 0.5607940554618835, "learning_rate": 3.284793046647741e-05, "loss": 2.2488, "step": 97920 }, { "epoch": 0.34339186205446326, "grad_norm": 0.5465531349182129, "learning_rate": 3.283741632495707e-05, "loss": 2.2673, "step": 97980 }, { "epoch": 0.3436021448848702, "grad_norm": 0.6117886900901794, "learning_rate": 3.282690218343672e-05, "loss": 2.2608, "step": 98040 }, { "epoch": 0.34381242771527704, "grad_norm": 0.5727432370185852, "learning_rate": 3.281638804191638e-05, "loss": 2.2667, "step": 98100 }, { "epoch": 0.34402271054568395, "grad_norm": 0.5974268913269043, "learning_rate": 3.280587390039603e-05, "loss": 2.2436, "step": 98160 }, { "epoch": 0.34423299337609087, "grad_norm": 0.5546492338180542, "learning_rate": 3.2795359758875685e-05, "loss": 2.2442, "step": 98220 }, { "epoch": 0.34444327620649773, "grad_norm": 0.5560635328292847, "learning_rate": 3.2784845617355344e-05, "loss": 2.2408, "step": 98280 }, { "epoch": 0.34465355903690464, "grad_norm": 0.570353090763092, "learning_rate": 3.2774331475835e-05, "loss": 2.2486, "step": 98340 }, { "epoch": 0.34486384186731156, "grad_norm": 0.6234951615333557, "learning_rate": 3.2763817334314654e-05, "loss": 2.2398, "step": 98400 }, { "epoch": 0.3450741246977184, "grad_norm": 0.5658458471298218, "learning_rate": 3.275330319279431e-05, "loss": 2.2492, "step": 98460 }, { "epoch": 0.34528440752812534, "grad_norm": 0.6331504583358765, "learning_rate": 3.2742789051273965e-05, "loss": 2.2568, "step": 98520 }, { "epoch": 0.34549469035853225, "grad_norm": 0.5121544003486633, "learning_rate": 3.273245014544563e-05, "loss": 2.2432, "step": 98580 }, { "epoch": 0.3457049731889391, "grad_norm": 0.5852991342544556, "learning_rate": 3.272193600392528e-05, "loss": 2.2823, "step": 98640 }, { "epoch": 0.34591525601934603, "grad_norm": 0.5442538261413574, "learning_rate": 3.271142186240493e-05, "loss": 2.2429, "step": 98700 }, { "epoch": 0.34612553884975295, "grad_norm": 0.5579994320869446, "learning_rate": 3.270090772088459e-05, "loss": 2.235, "step": 98760 }, { "epoch": 0.3463358216801598, "grad_norm": 0.5348595976829529, "learning_rate": 3.269039357936425e-05, "loss": 2.247, "step": 98820 }, { "epoch": 
0.3465461045105667, "grad_norm": 0.5732651352882385, "learning_rate": 3.26798794378439e-05, "loss": 2.249, "step": 98880 }, { "epoch": 0.3467563873409736, "grad_norm": 0.5671512484550476, "learning_rate": 3.2669365296323553e-05, "loss": 2.2515, "step": 98940 }, { "epoch": 0.3469666701713805, "grad_norm": 0.5615571737289429, "learning_rate": 3.265885115480321e-05, "loss": 2.2373, "step": 99000 }, { "epoch": 0.3471769530017874, "grad_norm": 0.5443949699401855, "learning_rate": 3.2648337013282864e-05, "loss": 2.2428, "step": 99060 }, { "epoch": 0.3473872358321943, "grad_norm": 0.5558658838272095, "learning_rate": 3.263782287176252e-05, "loss": 2.2556, "step": 99120 }, { "epoch": 0.3475975186626012, "grad_norm": 0.7935243248939514, "learning_rate": 3.262730873024218e-05, "loss": 2.2481, "step": 99180 }, { "epoch": 0.3478078014930081, "grad_norm": 0.5348489880561829, "learning_rate": 3.261679458872183e-05, "loss": 2.2458, "step": 99240 }, { "epoch": 0.34801808432341497, "grad_norm": 0.5577119588851929, "learning_rate": 3.2606280447201485e-05, "loss": 2.2317, "step": 99300 }, { "epoch": 0.3482283671538219, "grad_norm": 0.6064680218696594, "learning_rate": 3.2595766305681144e-05, "loss": 2.2508, "step": 99360 }, { "epoch": 0.3484386499842288, "grad_norm": 0.534321129322052, "learning_rate": 3.2585252164160796e-05, "loss": 2.2448, "step": 99420 }, { "epoch": 0.34864893281463566, "grad_norm": 0.6524277925491333, "learning_rate": 3.2574738022640454e-05, "loss": 2.2563, "step": 99480 }, { "epoch": 0.3488592156450426, "grad_norm": 0.5712284445762634, "learning_rate": 3.2564223881120106e-05, "loss": 2.2582, "step": 99540 }, { "epoch": 0.3490694984754495, "grad_norm": 0.6521320343017578, "learning_rate": 3.2553709739599765e-05, "loss": 2.2551, "step": 99600 }, { "epoch": 0.34927978130585635, "grad_norm": 0.5247908234596252, "learning_rate": 3.254319559807942e-05, "loss": 2.2573, "step": 99660 }, { "epoch": 0.34949006413626327, "grad_norm": 0.5874356627464294, "learning_rate": 3.253268145655907e-05, "loss": 2.2516, "step": 99720 }, { "epoch": 0.3497003469666702, "grad_norm": 0.5620294213294983, "learning_rate": 3.252216731503873e-05, "loss": 2.2459, "step": 99780 }, { "epoch": 0.34991062979707704, "grad_norm": 0.5107635259628296, "learning_rate": 3.2511653173518386e-05, "loss": 2.2529, "step": 99840 }, { "epoch": 0.35012091262748396, "grad_norm": 0.5404499173164368, "learning_rate": 3.250113903199804e-05, "loss": 2.254, "step": 99900 }, { "epoch": 0.3503311954578909, "grad_norm": 0.6408905982971191, "learning_rate": 3.24906248904777e-05, "loss": 2.2524, "step": 99960 }, { "epoch": 0.35054147828829774, "grad_norm": 0.5619672536849976, "learning_rate": 3.248011074895735e-05, "loss": 2.262, "step": 100020 }, { "epoch": 0.35075176111870465, "grad_norm": 0.5574461817741394, "learning_rate": 3.2469596607437e-05, "loss": 2.2523, "step": 100080 }, { "epoch": 0.35096204394911157, "grad_norm": 0.5604238510131836, "learning_rate": 3.245908246591666e-05, "loss": 2.25, "step": 100140 }, { "epoch": 0.35117232677951843, "grad_norm": 0.5675678849220276, "learning_rate": 3.244856832439632e-05, "loss": 2.2511, "step": 100200 }, { "epoch": 0.35138260960992534, "grad_norm": 0.5655011534690857, "learning_rate": 3.243805418287597e-05, "loss": 2.2651, "step": 100260 }, { "epoch": 0.35159289244033226, "grad_norm": 0.5608187317848206, "learning_rate": 3.2427715277047634e-05, "loss": 2.2427, "step": 100320 }, { "epoch": 0.3518031752707391, "grad_norm": 0.597961962223053, "learning_rate": 3.2417201135527285e-05, "loss": 2.2632, 
"step": 100380 }, { "epoch": 0.35201345810114604, "grad_norm": 0.6023119688034058, "learning_rate": 3.240668699400694e-05, "loss": 2.2564, "step": 100440 }, { "epoch": 0.35222374093155295, "grad_norm": 0.5368738174438477, "learning_rate": 3.2396172852486596e-05, "loss": 2.2621, "step": 100500 }, { "epoch": 0.3524340237619598, "grad_norm": 0.5581424832344055, "learning_rate": 3.238565871096625e-05, "loss": 2.2489, "step": 100560 }, { "epoch": 0.35264430659236673, "grad_norm": 0.5462180376052856, "learning_rate": 3.2375144569445907e-05, "loss": 2.2416, "step": 100620 }, { "epoch": 0.35285458942277365, "grad_norm": 0.5432653427124023, "learning_rate": 3.2364630427925565e-05, "loss": 2.2484, "step": 100680 }, { "epoch": 0.3530648722531805, "grad_norm": 0.7162202596664429, "learning_rate": 3.235411628640522e-05, "loss": 2.2335, "step": 100740 }, { "epoch": 0.3532751550835874, "grad_norm": 0.5798888802528381, "learning_rate": 3.234360214488487e-05, "loss": 2.2471, "step": 100800 }, { "epoch": 0.35348543791399434, "grad_norm": 0.56444251537323, "learning_rate": 3.233308800336453e-05, "loss": 2.2548, "step": 100860 }, { "epoch": 0.3536957207444012, "grad_norm": 0.5319738388061523, "learning_rate": 3.232257386184418e-05, "loss": 2.2546, "step": 100920 }, { "epoch": 0.3539060035748081, "grad_norm": 0.5734124779701233, "learning_rate": 3.231205972032384e-05, "loss": 2.2415, "step": 100980 }, { "epoch": 0.35411628640521503, "grad_norm": 0.5583657026290894, "learning_rate": 3.23015455788035e-05, "loss": 2.2401, "step": 101040 }, { "epoch": 0.3543265692356219, "grad_norm": 0.5711774826049805, "learning_rate": 3.229103143728315e-05, "loss": 2.2468, "step": 101100 }, { "epoch": 0.3545368520660288, "grad_norm": 0.5239578485488892, "learning_rate": 3.22805172957628e-05, "loss": 2.2571, "step": 101160 }, { "epoch": 0.3547471348964357, "grad_norm": 0.56839919090271, "learning_rate": 3.227000315424246e-05, "loss": 2.2466, "step": 101220 }, { "epoch": 0.3549574177268426, "grad_norm": 0.585908830165863, "learning_rate": 3.225948901272211e-05, "loss": 2.2559, "step": 101280 }, { "epoch": 0.3551677005572495, "grad_norm": 0.628909707069397, "learning_rate": 3.224897487120177e-05, "loss": 2.2551, "step": 101340 }, { "epoch": 0.3553779833876564, "grad_norm": 0.6182568073272705, "learning_rate": 3.223846072968142e-05, "loss": 2.2379, "step": 101400 }, { "epoch": 0.3555882662180633, "grad_norm": 0.5343201160430908, "learning_rate": 3.222794658816108e-05, "loss": 2.2346, "step": 101460 }, { "epoch": 0.3557985490484702, "grad_norm": 0.5548133850097656, "learning_rate": 3.221743244664073e-05, "loss": 2.2461, "step": 101520 }, { "epoch": 0.3560088318788771, "grad_norm": 0.6163520812988281, "learning_rate": 3.2206918305120384e-05, "loss": 2.2595, "step": 101580 }, { "epoch": 0.35621911470928397, "grad_norm": 0.5295227766036987, "learning_rate": 3.219640416360004e-05, "loss": 2.2445, "step": 101640 }, { "epoch": 0.3564293975396909, "grad_norm": 0.5932655930519104, "learning_rate": 3.21858900220797e-05, "loss": 2.2535, "step": 101700 }, { "epoch": 0.3566396803700978, "grad_norm": 0.5422408580780029, "learning_rate": 3.2175375880559353e-05, "loss": 2.2535, "step": 101760 }, { "epoch": 0.35684996320050466, "grad_norm": 0.5753630995750427, "learning_rate": 3.216486173903901e-05, "loss": 2.2419, "step": 101820 }, { "epoch": 0.3570602460309116, "grad_norm": 0.5593083500862122, "learning_rate": 3.2154347597518664e-05, "loss": 2.2441, "step": 101880 }, { "epoch": 0.3572705288613185, "grad_norm": 0.6243857145309448, "learning_rate": 
3.2143833455998316e-05, "loss": 2.2362, "step": 101940 }, { "epoch": 0.35748081169172535, "grad_norm": 0.5592348575592041, "learning_rate": 3.2133319314477975e-05, "loss": 2.2408, "step": 102000 }, { "epoch": 0.35769109452213227, "grad_norm": 0.5877608060836792, "learning_rate": 3.212280517295763e-05, "loss": 2.2413, "step": 102060 }, { "epoch": 0.3579013773525392, "grad_norm": 0.5965809226036072, "learning_rate": 3.2112291031437285e-05, "loss": 2.2449, "step": 102120 }, { "epoch": 0.35811166018294605, "grad_norm": 0.5869651436805725, "learning_rate": 3.210177688991694e-05, "loss": 2.2437, "step": 102180 }, { "epoch": 0.35832194301335296, "grad_norm": 0.6029491424560547, "learning_rate": 3.2091262748396596e-05, "loss": 2.2413, "step": 102240 }, { "epoch": 0.3585322258437599, "grad_norm": 0.628241777420044, "learning_rate": 3.208074860687625e-05, "loss": 2.251, "step": 102300 }, { "epoch": 0.35874250867416674, "grad_norm": 0.5701780319213867, "learning_rate": 3.20702344653559e-05, "loss": 2.2615, "step": 102360 }, { "epoch": 0.35895279150457365, "grad_norm": 0.5451123714447021, "learning_rate": 3.2059720323835565e-05, "loss": 2.2572, "step": 102420 }, { "epoch": 0.35916307433498057, "grad_norm": 0.5642544627189636, "learning_rate": 3.204920618231522e-05, "loss": 2.2575, "step": 102480 }, { "epoch": 0.35937335716538743, "grad_norm": 0.6085901260375977, "learning_rate": 3.203869204079487e-05, "loss": 2.2534, "step": 102540 }, { "epoch": 0.35958363999579435, "grad_norm": Infinity, "learning_rate": 3.202835313496653e-05, "loss": 2.2543, "step": 102600 }, { "epoch": 0.35979392282620126, "grad_norm": 0.6252976059913635, "learning_rate": 3.2017838993446185e-05, "loss": 2.2496, "step": 102660 }, { "epoch": 0.3600042056566081, "grad_norm": 0.6806179285049438, "learning_rate": 3.200732485192584e-05, "loss": 2.255, "step": 102720 }, { "epoch": 0.36021448848701504, "grad_norm": 0.5911528468132019, "learning_rate": 3.1996810710405495e-05, "loss": 2.247, "step": 102780 }, { "epoch": 0.36042477131742195, "grad_norm": 0.5509805083274841, "learning_rate": 3.1986296568885154e-05, "loss": 2.2584, "step": 102840 }, { "epoch": 0.3606350541478288, "grad_norm": 0.5932069420814514, "learning_rate": 3.1975782427364806e-05, "loss": 2.2533, "step": 102900 }, { "epoch": 0.36084533697823573, "grad_norm": 0.5719885230064392, "learning_rate": 3.1965268285844464e-05, "loss": 2.2415, "step": 102960 }, { "epoch": 0.36105561980864265, "grad_norm": 0.5080400705337524, "learning_rate": 3.1954754144324116e-05, "loss": 2.257, "step": 103020 }, { "epoch": 0.3612659026390495, "grad_norm": 0.5492014288902283, "learning_rate": 3.194424000280377e-05, "loss": 2.2447, "step": 103080 }, { "epoch": 0.3614761854694564, "grad_norm": 0.5836805105209351, "learning_rate": 3.193372586128343e-05, "loss": 2.2559, "step": 103140 }, { "epoch": 0.36168646829986334, "grad_norm": 0.563239574432373, "learning_rate": 3.1923211719763085e-05, "loss": 2.2493, "step": 103200 }, { "epoch": 0.3618967511302702, "grad_norm": 0.5607362389564514, "learning_rate": 3.191269757824274e-05, "loss": 2.2417, "step": 103260 }, { "epoch": 0.3621070339606771, "grad_norm": 0.5219749808311462, "learning_rate": 3.1902183436722396e-05, "loss": 2.2553, "step": 103320 }, { "epoch": 0.36231731679108403, "grad_norm": 0.5371667742729187, "learning_rate": 3.189166929520205e-05, "loss": 2.2527, "step": 103380 }, { "epoch": 0.3625275996214909, "grad_norm": 0.6101118326187134, "learning_rate": 3.18811551536817e-05, "loss": 2.2503, "step": 103440 }, { "epoch": 0.3627378824518978, 
"grad_norm": 0.578412652015686, "learning_rate": 3.187064101216136e-05, "loss": 2.2537, "step": 103500 }, { "epoch": 0.3629481652823047, "grad_norm": 0.5483526587486267, "learning_rate": 3.186012687064102e-05, "loss": 2.2547, "step": 103560 }, { "epoch": 0.3631584481127116, "grad_norm": 0.5780754685401917, "learning_rate": 3.184961272912067e-05, "loss": 2.2482, "step": 103620 }, { "epoch": 0.3633687309431185, "grad_norm": 0.5565093159675598, "learning_rate": 3.183909858760033e-05, "loss": 2.2445, "step": 103680 }, { "epoch": 0.3635790137735254, "grad_norm": 0.5821536183357239, "learning_rate": 3.182858444607998e-05, "loss": 2.2456, "step": 103740 }, { "epoch": 0.3637892966039323, "grad_norm": 0.5497275590896606, "learning_rate": 3.181807030455963e-05, "loss": 2.2416, "step": 103800 }, { "epoch": 0.3639995794343392, "grad_norm": 0.5313774347305298, "learning_rate": 3.180755616303929e-05, "loss": 2.2555, "step": 103860 }, { "epoch": 0.3642098622647461, "grad_norm": 0.5351090431213379, "learning_rate": 3.179704202151895e-05, "loss": 2.2355, "step": 103920 }, { "epoch": 0.36442014509515297, "grad_norm": 0.5801105499267578, "learning_rate": 3.17865278799986e-05, "loss": 2.2415, "step": 103980 }, { "epoch": 0.3646304279255599, "grad_norm": 0.5337709784507751, "learning_rate": 3.177601373847825e-05, "loss": 2.2519, "step": 104040 }, { "epoch": 0.3648407107559668, "grad_norm": 0.6514520049095154, "learning_rate": 3.176549959695791e-05, "loss": 2.2415, "step": 104100 }, { "epoch": 0.36505099358637366, "grad_norm": 0.5500940680503845, "learning_rate": 3.175498545543756e-05, "loss": 2.2488, "step": 104160 }, { "epoch": 0.3652612764167806, "grad_norm": 0.6053885221481323, "learning_rate": 3.1744471313917215e-05, "loss": 2.2521, "step": 104220 }, { "epoch": 0.3654715592471875, "grad_norm": 0.5517741441726685, "learning_rate": 3.173395717239688e-05, "loss": 2.2446, "step": 104280 }, { "epoch": 0.36568184207759435, "grad_norm": 0.5213547348976135, "learning_rate": 3.172344303087653e-05, "loss": 2.2482, "step": 104340 }, { "epoch": 0.36589212490800127, "grad_norm": 0.5915991067886353, "learning_rate": 3.1712928889356184e-05, "loss": 2.2425, "step": 104400 }, { "epoch": 0.3661024077384082, "grad_norm": 0.5863001346588135, "learning_rate": 3.170241474783584e-05, "loss": 2.2535, "step": 104460 }, { "epoch": 0.36631269056881505, "grad_norm": 0.5261284708976746, "learning_rate": 3.1691900606315495e-05, "loss": 2.2545, "step": 104520 }, { "epoch": 0.36652297339922196, "grad_norm": 0.5854162573814392, "learning_rate": 3.168138646479515e-05, "loss": 2.2545, "step": 104580 }, { "epoch": 0.3667332562296288, "grad_norm": 0.5391114354133606, "learning_rate": 3.1670872323274805e-05, "loss": 2.2462, "step": 104640 }, { "epoch": 0.36694353906003574, "grad_norm": 0.551304280757904, "learning_rate": 3.1660358181754464e-05, "loss": 2.2438, "step": 104700 }, { "epoch": 0.36715382189044266, "grad_norm": 0.5472719073295593, "learning_rate": 3.1649844040234116e-05, "loss": 2.2427, "step": 104760 }, { "epoch": 0.3673641047208495, "grad_norm": 0.5311259031295776, "learning_rate": 3.163932989871377e-05, "loss": 2.2521, "step": 104820 }, { "epoch": 0.36757438755125643, "grad_norm": 0.6197320818901062, "learning_rate": 3.1628815757193427e-05, "loss": 2.2467, "step": 104880 }, { "epoch": 0.36778467038166335, "grad_norm": 0.5505039095878601, "learning_rate": 3.161830161567308e-05, "loss": 2.2456, "step": 104940 }, { "epoch": 0.3679949532120702, "grad_norm": 0.5435019135475159, "learning_rate": 3.160778747415274e-05, "loss": 2.2502, 
"step": 105000 }, { "epoch": 0.3682052360424771, "grad_norm": 0.5461676716804504, "learning_rate": 3.1597273332632396e-05, "loss": 2.2565, "step": 105060 }, { "epoch": 0.36841551887288404, "grad_norm": 0.5456289649009705, "learning_rate": 3.158693442680405e-05, "loss": 2.2574, "step": 105120 }, { "epoch": 0.3686258017032909, "grad_norm": 0.4910428524017334, "learning_rate": 3.157642028528371e-05, "loss": 2.2321, "step": 105180 }, { "epoch": 0.3688360845336978, "grad_norm": 0.5281582474708557, "learning_rate": 3.1565906143763363e-05, "loss": 2.237, "step": 105240 }, { "epoch": 0.36904636736410473, "grad_norm": 0.5398959517478943, "learning_rate": 3.1555392002243015e-05, "loss": 2.2487, "step": 105300 }, { "epoch": 0.3692566501945116, "grad_norm": 0.5760674476623535, "learning_rate": 3.1544877860722674e-05, "loss": 2.2384, "step": 105360 }, { "epoch": 0.3694669330249185, "grad_norm": 0.5500927567481995, "learning_rate": 3.153436371920233e-05, "loss": 2.2577, "step": 105420 }, { "epoch": 0.3696772158553254, "grad_norm": 0.5500715970993042, "learning_rate": 3.1523849577681985e-05, "loss": 2.2433, "step": 105480 }, { "epoch": 0.3698874986857323, "grad_norm": 0.5827727317810059, "learning_rate": 3.1513335436161636e-05, "loss": 2.2465, "step": 105540 }, { "epoch": 0.3700977815161392, "grad_norm": 0.5601902604103088, "learning_rate": 3.1502821294641295e-05, "loss": 2.25, "step": 105600 }, { "epoch": 0.3703080643465461, "grad_norm": 0.561234712600708, "learning_rate": 3.149230715312095e-05, "loss": 2.2434, "step": 105660 }, { "epoch": 0.370518347176953, "grad_norm": 0.5326609015464783, "learning_rate": 3.14817930116006e-05, "loss": 2.2481, "step": 105720 }, { "epoch": 0.3707286300073599, "grad_norm": 0.6148693561553955, "learning_rate": 3.1471278870080264e-05, "loss": 2.2336, "step": 105780 }, { "epoch": 0.3709389128377668, "grad_norm": 0.5935849547386169, "learning_rate": 3.1460764728559916e-05, "loss": 2.2555, "step": 105840 }, { "epoch": 0.37114919566817367, "grad_norm": 0.5288058519363403, "learning_rate": 3.145025058703957e-05, "loss": 2.2413, "step": 105900 }, { "epoch": 0.3713594784985806, "grad_norm": 0.5751053690910339, "learning_rate": 3.143973644551923e-05, "loss": 2.2566, "step": 105960 }, { "epoch": 0.3715697613289875, "grad_norm": 0.528931200504303, "learning_rate": 3.142922230399888e-05, "loss": 2.236, "step": 106020 }, { "epoch": 0.37178004415939436, "grad_norm": 0.5181067585945129, "learning_rate": 3.141870816247853e-05, "loss": 2.2493, "step": 106080 }, { "epoch": 0.3719903269898013, "grad_norm": 0.5679474472999573, "learning_rate": 3.1408194020958196e-05, "loss": 2.2428, "step": 106140 }, { "epoch": 0.3722006098202082, "grad_norm": 0.5477309226989746, "learning_rate": 3.139767987943785e-05, "loss": 2.2476, "step": 106200 }, { "epoch": 0.37241089265061506, "grad_norm": 0.531114399433136, "learning_rate": 3.13871657379175e-05, "loss": 2.2521, "step": 106260 }, { "epoch": 0.37262117548102197, "grad_norm": 0.6383332014083862, "learning_rate": 3.137665159639716e-05, "loss": 2.2458, "step": 106320 }, { "epoch": 0.3728314583114289, "grad_norm": 0.6344099640846252, "learning_rate": 3.136613745487681e-05, "loss": 2.2645, "step": 106380 }, { "epoch": 0.37304174114183575, "grad_norm": 0.5700390934944153, "learning_rate": 3.135562331335646e-05, "loss": 2.2407, "step": 106440 }, { "epoch": 0.37325202397224266, "grad_norm": 0.5498290061950684, "learning_rate": 3.134510917183612e-05, "loss": 2.2249, "step": 106500 }, { "epoch": 0.3734623068026496, "grad_norm": 0.560438871383667, 
"learning_rate": 3.133459503031578e-05, "loss": 2.2556, "step": 106560 }, { "epoch": 0.37367258963305644, "grad_norm": 0.5165132284164429, "learning_rate": 3.132408088879543e-05, "loss": 2.231, "step": 106620 }, { "epoch": 0.37388287246346336, "grad_norm": 0.7113255262374878, "learning_rate": 3.131356674727508e-05, "loss": 2.2551, "step": 106680 }, { "epoch": 0.37409315529387027, "grad_norm": 0.5605031847953796, "learning_rate": 3.130305260575474e-05, "loss": 2.2449, "step": 106740 }, { "epoch": 0.37430343812427713, "grad_norm": 0.49553272128105164, "learning_rate": 3.1292538464234394e-05, "loss": 2.2307, "step": 106800 }, { "epoch": 0.37451372095468405, "grad_norm": 0.558988094329834, "learning_rate": 3.128202432271405e-05, "loss": 2.2406, "step": 106860 }, { "epoch": 0.37472400378509096, "grad_norm": 0.5793077349662781, "learning_rate": 3.127151018119371e-05, "loss": 2.2598, "step": 106920 }, { "epoch": 0.3749342866154978, "grad_norm": 0.6070676445960999, "learning_rate": 3.126099603967336e-05, "loss": 2.246, "step": 106980 }, { "epoch": 0.37514456944590474, "grad_norm": 0.5537981390953064, "learning_rate": 3.1250481898153015e-05, "loss": 2.2416, "step": 107040 }, { "epoch": 0.37535485227631166, "grad_norm": 0.6169135570526123, "learning_rate": 3.1239967756632674e-05, "loss": 2.2434, "step": 107100 }, { "epoch": 0.3755651351067185, "grad_norm": 0.546450138092041, "learning_rate": 3.1229453615112326e-05, "loss": 2.2498, "step": 107160 }, { "epoch": 0.37577541793712543, "grad_norm": 0.5148147940635681, "learning_rate": 3.1218939473591984e-05, "loss": 2.2508, "step": 107220 }, { "epoch": 0.37598570076753235, "grad_norm": 0.5672466158866882, "learning_rate": 3.1208425332071636e-05, "loss": 2.2454, "step": 107280 }, { "epoch": 0.3761959835979392, "grad_norm": 0.5429059267044067, "learning_rate": 3.1197911190551295e-05, "loss": 2.2466, "step": 107340 }, { "epoch": 0.3764062664283461, "grad_norm": 0.6130840182304382, "learning_rate": 3.118739704903095e-05, "loss": 2.2551, "step": 107400 }, { "epoch": 0.37661654925875304, "grad_norm": 0.5849588513374329, "learning_rate": 3.11768829075106e-05, "loss": 2.2551, "step": 107460 }, { "epoch": 0.3768268320891599, "grad_norm": 0.6018226146697998, "learning_rate": 3.116636876599026e-05, "loss": 2.2457, "step": 107520 }, { "epoch": 0.3770371149195668, "grad_norm": 0.5993386507034302, "learning_rate": 3.1155854624469916e-05, "loss": 2.2304, "step": 107580 }, { "epoch": 0.37724739774997373, "grad_norm": 0.6036729216575623, "learning_rate": 3.114534048294957e-05, "loss": 2.2313, "step": 107640 }, { "epoch": 0.3774576805803806, "grad_norm": 0.5473097562789917, "learning_rate": 3.1134826341429227e-05, "loss": 2.2337, "step": 107700 }, { "epoch": 0.3776679634107875, "grad_norm": 0.526314914226532, "learning_rate": 3.112431219990888e-05, "loss": 2.2359, "step": 107760 }, { "epoch": 0.3778782462411944, "grad_norm": 0.5568339824676514, "learning_rate": 3.111379805838853e-05, "loss": 2.232, "step": 107820 }, { "epoch": 0.3780885290716013, "grad_norm": 0.5934057235717773, "learning_rate": 3.110328391686819e-05, "loss": 2.249, "step": 107880 }, { "epoch": 0.3782988119020082, "grad_norm": 0.5619835257530212, "learning_rate": 3.109276977534785e-05, "loss": 2.2361, "step": 107940 }, { "epoch": 0.3785090947324151, "grad_norm": 0.5756205320358276, "learning_rate": 3.10822556338275e-05, "loss": 2.238, "step": 108000 }, { "epoch": 0.378719377562822, "grad_norm": 0.5448141694068909, "learning_rate": 3.107174149230715e-05, "loss": 2.2498, "step": 108060 }, { "epoch": 
0.3789296603932289, "grad_norm": 0.5649576783180237, "learning_rate": 3.106122735078681e-05, "loss": 2.2352, "step": 108120 }, { "epoch": 0.3791399432236358, "grad_norm": 0.5113899111747742, "learning_rate": 3.105071320926646e-05, "loss": 2.2526, "step": 108180 }, { "epoch": 0.37935022605404267, "grad_norm": 0.5608682036399841, "learning_rate": 3.104019906774612e-05, "loss": 2.2474, "step": 108240 }, { "epoch": 0.3795605088844496, "grad_norm": 0.5401106476783752, "learning_rate": 3.102968492622578e-05, "loss": 2.2552, "step": 108300 }, { "epoch": 0.3797707917148565, "grad_norm": 0.5243397951126099, "learning_rate": 3.101917078470543e-05, "loss": 2.2306, "step": 108360 }, { "epoch": 0.37998107454526336, "grad_norm": 0.5430238246917725, "learning_rate": 3.100865664318508e-05, "loss": 2.2566, "step": 108420 }, { "epoch": 0.3801913573756703, "grad_norm": 0.5654410719871521, "learning_rate": 3.099814250166474e-05, "loss": 2.2548, "step": 108480 }, { "epoch": 0.3804016402060772, "grad_norm": 0.6027359366416931, "learning_rate": 3.0987628360144394e-05, "loss": 2.2466, "step": 108540 }, { "epoch": 0.38061192303648406, "grad_norm": 0.547913670539856, "learning_rate": 3.0977114218624046e-05, "loss": 2.2357, "step": 108600 }, { "epoch": 0.380822205866891, "grad_norm": 0.5652959942817688, "learning_rate": 3.096660007710371e-05, "loss": 2.2411, "step": 108660 }, { "epoch": 0.3810324886972979, "grad_norm": 0.5216713547706604, "learning_rate": 3.095608593558336e-05, "loss": 2.2482, "step": 108720 }, { "epoch": 0.38124277152770475, "grad_norm": 0.5479360222816467, "learning_rate": 3.0945571794063015e-05, "loss": 2.2338, "step": 108780 }, { "epoch": 0.38145305435811167, "grad_norm": 0.5628937482833862, "learning_rate": 3.0935057652542673e-05, "loss": 2.2531, "step": 108840 }, { "epoch": 0.3816633371885186, "grad_norm": 0.5069226622581482, "learning_rate": 3.0924543511022325e-05, "loss": 2.2533, "step": 108900 }, { "epoch": 0.38187362001892544, "grad_norm": 0.5676142573356628, "learning_rate": 3.091402936950198e-05, "loss": 2.2329, "step": 108960 }, { "epoch": 0.38208390284933236, "grad_norm": 0.50908362865448, "learning_rate": 3.0903515227981636e-05, "loss": 2.2376, "step": 109020 }, { "epoch": 0.3822941856797393, "grad_norm": 0.5596504211425781, "learning_rate": 3.0893001086461295e-05, "loss": 2.2539, "step": 109080 }, { "epoch": 0.38250446851014613, "grad_norm": 0.5801405906677246, "learning_rate": 3.0882486944940946e-05, "loss": 2.2478, "step": 109140 }, { "epoch": 0.38271475134055305, "grad_norm": 0.5595638751983643, "learning_rate": 3.08719728034206e-05, "loss": 2.247, "step": 109200 }, { "epoch": 0.38292503417095997, "grad_norm": 0.5531991124153137, "learning_rate": 3.086163389759226e-05, "loss": 2.2325, "step": 109260 }, { "epoch": 0.3831353170013668, "grad_norm": 0.5818877816200256, "learning_rate": 3.0851119756071914e-05, "loss": 2.2396, "step": 109320 }, { "epoch": 0.38334559983177374, "grad_norm": 0.550900399684906, "learning_rate": 3.084060561455157e-05, "loss": 2.2317, "step": 109380 }, { "epoch": 0.38355588266218066, "grad_norm": 0.5770589709281921, "learning_rate": 3.083009147303123e-05, "loss": 2.2427, "step": 109440 }, { "epoch": 0.3837661654925875, "grad_norm": 0.5315117239952087, "learning_rate": 3.081957733151088e-05, "loss": 2.2436, "step": 109500 }, { "epoch": 0.38397644832299443, "grad_norm": 0.5745555758476257, "learning_rate": 3.080906318999054e-05, "loss": 2.2297, "step": 109560 }, { "epoch": 0.38418673115340135, "grad_norm": 0.5811091661453247, "learning_rate": 
3.0798549048470194e-05, "loss": 2.2418, "step": 109620 }, { "epoch": 0.3843970139838082, "grad_norm": 0.6055805683135986, "learning_rate": 3.0788034906949846e-05, "loss": 2.2344, "step": 109680 }, { "epoch": 0.3846072968142151, "grad_norm": 0.5680029392242432, "learning_rate": 3.0777520765429504e-05, "loss": 2.2571, "step": 109740 }, { "epoch": 0.38481757964462204, "grad_norm": 0.5134311318397522, "learning_rate": 3.076700662390916e-05, "loss": 2.2575, "step": 109800 }, { "epoch": 0.3850278624750289, "grad_norm": 0.5660803914070129, "learning_rate": 3.0756492482388815e-05, "loss": 2.2517, "step": 109860 }, { "epoch": 0.3852381453054358, "grad_norm": 0.5265384912490845, "learning_rate": 3.074597834086847e-05, "loss": 2.2545, "step": 109920 }, { "epoch": 0.38544842813584274, "grad_norm": 0.4810716211795807, "learning_rate": 3.0735464199348126e-05, "loss": 2.2417, "step": 109980 }, { "epoch": 0.3856587109662496, "grad_norm": 0.516758143901825, "learning_rate": 3.072495005782778e-05, "loss": 2.2409, "step": 110040 }, { "epoch": 0.3858689937966565, "grad_norm": 0.5345315337181091, "learning_rate": 3.071443591630743e-05, "loss": 2.2502, "step": 110100 }, { "epoch": 0.38607927662706343, "grad_norm": 0.5337819457054138, "learning_rate": 3.0703921774787095e-05, "loss": 2.2227, "step": 110160 }, { "epoch": 0.3862895594574703, "grad_norm": 0.5085687637329102, "learning_rate": 3.069340763326675e-05, "loss": 2.247, "step": 110220 }, { "epoch": 0.3864998422878772, "grad_norm": 0.5997688174247742, "learning_rate": 3.06828934917464e-05, "loss": 2.2545, "step": 110280 }, { "epoch": 0.38671012511828406, "grad_norm": 0.6165827512741089, "learning_rate": 3.067237935022606e-05, "loss": 2.2427, "step": 110340 }, { "epoch": 0.386920407948691, "grad_norm": 0.6079306602478027, "learning_rate": 3.066186520870571e-05, "loss": 2.2245, "step": 110400 }, { "epoch": 0.3871306907790979, "grad_norm": 0.5653731226921082, "learning_rate": 3.065135106718536e-05, "loss": 2.2416, "step": 110460 }, { "epoch": 0.38734097360950476, "grad_norm": 0.5574362277984619, "learning_rate": 3.064083692566502e-05, "loss": 2.2417, "step": 110520 }, { "epoch": 0.3875512564399117, "grad_norm": 0.5841783285140991, "learning_rate": 3.063032278414468e-05, "loss": 2.2382, "step": 110580 }, { "epoch": 0.3877615392703186, "grad_norm": 0.5657100677490234, "learning_rate": 3.061980864262433e-05, "loss": 2.2302, "step": 110640 }, { "epoch": 0.38797182210072545, "grad_norm": 0.5238633155822754, "learning_rate": 3.060929450110398e-05, "loss": 2.2378, "step": 110700 }, { "epoch": 0.38818210493113237, "grad_norm": 0.5261026620864868, "learning_rate": 3.059878035958364e-05, "loss": 2.239, "step": 110760 }, { "epoch": 0.3883923877615393, "grad_norm": 0.5436447858810425, "learning_rate": 3.058826621806329e-05, "loss": 2.2478, "step": 110820 }, { "epoch": 0.38860267059194614, "grad_norm": 0.5481789112091064, "learning_rate": 3.057775207654295e-05, "loss": 2.2473, "step": 110880 }, { "epoch": 0.38881295342235306, "grad_norm": 0.6277444958686829, "learning_rate": 3.056723793502261e-05, "loss": 2.2423, "step": 110940 }, { "epoch": 0.38902323625276, "grad_norm": 0.5612151622772217, "learning_rate": 3.055672379350226e-05, "loss": 2.2426, "step": 111000 }, { "epoch": 0.38923351908316683, "grad_norm": 0.563457727432251, "learning_rate": 3.0546209651981914e-05, "loss": 2.2374, "step": 111060 }, { "epoch": 0.38944380191357375, "grad_norm": 0.5553929805755615, "learning_rate": 3.053569551046157e-05, "loss": 2.2368, "step": 111120 }, { "epoch": 0.38965408474398067, 
"grad_norm": 0.6097853183746338, "learning_rate": 3.0525181368941224e-05, "loss": 2.2409, "step": 111180 }, { "epoch": 0.3898643675743875, "grad_norm": 0.5294462442398071, "learning_rate": 3.0514667227420883e-05, "loss": 2.2392, "step": 111240 }, { "epoch": 0.39007465040479444, "grad_norm": 0.5460028648376465, "learning_rate": 3.0504328321592547e-05, "loss": 2.2325, "step": 111300 }, { "epoch": 0.39028493323520136, "grad_norm": 0.5618406534194946, "learning_rate": 3.04938141800722e-05, "loss": 2.2472, "step": 111360 }, { "epoch": 0.3904952160656082, "grad_norm": 0.5654280185699463, "learning_rate": 3.0483300038551854e-05, "loss": 2.242, "step": 111420 }, { "epoch": 0.39070549889601514, "grad_norm": 0.5807438492774963, "learning_rate": 3.047278589703151e-05, "loss": 2.2386, "step": 111480 }, { "epoch": 0.39091578172642205, "grad_norm": 0.6127345561981201, "learning_rate": 3.046227175551116e-05, "loss": 2.2501, "step": 111540 }, { "epoch": 0.3911260645568289, "grad_norm": 0.5517969727516174, "learning_rate": 3.0451757613990817e-05, "loss": 2.2367, "step": 111600 }, { "epoch": 0.39133634738723583, "grad_norm": 0.5994294881820679, "learning_rate": 3.0441243472470475e-05, "loss": 2.2497, "step": 111660 }, { "epoch": 0.39154663021764274, "grad_norm": 0.5764245986938477, "learning_rate": 3.043072933095013e-05, "loss": 2.2423, "step": 111720 }, { "epoch": 0.3917569130480496, "grad_norm": 0.5494112968444824, "learning_rate": 3.0420215189429786e-05, "loss": 2.2354, "step": 111780 }, { "epoch": 0.3919671958784565, "grad_norm": 0.561445415019989, "learning_rate": 3.0409701047909438e-05, "loss": 2.24, "step": 111840 }, { "epoch": 0.39217747870886344, "grad_norm": 0.5689985156059265, "learning_rate": 3.0399186906389093e-05, "loss": 2.2409, "step": 111900 }, { "epoch": 0.3923877615392703, "grad_norm": 0.5241578817367554, "learning_rate": 3.038867276486875e-05, "loss": 2.2314, "step": 111960 }, { "epoch": 0.3925980443696772, "grad_norm": 0.5331136584281921, "learning_rate": 3.0378158623348407e-05, "loss": 2.2364, "step": 112020 }, { "epoch": 0.39280832720008413, "grad_norm": 0.5528892278671265, "learning_rate": 3.0367644481828062e-05, "loss": 2.2523, "step": 112080 }, { "epoch": 0.393018610030491, "grad_norm": 0.5552469491958618, "learning_rate": 3.0357130340307714e-05, "loss": 2.2384, "step": 112140 }, { "epoch": 0.3932288928608979, "grad_norm": 0.5582413673400879, "learning_rate": 3.034661619878737e-05, "loss": 2.2439, "step": 112200 }, { "epoch": 0.3934391756913048, "grad_norm": 0.4989684820175171, "learning_rate": 3.0336102057267025e-05, "loss": 2.2515, "step": 112260 }, { "epoch": 0.3936494585217117, "grad_norm": 0.574006974697113, "learning_rate": 3.0325587915746677e-05, "loss": 2.239, "step": 112320 }, { "epoch": 0.3938597413521186, "grad_norm": 0.5915408730506897, "learning_rate": 3.031507377422634e-05, "loss": 2.2411, "step": 112380 }, { "epoch": 0.3940700241825255, "grad_norm": 0.5183863043785095, "learning_rate": 3.030455963270599e-05, "loss": 2.2469, "step": 112440 }, { "epoch": 0.3942803070129324, "grad_norm": 0.5208684802055359, "learning_rate": 3.0294045491185646e-05, "loss": 2.2343, "step": 112500 }, { "epoch": 0.3944905898433393, "grad_norm": 0.6256120800971985, "learning_rate": 3.02835313496653e-05, "loss": 2.2467, "step": 112560 }, { "epoch": 0.3947008726737462, "grad_norm": 0.5246924161911011, "learning_rate": 3.0273017208144956e-05, "loss": 2.2413, "step": 112620 }, { "epoch": 0.39491115550415307, "grad_norm": 0.5094478130340576, "learning_rate": 3.0262503066624608e-05, "loss": 
2.2345, "step": 112680 }, { "epoch": 0.39512143833456, "grad_norm": 0.5629433393478394, "learning_rate": 3.025198892510427e-05, "loss": 2.2398, "step": 112740 }, { "epoch": 0.3953317211649669, "grad_norm": 0.541553795337677, "learning_rate": 3.0241474783583922e-05, "loss": 2.2266, "step": 112800 }, { "epoch": 0.39554200399537376, "grad_norm": 0.5277448892593384, "learning_rate": 3.0230960642063578e-05, "loss": 2.2339, "step": 112860 }, { "epoch": 0.3957522868257807, "grad_norm": 0.6435659527778625, "learning_rate": 3.0220446500543233e-05, "loss": 2.2537, "step": 112920 }, { "epoch": 0.3959625696561876, "grad_norm": 0.5504876375198364, "learning_rate": 3.0209932359022885e-05, "loss": 2.2507, "step": 112980 }, { "epoch": 0.39617285248659445, "grad_norm": 0.5389472842216492, "learning_rate": 3.019941821750254e-05, "loss": 2.233, "step": 113040 }, { "epoch": 0.39638313531700137, "grad_norm": 0.5711050629615784, "learning_rate": 3.01889040759822e-05, "loss": 2.2264, "step": 113100 }, { "epoch": 0.3965934181474083, "grad_norm": 0.5223866105079651, "learning_rate": 3.0178389934461854e-05, "loss": 2.2274, "step": 113160 }, { "epoch": 0.39680370097781514, "grad_norm": 0.5716981291770935, "learning_rate": 3.016787579294151e-05, "loss": 2.2541, "step": 113220 }, { "epoch": 0.39701398380822206, "grad_norm": 0.6482357978820801, "learning_rate": 3.015736165142116e-05, "loss": 2.2481, "step": 113280 }, { "epoch": 0.397224266638629, "grad_norm": 0.5465265512466431, "learning_rate": 3.0146847509900816e-05, "loss": 2.2432, "step": 113340 }, { "epoch": 0.39743454946903584, "grad_norm": 0.5352566838264465, "learning_rate": 3.0136508604072477e-05, "loss": 2.2309, "step": 113400 }, { "epoch": 0.39764483229944275, "grad_norm": 0.5502161979675293, "learning_rate": 3.0125994462552132e-05, "loss": 2.2392, "step": 113460 }, { "epoch": 0.39785511512984967, "grad_norm": 0.536171555519104, "learning_rate": 3.011548032103179e-05, "loss": 2.2289, "step": 113520 }, { "epoch": 0.39806539796025653, "grad_norm": 0.6111114621162415, "learning_rate": 3.0104966179511446e-05, "loss": 2.2382, "step": 113580 }, { "epoch": 0.39827568079066344, "grad_norm": 0.610998272895813, "learning_rate": 3.00944520379911e-05, "loss": 2.2357, "step": 113640 }, { "epoch": 0.39848596362107036, "grad_norm": 0.5478922128677368, "learning_rate": 3.0083937896470753e-05, "loss": 2.2417, "step": 113700 }, { "epoch": 0.3986962464514772, "grad_norm": 0.515874445438385, "learning_rate": 3.007342375495041e-05, "loss": 2.2375, "step": 113760 }, { "epoch": 0.39890652928188414, "grad_norm": 0.5432803630828857, "learning_rate": 3.0062909613430064e-05, "loss": 2.2242, "step": 113820 }, { "epoch": 0.39911681211229105, "grad_norm": 0.6252090334892273, "learning_rate": 3.0052395471909723e-05, "loss": 2.2431, "step": 113880 }, { "epoch": 0.3993270949426979, "grad_norm": 0.5408844351768494, "learning_rate": 3.0041881330389378e-05, "loss": 2.2413, "step": 113940 }, { "epoch": 0.39953737777310483, "grad_norm": 0.5749211311340332, "learning_rate": 3.003136718886903e-05, "loss": 2.223, "step": 114000 }, { "epoch": 0.39974766060351175, "grad_norm": 0.568860650062561, "learning_rate": 3.0020853047348685e-05, "loss": 2.255, "step": 114060 }, { "epoch": 0.3999579434339186, "grad_norm": 0.5700159072875977, "learning_rate": 3.001033890582834e-05, "loss": 2.2504, "step": 114120 }, { "epoch": 0.4001682262643255, "grad_norm": 0.5364213585853577, "learning_rate": 2.9999824764307992e-05, "loss": 2.2327, "step": 114180 }, { "epoch": 0.40037850909473244, "grad_norm": 
0.6105502247810364, "learning_rate": 2.9989310622787654e-05, "loss": 2.2264, "step": 114240 }, { "epoch": 0.4005887919251393, "grad_norm": 0.5763128399848938, "learning_rate": 2.9978796481267306e-05, "loss": 2.2373, "step": 114300 }, { "epoch": 0.4007990747555462, "grad_norm": 0.5186144113540649, "learning_rate": 2.996828233974696e-05, "loss": 2.2397, "step": 114360 }, { "epoch": 0.40100935758595313, "grad_norm": 0.5978050827980042, "learning_rate": 2.9957768198226617e-05, "loss": 2.2451, "step": 114420 }, { "epoch": 0.40121964041636, "grad_norm": 0.533627450466156, "learning_rate": 2.994725405670627e-05, "loss": 2.2421, "step": 114480 }, { "epoch": 0.4014299232467669, "grad_norm": 0.5692892670631409, "learning_rate": 2.9936739915185924e-05, "loss": 2.2384, "step": 114540 }, { "epoch": 0.4016402060771738, "grad_norm": 0.5549047589302063, "learning_rate": 2.9926225773665582e-05, "loss": 2.2379, "step": 114600 }, { "epoch": 0.4018504889075807, "grad_norm": 0.6541472673416138, "learning_rate": 2.9915711632145238e-05, "loss": 2.2458, "step": 114660 }, { "epoch": 0.4020607717379876, "grad_norm": 0.5438767671585083, "learning_rate": 2.9905197490624893e-05, "loss": 2.2562, "step": 114720 }, { "epoch": 0.4022710545683945, "grad_norm": 0.5015159249305725, "learning_rate": 2.9894683349104545e-05, "loss": 2.2245, "step": 114780 }, { "epoch": 0.4024813373988014, "grad_norm": 0.5856261849403381, "learning_rate": 2.98841692075842e-05, "loss": 2.241, "step": 114840 }, { "epoch": 0.4026916202292083, "grad_norm": 0.5348838567733765, "learning_rate": 2.9873655066063855e-05, "loss": 2.2259, "step": 114900 }, { "epoch": 0.4029019030596152, "grad_norm": 0.5396873950958252, "learning_rate": 2.9863140924543514e-05, "loss": 2.2413, "step": 114960 }, { "epoch": 0.40311218589002207, "grad_norm": 0.6850928068161011, "learning_rate": 2.985262678302317e-05, "loss": 2.2454, "step": 115020 }, { "epoch": 0.403322468720429, "grad_norm": 0.5155946016311646, "learning_rate": 2.984211264150282e-05, "loss": 2.2516, "step": 115080 }, { "epoch": 0.4035327515508359, "grad_norm": 0.5300692915916443, "learning_rate": 2.9831598499982477e-05, "loss": 2.242, "step": 115140 }, { "epoch": 0.40374303438124276, "grad_norm": 0.5764443874359131, "learning_rate": 2.9821084358462132e-05, "loss": 2.2258, "step": 115200 }, { "epoch": 0.4039533172116497, "grad_norm": 0.5413774251937866, "learning_rate": 2.9810570216941787e-05, "loss": 2.2469, "step": 115260 }, { "epoch": 0.4041636000420566, "grad_norm": 0.5148273706436157, "learning_rate": 2.9800056075421446e-05, "loss": 2.2292, "step": 115320 }, { "epoch": 0.40437388287246345, "grad_norm": 0.5263270735740662, "learning_rate": 2.97895419339011e-05, "loss": 2.2364, "step": 115380 }, { "epoch": 0.40458416570287037, "grad_norm": 0.5516897439956665, "learning_rate": 2.977920302807276e-05, "loss": 2.2345, "step": 115440 }, { "epoch": 0.4047944485332773, "grad_norm": 0.5485020279884338, "learning_rate": 2.9768688886552414e-05, "loss": 2.2291, "step": 115500 }, { "epoch": 0.40500473136368415, "grad_norm": 0.5054183602333069, "learning_rate": 2.975817474503207e-05, "loss": 2.2505, "step": 115560 }, { "epoch": 0.40521501419409106, "grad_norm": 0.5678335428237915, "learning_rate": 2.9747660603511724e-05, "loss": 2.2476, "step": 115620 }, { "epoch": 0.405425297024498, "grad_norm": 0.5526294112205505, "learning_rate": 2.9737146461991376e-05, "loss": 2.244, "step": 115680 }, { "epoch": 0.40563557985490484, "grad_norm": 0.5054203271865845, "learning_rate": 2.9726632320471038e-05, "loss": 2.2392, "step": 
115740 }, { "epoch": 0.40584586268531175, "grad_norm": 0.5515086054801941, "learning_rate": 2.971611817895069e-05, "loss": 2.2366, "step": 115800 }, { "epoch": 0.4060561455157186, "grad_norm": 0.5307000875473022, "learning_rate": 2.9705604037430345e-05, "loss": 2.2323, "step": 115860 }, { "epoch": 0.40626642834612553, "grad_norm": 0.5576381087303162, "learning_rate": 2.969508989591e-05, "loss": 2.2366, "step": 115920 }, { "epoch": 0.40647671117653245, "grad_norm": 0.5790101885795593, "learning_rate": 2.9684575754389656e-05, "loss": 2.2342, "step": 115980 }, { "epoch": 0.4066869940069393, "grad_norm": 0.5370448231697083, "learning_rate": 2.9674061612869308e-05, "loss": 2.2266, "step": 116040 }, { "epoch": 0.4068972768373462, "grad_norm": 0.5727744102478027, "learning_rate": 2.966354747134897e-05, "loss": 2.2409, "step": 116100 }, { "epoch": 0.40710755966775314, "grad_norm": 0.5130985379219055, "learning_rate": 2.965303332982862e-05, "loss": 2.2479, "step": 116160 }, { "epoch": 0.40731784249816, "grad_norm": 0.5241026878356934, "learning_rate": 2.9642519188308277e-05, "loss": 2.2545, "step": 116220 }, { "epoch": 0.4075281253285669, "grad_norm": 0.49381840229034424, "learning_rate": 2.9632005046787932e-05, "loss": 2.2361, "step": 116280 }, { "epoch": 0.40773840815897383, "grad_norm": 0.5293487310409546, "learning_rate": 2.9621490905267584e-05, "loss": 2.2476, "step": 116340 }, { "epoch": 0.4079486909893807, "grad_norm": 0.5284563302993774, "learning_rate": 2.961097676374724e-05, "loss": 2.2249, "step": 116400 }, { "epoch": 0.4081589738197876, "grad_norm": 0.545966625213623, "learning_rate": 2.9600462622226898e-05, "loss": 2.2227, "step": 116460 }, { "epoch": 0.4083692566501945, "grad_norm": 0.574855625629425, "learning_rate": 2.9589948480706553e-05, "loss": 2.2341, "step": 116520 }, { "epoch": 0.4085795394806014, "grad_norm": 0.5358082056045532, "learning_rate": 2.957943433918621e-05, "loss": 2.2397, "step": 116580 }, { "epoch": 0.4087898223110083, "grad_norm": 0.5895236134529114, "learning_rate": 2.956892019766586e-05, "loss": 2.2342, "step": 116640 }, { "epoch": 0.4090001051414152, "grad_norm": 0.5704084634780884, "learning_rate": 2.9558406056145516e-05, "loss": 2.2417, "step": 116700 }, { "epoch": 0.4092103879718221, "grad_norm": 0.58211350440979, "learning_rate": 2.954789191462517e-05, "loss": 2.2371, "step": 116760 }, { "epoch": 0.409420670802229, "grad_norm": 0.532575786113739, "learning_rate": 2.953737777310483e-05, "loss": 2.239, "step": 116820 }, { "epoch": 0.4096309536326359, "grad_norm": 0.5325069427490234, "learning_rate": 2.9526863631584485e-05, "loss": 2.2369, "step": 116880 }, { "epoch": 0.40984123646304277, "grad_norm": 0.5889403223991394, "learning_rate": 2.9516349490064137e-05, "loss": 2.2481, "step": 116940 }, { "epoch": 0.4100515192934497, "grad_norm": 0.6322012543678284, "learning_rate": 2.9505835348543792e-05, "loss": 2.2413, "step": 117000 }, { "epoch": 0.4102618021238566, "grad_norm": 0.5821205377578735, "learning_rate": 2.9495321207023447e-05, "loss": 2.2249, "step": 117060 }, { "epoch": 0.41047208495426346, "grad_norm": 0.6500152349472046, "learning_rate": 2.94848070655031e-05, "loss": 2.2425, "step": 117120 }, { "epoch": 0.4106823677846704, "grad_norm": 0.5491619110107422, "learning_rate": 2.947429292398276e-05, "loss": 2.2478, "step": 117180 }, { "epoch": 0.4108926506150773, "grad_norm": 0.5800027251243591, "learning_rate": 2.9463778782462413e-05, "loss": 2.2458, "step": 117240 }, { "epoch": 0.41110293344548415, "grad_norm": 0.5748924612998962, "learning_rate": 
2.945326464094207e-05, "loss": 2.2259, "step": 117300 }, { "epoch": 0.41131321627589107, "grad_norm": 0.5880306363105774, "learning_rate": 2.9442750499421724e-05, "loss": 2.2461, "step": 117360 }, { "epoch": 0.411523499106298, "grad_norm": 0.5313839912414551, "learning_rate": 2.9432236357901376e-05, "loss": 2.2411, "step": 117420 }, { "epoch": 0.41173378193670485, "grad_norm": 0.5506525635719299, "learning_rate": 2.942172221638103e-05, "loss": 2.2414, "step": 117480 }, { "epoch": 0.41194406476711176, "grad_norm": 0.5372014045715332, "learning_rate": 2.941138331055269e-05, "loss": 2.2413, "step": 117540 }, { "epoch": 0.4121543475975187, "grad_norm": 0.5816333889961243, "learning_rate": 2.9400869169032354e-05, "loss": 2.2294, "step": 117600 }, { "epoch": 0.41236463042792554, "grad_norm": 0.5727115273475647, "learning_rate": 2.9390355027512005e-05, "loss": 2.221, "step": 117660 }, { "epoch": 0.41257491325833245, "grad_norm": 0.5720790028572083, "learning_rate": 2.938001612168367e-05, "loss": 2.242, "step": 117720 }, { "epoch": 0.41278519608873937, "grad_norm": 0.5449382066726685, "learning_rate": 2.936950198016332e-05, "loss": 2.2247, "step": 117780 }, { "epoch": 0.41299547891914623, "grad_norm": 0.5297885537147522, "learning_rate": 2.9358987838642977e-05, "loss": 2.233, "step": 117840 }, { "epoch": 0.41320576174955315, "grad_norm": 0.5624762177467346, "learning_rate": 2.9348473697122632e-05, "loss": 2.2335, "step": 117900 }, { "epoch": 0.41341604457996006, "grad_norm": 0.5435742735862732, "learning_rate": 2.9337959555602284e-05, "loss": 2.2378, "step": 117960 }, { "epoch": 0.4136263274103669, "grad_norm": 0.5343254208564758, "learning_rate": 2.9327445414081946e-05, "loss": 2.2461, "step": 118020 }, { "epoch": 0.41383661024077384, "grad_norm": 0.5433058738708496, "learning_rate": 2.9316931272561598e-05, "loss": 2.2361, "step": 118080 }, { "epoch": 0.41404689307118075, "grad_norm": 0.5210029482841492, "learning_rate": 2.9306417131041253e-05, "loss": 2.2497, "step": 118140 }, { "epoch": 0.4142571759015876, "grad_norm": 0.5953100919723511, "learning_rate": 2.9295902989520908e-05, "loss": 2.2403, "step": 118200 }, { "epoch": 0.41446745873199453, "grad_norm": 0.523826003074646, "learning_rate": 2.928538884800056e-05, "loss": 2.2405, "step": 118260 }, { "epoch": 0.41467774156240145, "grad_norm": 0.5504089593887329, "learning_rate": 2.9274874706480215e-05, "loss": 2.2415, "step": 118320 }, { "epoch": 0.4148880243928083, "grad_norm": 0.5553892850875854, "learning_rate": 2.9264360564959874e-05, "loss": 2.2383, "step": 118380 }, { "epoch": 0.4150983072232152, "grad_norm": 0.5315834879875183, "learning_rate": 2.925384642343953e-05, "loss": 2.2172, "step": 118440 }, { "epoch": 0.41530859005362214, "grad_norm": 0.5641074776649475, "learning_rate": 2.9243332281919185e-05, "loss": 2.2516, "step": 118500 }, { "epoch": 0.415518872884029, "grad_norm": 0.513152539730072, "learning_rate": 2.9232818140398837e-05, "loss": 2.2416, "step": 118560 }, { "epoch": 0.4157291557144359, "grad_norm": 0.5532053112983704, "learning_rate": 2.9222303998878492e-05, "loss": 2.2424, "step": 118620 }, { "epoch": 0.41593943854484283, "grad_norm": 0.529827892780304, "learning_rate": 2.9211789857358147e-05, "loss": 2.2246, "step": 118680 }, { "epoch": 0.4161497213752497, "grad_norm": 0.5687451958656311, "learning_rate": 2.9201275715837806e-05, "loss": 2.2251, "step": 118740 }, { "epoch": 0.4163600042056566, "grad_norm": 0.5417942404747009, "learning_rate": 2.919076157431746e-05, "loss": 2.22, "step": 118800 }, { "epoch": 
0.4165702870360635, "grad_norm": 0.5505846738815308, "learning_rate": 2.9180247432797113e-05, "loss": 2.2247, "step": 118860 }, { "epoch": 0.4167805698664704, "grad_norm": 0.5387886762619019, "learning_rate": 2.9169733291276768e-05, "loss": 2.2375, "step": 118920 }, { "epoch": 0.4169908526968773, "grad_norm": 0.5633653402328491, "learning_rate": 2.9159219149756423e-05, "loss": 2.2265, "step": 118980 }, { "epoch": 0.4172011355272842, "grad_norm": 0.581901490688324, "learning_rate": 2.9148705008236075e-05, "loss": 2.2427, "step": 119040 }, { "epoch": 0.4174114183576911, "grad_norm": 0.5256428122520447, "learning_rate": 2.9138190866715737e-05, "loss": 2.2371, "step": 119100 }, { "epoch": 0.417621701188098, "grad_norm": 0.516146719455719, "learning_rate": 2.912767672519539e-05, "loss": 2.2485, "step": 119160 }, { "epoch": 0.4178319840185049, "grad_norm": 0.5308356285095215, "learning_rate": 2.9117162583675045e-05, "loss": 2.2391, "step": 119220 }, { "epoch": 0.41804226684891177, "grad_norm": 0.5721306800842285, "learning_rate": 2.91066484421547e-05, "loss": 2.2283, "step": 119280 }, { "epoch": 0.4182525496793187, "grad_norm": 0.5151183605194092, "learning_rate": 2.9096134300634352e-05, "loss": 2.2278, "step": 119340 }, { "epoch": 0.4184628325097256, "grad_norm": 0.539119303226471, "learning_rate": 2.9085620159114007e-05, "loss": 2.2244, "step": 119400 }, { "epoch": 0.41867311534013246, "grad_norm": 0.5463353395462036, "learning_rate": 2.9075106017593666e-05, "loss": 2.2349, "step": 119460 }, { "epoch": 0.4188833981705394, "grad_norm": 0.6453050971031189, "learning_rate": 2.906459187607332e-05, "loss": 2.2309, "step": 119520 }, { "epoch": 0.4190936810009463, "grad_norm": 0.5602206587791443, "learning_rate": 2.9054077734552976e-05, "loss": 2.2323, "step": 119580 }, { "epoch": 0.41930396383135315, "grad_norm": 0.5998344421386719, "learning_rate": 2.904356359303263e-05, "loss": 2.2313, "step": 119640 }, { "epoch": 0.41951424666176007, "grad_norm": 0.5444914698600769, "learning_rate": 2.9033049451512283e-05, "loss": 2.2332, "step": 119700 }, { "epoch": 0.419724529492167, "grad_norm": 0.5113641023635864, "learning_rate": 2.902253530999194e-05, "loss": 2.2325, "step": 119760 }, { "epoch": 0.41993481232257385, "grad_norm": 0.5486562848091125, "learning_rate": 2.9012021168471597e-05, "loss": 2.2463, "step": 119820 }, { "epoch": 0.42014509515298076, "grad_norm": 0.5450092554092407, "learning_rate": 2.9001507026951253e-05, "loss": 2.2495, "step": 119880 }, { "epoch": 0.4203553779833877, "grad_norm": 0.6255912184715271, "learning_rate": 2.8990992885430908e-05, "loss": 2.2348, "step": 119940 }, { "epoch": 0.42056566081379454, "grad_norm": 0.5884232521057129, "learning_rate": 2.898047874391056e-05, "loss": 2.2301, "step": 120000 }, { "epoch": 0.42077594364420146, "grad_norm": 0.5232421159744263, "learning_rate": 2.8969964602390215e-05, "loss": 2.2267, "step": 120060 }, { "epoch": 0.42098622647460837, "grad_norm": 0.5445395112037659, "learning_rate": 2.895945046086987e-05, "loss": 2.2177, "step": 120120 }, { "epoch": 0.42119650930501523, "grad_norm": 0.5900552868843079, "learning_rate": 2.894893631934953e-05, "loss": 2.2466, "step": 120180 }, { "epoch": 0.42140679213542215, "grad_norm": 0.547518253326416, "learning_rate": 2.8938422177829184e-05, "loss": 2.2245, "step": 120240 }, { "epoch": 0.42161707496582906, "grad_norm": 0.6022685766220093, "learning_rate": 2.8927908036308836e-05, "loss": 2.2354, "step": 120300 }, { "epoch": 0.4218273577962359, "grad_norm": 0.5935965776443481, "learning_rate": 
2.891739389478849e-05, "loss": 2.2352, "step": 120360 }, { "epoch": 0.42203764062664284, "grad_norm": 0.5695683360099792, "learning_rate": 2.8906879753268147e-05, "loss": 2.2308, "step": 120420 }, { "epoch": 0.42224792345704976, "grad_norm": 0.5888732075691223, "learning_rate": 2.88963656117478e-05, "loss": 2.2399, "step": 120480 }, { "epoch": 0.4224582062874566, "grad_norm": 0.5700323581695557, "learning_rate": 2.888585147022746e-05, "loss": 2.2396, "step": 120540 }, { "epoch": 0.42266848911786353, "grad_norm": 0.5631459951400757, "learning_rate": 2.8875337328707113e-05, "loss": 2.232, "step": 120600 }, { "epoch": 0.42287877194827045, "grad_norm": 0.5992614030838013, "learning_rate": 2.8864823187186768e-05, "loss": 2.2455, "step": 120660 }, { "epoch": 0.4230890547786773, "grad_norm": 0.531584620475769, "learning_rate": 2.8854309045666423e-05, "loss": 2.2286, "step": 120720 }, { "epoch": 0.4232993376090842, "grad_norm": 0.5879722237586975, "learning_rate": 2.8843794904146075e-05, "loss": 2.229, "step": 120780 }, { "epoch": 0.42350962043949114, "grad_norm": 0.5823795795440674, "learning_rate": 2.883328076262573e-05, "loss": 2.2367, "step": 120840 }, { "epoch": 0.423719903269898, "grad_norm": 0.5151508450508118, "learning_rate": 2.882276662110539e-05, "loss": 2.2285, "step": 120900 }, { "epoch": 0.4239301861003049, "grad_norm": 0.5895393490791321, "learning_rate": 2.8812252479585044e-05, "loss": 2.2495, "step": 120960 }, { "epoch": 0.42414046893071183, "grad_norm": 0.5240002274513245, "learning_rate": 2.88017383380647e-05, "loss": 2.2464, "step": 121020 }, { "epoch": 0.4243507517611187, "grad_norm": 0.560350239276886, "learning_rate": 2.879122419654435e-05, "loss": 2.2292, "step": 121080 }, { "epoch": 0.4245610345915256, "grad_norm": 0.5635443329811096, "learning_rate": 2.8780710055024007e-05, "loss": 2.2249, "step": 121140 }, { "epoch": 0.4247713174219325, "grad_norm": 0.525646448135376, "learning_rate": 2.8770195913503662e-05, "loss": 2.2328, "step": 121200 }, { "epoch": 0.4249816002523394, "grad_norm": 0.5428566336631775, "learning_rate": 2.875968177198332e-05, "loss": 2.2304, "step": 121260 }, { "epoch": 0.4251918830827463, "grad_norm": 0.5613175630569458, "learning_rate": 2.8749167630462976e-05, "loss": 2.2184, "step": 121320 }, { "epoch": 0.4254021659131532, "grad_norm": 0.544946551322937, "learning_rate": 2.8738653488942628e-05, "loss": 2.2286, "step": 121380 }, { "epoch": 0.4256124487435601, "grad_norm": 0.6552244424819946, "learning_rate": 2.8728139347422283e-05, "loss": 2.2402, "step": 121440 }, { "epoch": 0.425822731573967, "grad_norm": 0.5511813163757324, "learning_rate": 2.871762520590194e-05, "loss": 2.226, "step": 121500 }, { "epoch": 0.42603301440437386, "grad_norm": 0.5645471811294556, "learning_rate": 2.8707111064381594e-05, "loss": 2.2363, "step": 121560 }, { "epoch": 0.42624329723478077, "grad_norm": 0.5755658149719238, "learning_rate": 2.8696596922861252e-05, "loss": 2.2314, "step": 121620 }, { "epoch": 0.4264535800651877, "grad_norm": 0.5792255997657776, "learning_rate": 2.8686082781340908e-05, "loss": 2.2355, "step": 121680 }, { "epoch": 0.42666386289559455, "grad_norm": 0.5449994802474976, "learning_rate": 2.8675743875512568e-05, "loss": 2.2387, "step": 121740 }, { "epoch": 0.42687414572600146, "grad_norm": 0.5818429589271545, "learning_rate": 2.866522973399222e-05, "loss": 2.2386, "step": 121800 }, { "epoch": 0.4270844285564084, "grad_norm": 0.5970384478569031, "learning_rate": 2.8654715592471875e-05, "loss": 2.2309, "step": 121860 }, { "epoch": 0.42729471138681524, 
"grad_norm": 0.6303051114082336, "learning_rate": 2.864420145095153e-05, "loss": 2.2248, "step": 121920 }, { "epoch": 0.42750499421722216, "grad_norm": 0.5991727113723755, "learning_rate": 2.8633687309431183e-05, "loss": 2.2394, "step": 121980 }, { "epoch": 0.42771527704762907, "grad_norm": 0.5650383830070496, "learning_rate": 2.8623173167910845e-05, "loss": 2.2439, "step": 122040 }, { "epoch": 0.42792555987803593, "grad_norm": 0.547942578792572, "learning_rate": 2.8612659026390496e-05, "loss": 2.225, "step": 122100 }, { "epoch": 0.42813584270844285, "grad_norm": 0.5970585942268372, "learning_rate": 2.8602144884870152e-05, "loss": 2.2238, "step": 122160 }, { "epoch": 0.42834612553884976, "grad_norm": 0.540296733379364, "learning_rate": 2.8591630743349807e-05, "loss": 2.2304, "step": 122220 }, { "epoch": 0.4285564083692566, "grad_norm": 0.5640031695365906, "learning_rate": 2.8581116601829462e-05, "loss": 2.2392, "step": 122280 }, { "epoch": 0.42876669119966354, "grad_norm": 0.5584647059440613, "learning_rate": 2.8570602460309114e-05, "loss": 2.2253, "step": 122340 }, { "epoch": 0.42897697403007046, "grad_norm": 0.5100136995315552, "learning_rate": 2.8560088318788776e-05, "loss": 2.2381, "step": 122400 }, { "epoch": 0.4291872568604773, "grad_norm": 0.5283793807029724, "learning_rate": 2.8549574177268428e-05, "loss": 2.225, "step": 122460 }, { "epoch": 0.42939753969088423, "grad_norm": 0.5295258164405823, "learning_rate": 2.8539060035748083e-05, "loss": 2.23, "step": 122520 }, { "epoch": 0.42960782252129115, "grad_norm": 0.5125938057899475, "learning_rate": 2.852854589422774e-05, "loss": 2.2247, "step": 122580 }, { "epoch": 0.429818105351698, "grad_norm": 0.6334744095802307, "learning_rate": 2.851803175270739e-05, "loss": 2.233, "step": 122640 }, { "epoch": 0.4300283881821049, "grad_norm": 0.5219427943229675, "learning_rate": 2.8507517611187046e-05, "loss": 2.2167, "step": 122700 }, { "epoch": 0.43023867101251184, "grad_norm": 0.5502570867538452, "learning_rate": 2.8497003469666705e-05, "loss": 2.2263, "step": 122760 }, { "epoch": 0.4304489538429187, "grad_norm": 0.5381381511688232, "learning_rate": 2.848648932814636e-05, "loss": 2.2359, "step": 122820 }, { "epoch": 0.4306592366733256, "grad_norm": 0.5731381773948669, "learning_rate": 2.8475975186626015e-05, "loss": 2.2329, "step": 122880 }, { "epoch": 0.43086951950373253, "grad_norm": 0.5249903202056885, "learning_rate": 2.8465461045105667e-05, "loss": 2.2451, "step": 122940 }, { "epoch": 0.4310798023341394, "grad_norm": 0.517389178276062, "learning_rate": 2.8454946903585322e-05, "loss": 2.2341, "step": 123000 }, { "epoch": 0.4312900851645463, "grad_norm": 0.5333336591720581, "learning_rate": 2.8444432762064978e-05, "loss": 2.2407, "step": 123060 }, { "epoch": 0.4315003679949532, "grad_norm": 0.527782678604126, "learning_rate": 2.8433918620544636e-05, "loss": 2.2298, "step": 123120 }, { "epoch": 0.4317106508253601, "grad_norm": 0.5925357341766357, "learning_rate": 2.842340447902429e-05, "loss": 2.2361, "step": 123180 }, { "epoch": 0.431920933655767, "grad_norm": 0.5482273697853088, "learning_rate": 2.8412890337503943e-05, "loss": 2.2371, "step": 123240 }, { "epoch": 0.4321312164861739, "grad_norm": 0.5719531774520874, "learning_rate": 2.84023761959836e-05, "loss": 2.229, "step": 123300 }, { "epoch": 0.4323414993165808, "grad_norm": 0.6021667122840881, "learning_rate": 2.8391862054463254e-05, "loss": 2.2275, "step": 123360 }, { "epoch": 0.4325517821469877, "grad_norm": 0.5164570212364197, "learning_rate": 2.8381347912942906e-05, "loss": 
2.2231, "step": 123420 }, { "epoch": 0.4327620649773946, "grad_norm": 0.6197656989097595, "learning_rate": 2.8370833771422568e-05, "loss": 2.2398, "step": 123480 }, { "epoch": 0.43297234780780147, "grad_norm": 0.5924232006072998, "learning_rate": 2.836031962990222e-05, "loss": 2.2379, "step": 123540 }, { "epoch": 0.4331826306382084, "grad_norm": 0.57561856508255, "learning_rate": 2.8349805488381875e-05, "loss": 2.244, "step": 123600 }, { "epoch": 0.4333929134686153, "grad_norm": 0.5562826991081238, "learning_rate": 2.833929134686153e-05, "loss": 2.2281, "step": 123660 }, { "epoch": 0.43360319629902216, "grad_norm": 0.5709443688392639, "learning_rate": 2.8328777205341182e-05, "loss": 2.2216, "step": 123720 }, { "epoch": 0.4338134791294291, "grad_norm": 0.6017040014266968, "learning_rate": 2.8318263063820838e-05, "loss": 2.2352, "step": 123780 }, { "epoch": 0.434023761959836, "grad_norm": 0.5744826793670654, "learning_rate": 2.8307924157992498e-05, "loss": 2.2266, "step": 123840 }, { "epoch": 0.43423404479024286, "grad_norm": 0.6745397448539734, "learning_rate": 2.829741001647216e-05, "loss": 2.2247, "step": 123900 }, { "epoch": 0.4344443276206498, "grad_norm": 0.5829909443855286, "learning_rate": 2.8286895874951812e-05, "loss": 2.2301, "step": 123960 }, { "epoch": 0.4346546104510567, "grad_norm": 0.5703389644622803, "learning_rate": 2.8276381733431467e-05, "loss": 2.2126, "step": 124020 }, { "epoch": 0.43486489328146355, "grad_norm": 0.5177561640739441, "learning_rate": 2.8265867591911123e-05, "loss": 2.2376, "step": 124080 }, { "epoch": 0.43507517611187047, "grad_norm": 0.5599654912948608, "learning_rate": 2.8255353450390774e-05, "loss": 2.2116, "step": 124140 }, { "epoch": 0.4352854589422774, "grad_norm": 0.5665854215621948, "learning_rate": 2.824483930887043e-05, "loss": 2.2437, "step": 124200 }, { "epoch": 0.43549574177268424, "grad_norm": 0.548435628414154, "learning_rate": 2.823432516735009e-05, "loss": 2.2466, "step": 124260 }, { "epoch": 0.43570602460309116, "grad_norm": 0.5328386425971985, "learning_rate": 2.8223811025829744e-05, "loss": 2.2386, "step": 124320 }, { "epoch": 0.4359163074334981, "grad_norm": 0.5652334094047546, "learning_rate": 2.82132968843094e-05, "loss": 2.2331, "step": 124380 }, { "epoch": 0.43612659026390493, "grad_norm": 0.5891857147216797, "learning_rate": 2.820278274278905e-05, "loss": 2.2185, "step": 124440 }, { "epoch": 0.43633687309431185, "grad_norm": 0.5438734889030457, "learning_rate": 2.8192268601268706e-05, "loss": 2.2226, "step": 124500 }, { "epoch": 0.43654715592471877, "grad_norm": 0.5453084111213684, "learning_rate": 2.818175445974836e-05, "loss": 2.2288, "step": 124560 }, { "epoch": 0.4367574387551256, "grad_norm": 0.5740223526954651, "learning_rate": 2.817124031822802e-05, "loss": 2.2327, "step": 124620 }, { "epoch": 0.43696772158553254, "grad_norm": 0.5052223801612854, "learning_rate": 2.8160726176707675e-05, "loss": 2.2366, "step": 124680 }, { "epoch": 0.43717800441593946, "grad_norm": 0.5633878707885742, "learning_rate": 2.8150212035187327e-05, "loss": 2.2296, "step": 124740 }, { "epoch": 0.4373882872463463, "grad_norm": 0.5381173491477966, "learning_rate": 2.8139697893666983e-05, "loss": 2.2313, "step": 124800 }, { "epoch": 0.43759857007675323, "grad_norm": 0.5788121223449707, "learning_rate": 2.8129183752146638e-05, "loss": 2.2267, "step": 124860 }, { "epoch": 0.43780885290716015, "grad_norm": 0.5180872678756714, "learning_rate": 2.8118669610626293e-05, "loss": 2.2213, "step": 124920 }, { "epoch": 0.438019135737567, "grad_norm": 
0.5612934827804565, "learning_rate": 2.8108155469105952e-05, "loss": 2.2218, "step": 124980 }, { "epoch": 0.4382294185679739, "grad_norm": 0.7243056893348694, "learning_rate": 2.8097641327585607e-05, "loss": 2.2406, "step": 125040 }, { "epoch": 0.43843970139838084, "grad_norm": 0.6163976788520813, "learning_rate": 2.808712718606526e-05, "loss": 2.2208, "step": 125100 }, { "epoch": 0.4386499842287877, "grad_norm": 0.57260662317276, "learning_rate": 2.8076613044544914e-05, "loss": 2.2152, "step": 125160 }, { "epoch": 0.4388602670591946, "grad_norm": 0.5954247713088989, "learning_rate": 2.806609890302457e-05, "loss": 2.2249, "step": 125220 }, { "epoch": 0.43907054988960154, "grad_norm": 0.48894116282463074, "learning_rate": 2.805558476150422e-05, "loss": 2.2506, "step": 125280 }, { "epoch": 0.4392808327200084, "grad_norm": 0.5188723802566528, "learning_rate": 2.8045070619983883e-05, "loss": 2.2224, "step": 125340 }, { "epoch": 0.4394911155504153, "grad_norm": 0.5828328728675842, "learning_rate": 2.8034556478463535e-05, "loss": 2.2445, "step": 125400 }, { "epoch": 0.43970139838082223, "grad_norm": 0.521661102771759, "learning_rate": 2.802404233694319e-05, "loss": 2.2302, "step": 125460 }, { "epoch": 0.4399116812112291, "grad_norm": 0.5403749942779541, "learning_rate": 2.8013528195422846e-05, "loss": 2.2135, "step": 125520 }, { "epoch": 0.440121964041636, "grad_norm": 0.534483015537262, "learning_rate": 2.8003014053902498e-05, "loss": 2.212, "step": 125580 }, { "epoch": 0.4403322468720429, "grad_norm": 0.6184211373329163, "learning_rate": 2.7992499912382153e-05, "loss": 2.2396, "step": 125640 }, { "epoch": 0.4405425297024498, "grad_norm": 0.5149890780448914, "learning_rate": 2.7981985770861812e-05, "loss": 2.2368, "step": 125700 }, { "epoch": 0.4407528125328567, "grad_norm": 0.5126898884773254, "learning_rate": 2.7971471629341467e-05, "loss": 2.2277, "step": 125760 }, { "epoch": 0.4409630953632636, "grad_norm": 0.5691506862640381, "learning_rate": 2.7960957487821122e-05, "loss": 2.2398, "step": 125820 }, { "epoch": 0.4411733781936705, "grad_norm": 0.5742629170417786, "learning_rate": 2.7950443346300774e-05, "loss": 2.2313, "step": 125880 }, { "epoch": 0.4413836610240774, "grad_norm": 0.5491039156913757, "learning_rate": 2.793992920478043e-05, "loss": 2.2295, "step": 125940 }, { "epoch": 0.4415939438544843, "grad_norm": 0.5712140798568726, "learning_rate": 2.792959029895209e-05, "loss": 2.2311, "step": 126000 }, { "epoch": 0.44180422668489117, "grad_norm": 0.5529006123542786, "learning_rate": 2.7919076157431745e-05, "loss": 2.2285, "step": 126060 }, { "epoch": 0.4420145095152981, "grad_norm": 0.5630046129226685, "learning_rate": 2.7908562015911404e-05, "loss": 2.2245, "step": 126120 }, { "epoch": 0.442224792345705, "grad_norm": 0.6187325119972229, "learning_rate": 2.789804787439106e-05, "loss": 2.2175, "step": 126180 }, { "epoch": 0.44243507517611186, "grad_norm": 0.508385419845581, "learning_rate": 2.7887533732870714e-05, "loss": 2.2245, "step": 126240 }, { "epoch": 0.4426453580065188, "grad_norm": 0.5613511800765991, "learning_rate": 2.7877019591350366e-05, "loss": 2.2226, "step": 126300 }, { "epoch": 0.4428556408369257, "grad_norm": 0.5883780121803284, "learning_rate": 2.786650544983002e-05, "loss": 2.2158, "step": 126360 }, { "epoch": 0.44306592366733255, "grad_norm": 0.6000725626945496, "learning_rate": 2.7855991308309677e-05, "loss": 2.2262, "step": 126420 }, { "epoch": 0.44327620649773947, "grad_norm": 0.5127400755882263, "learning_rate": 2.7845477166789336e-05, "loss": 2.2276, "step": 
126480 }, { "epoch": 0.4434864893281464, "grad_norm": 0.5251140594482422, "learning_rate": 2.783496302526899e-05, "loss": 2.2197, "step": 126540 }, { "epoch": 0.44369677215855324, "grad_norm": 0.5944713950157166, "learning_rate": 2.7824448883748643e-05, "loss": 2.2275, "step": 126600 }, { "epoch": 0.44390705498896016, "grad_norm": 0.5731028318405151, "learning_rate": 2.7813934742228298e-05, "loss": 2.235, "step": 126660 }, { "epoch": 0.4441173378193671, "grad_norm": 0.5536123514175415, "learning_rate": 2.7803420600707953e-05, "loss": 2.2396, "step": 126720 }, { "epoch": 0.44432762064977394, "grad_norm": 0.5599493384361267, "learning_rate": 2.7792906459187605e-05, "loss": 2.226, "step": 126780 }, { "epoch": 0.44453790348018085, "grad_norm": 0.548050045967102, "learning_rate": 2.7782392317667267e-05, "loss": 2.2504, "step": 126840 }, { "epoch": 0.44474818631058777, "grad_norm": 0.5116492509841919, "learning_rate": 2.777187817614692e-05, "loss": 2.2346, "step": 126900 }, { "epoch": 0.44495846914099463, "grad_norm": 0.5121557712554932, "learning_rate": 2.7761364034626574e-05, "loss": 2.2365, "step": 126960 }, { "epoch": 0.44516875197140154, "grad_norm": 0.5654640793800354, "learning_rate": 2.775084989310623e-05, "loss": 2.2114, "step": 127020 }, { "epoch": 0.44537903480180846, "grad_norm": 0.5340287089347839, "learning_rate": 2.774033575158588e-05, "loss": 2.2296, "step": 127080 }, { "epoch": 0.4455893176322153, "grad_norm": 0.5115010738372803, "learning_rate": 2.7729821610065537e-05, "loss": 2.2265, "step": 127140 }, { "epoch": 0.44579960046262224, "grad_norm": 0.5538489818572998, "learning_rate": 2.7719307468545196e-05, "loss": 2.2066, "step": 127200 }, { "epoch": 0.4460098832930291, "grad_norm": 0.5297344923019409, "learning_rate": 2.770879332702485e-05, "loss": 2.218, "step": 127260 }, { "epoch": 0.446220166123436, "grad_norm": 0.5513839721679688, "learning_rate": 2.7698279185504506e-05, "loss": 2.2197, "step": 127320 }, { "epoch": 0.44643044895384293, "grad_norm": 0.5162503719329834, "learning_rate": 2.7687765043984158e-05, "loss": 2.2308, "step": 127380 }, { "epoch": 0.4466407317842498, "grad_norm": 0.6151419878005981, "learning_rate": 2.7677250902463813e-05, "loss": 2.2455, "step": 127440 }, { "epoch": 0.4468510146146567, "grad_norm": 0.6089375615119934, "learning_rate": 2.766673676094347e-05, "loss": 2.2147, "step": 127500 }, { "epoch": 0.4470612974450636, "grad_norm": 0.5278417468070984, "learning_rate": 2.7656222619423127e-05, "loss": 2.2258, "step": 127560 }, { "epoch": 0.4472715802754705, "grad_norm": 0.5466154217720032, "learning_rate": 2.7645708477902783e-05, "loss": 2.2321, "step": 127620 }, { "epoch": 0.4474818631058774, "grad_norm": 0.5713569521903992, "learning_rate": 2.7635194336382438e-05, "loss": 2.2191, "step": 127680 }, { "epoch": 0.4476921459362843, "grad_norm": 0.560494601726532, "learning_rate": 2.762468019486209e-05, "loss": 2.2269, "step": 127740 }, { "epoch": 0.4479024287666912, "grad_norm": 0.48673978447914124, "learning_rate": 2.7614166053341745e-05, "loss": 2.2386, "step": 127800 }, { "epoch": 0.4481127115970981, "grad_norm": 0.5395427346229553, "learning_rate": 2.76036519118214e-05, "loss": 2.2298, "step": 127860 }, { "epoch": 0.448322994427505, "grad_norm": 0.5101057887077332, "learning_rate": 2.759313777030106e-05, "loss": 2.2421, "step": 127920 }, { "epoch": 0.44853327725791187, "grad_norm": 0.5033383369445801, "learning_rate": 2.758279886447272e-05, "loss": 2.2284, "step": 127980 }, { "epoch": 0.4487435600883188, "grad_norm": 0.5280095934867859, 
"learning_rate": 2.7572284722952375e-05, "loss": 2.2458, "step": 128040 }, { "epoch": 0.4489538429187257, "grad_norm": 0.5307741761207581, "learning_rate": 2.7561770581432027e-05, "loss": 2.2245, "step": 128100 }, { "epoch": 0.44916412574913256, "grad_norm": 0.5375804305076599, "learning_rate": 2.7551256439911682e-05, "loss": 2.2116, "step": 128160 }, { "epoch": 0.4493744085795395, "grad_norm": 0.5175926685333252, "learning_rate": 2.7540742298391337e-05, "loss": 2.2212, "step": 128220 }, { "epoch": 0.4495846914099464, "grad_norm": 0.5948784947395325, "learning_rate": 2.753022815687099e-05, "loss": 2.2265, "step": 128280 }, { "epoch": 0.44979497424035325, "grad_norm": 0.5095413327217102, "learning_rate": 2.751971401535065e-05, "loss": 2.227, "step": 128340 }, { "epoch": 0.45000525707076017, "grad_norm": 0.5439273118972778, "learning_rate": 2.7509199873830303e-05, "loss": 2.2472, "step": 128400 }, { "epoch": 0.4502155399011671, "grad_norm": 0.5273983478546143, "learning_rate": 2.749868573230996e-05, "loss": 2.2369, "step": 128460 }, { "epoch": 0.45042582273157394, "grad_norm": 0.5817850828170776, "learning_rate": 2.7488171590789614e-05, "loss": 2.229, "step": 128520 }, { "epoch": 0.45063610556198086, "grad_norm": 0.5278428792953491, "learning_rate": 2.747765744926927e-05, "loss": 2.2221, "step": 128580 }, { "epoch": 0.4508463883923878, "grad_norm": 0.529971718788147, "learning_rate": 2.746714330774892e-05, "loss": 2.2413, "step": 128640 }, { "epoch": 0.45105667122279464, "grad_norm": 0.5319401621818542, "learning_rate": 2.7456629166228583e-05, "loss": 2.2259, "step": 128700 }, { "epoch": 0.45126695405320155, "grad_norm": 0.5638734698295593, "learning_rate": 2.7446115024708235e-05, "loss": 2.2072, "step": 128760 }, { "epoch": 0.45147723688360847, "grad_norm": 0.567353367805481, "learning_rate": 2.743560088318789e-05, "loss": 2.206, "step": 128820 }, { "epoch": 0.45168751971401533, "grad_norm": 0.5370661616325378, "learning_rate": 2.7425086741667545e-05, "loss": 2.2205, "step": 128880 }, { "epoch": 0.45189780254442224, "grad_norm": 0.5796499848365784, "learning_rate": 2.7414572600147197e-05, "loss": 2.2259, "step": 128940 }, { "epoch": 0.45210808537482916, "grad_norm": 0.5625753402709961, "learning_rate": 2.7404058458626852e-05, "loss": 2.2237, "step": 129000 }, { "epoch": 0.452318368205236, "grad_norm": 0.5433998703956604, "learning_rate": 2.739354431710651e-05, "loss": 2.2162, "step": 129060 }, { "epoch": 0.45252865103564294, "grad_norm": 0.5345425009727478, "learning_rate": 2.7383030175586166e-05, "loss": 2.2191, "step": 129120 }, { "epoch": 0.45273893386604985, "grad_norm": 0.492722749710083, "learning_rate": 2.737251603406582e-05, "loss": 2.2256, "step": 129180 }, { "epoch": 0.4529492166964567, "grad_norm": 0.5437445640563965, "learning_rate": 2.7362001892545474e-05, "loss": 2.2252, "step": 129240 }, { "epoch": 0.45315949952686363, "grad_norm": 0.5385192036628723, "learning_rate": 2.735148775102513e-05, "loss": 2.2337, "step": 129300 }, { "epoch": 0.45336978235727055, "grad_norm": 0.5326671004295349, "learning_rate": 2.7340973609504784e-05, "loss": 2.2292, "step": 129360 }, { "epoch": 0.4535800651876774, "grad_norm": 0.582104504108429, "learning_rate": 2.7330459467984443e-05, "loss": 2.225, "step": 129420 }, { "epoch": 0.4537903480180843, "grad_norm": 0.5414701700210571, "learning_rate": 2.7319945326464098e-05, "loss": 2.2438, "step": 129480 }, { "epoch": 0.45400063084849124, "grad_norm": 0.5443952679634094, "learning_rate": 2.730943118494375e-05, "loss": 2.2414, "step": 129540 }, { 
"epoch": 0.4542109136788981, "grad_norm": 0.49736344814300537, "learning_rate": 2.7298917043423405e-05, "loss": 2.2184, "step": 129600 }, { "epoch": 0.454421196509305, "grad_norm": 0.5797542929649353, "learning_rate": 2.728840290190306e-05, "loss": 2.224, "step": 129660 }, { "epoch": 0.45463147933971193, "grad_norm": 0.5145884156227112, "learning_rate": 2.7277888760382712e-05, "loss": 2.2343, "step": 129720 }, { "epoch": 0.4548417621701188, "grad_norm": 0.5669538378715515, "learning_rate": 2.7267374618862374e-05, "loss": 2.2406, "step": 129780 }, { "epoch": 0.4550520450005257, "grad_norm": 0.531952977180481, "learning_rate": 2.7256860477342026e-05, "loss": 2.2407, "step": 129840 }, { "epoch": 0.4552623278309326, "grad_norm": 0.5253896117210388, "learning_rate": 2.724634633582168e-05, "loss": 2.2334, "step": 129900 }, { "epoch": 0.4554726106613395, "grad_norm": 0.5913567543029785, "learning_rate": 2.7235832194301337e-05, "loss": 2.2157, "step": 129960 }, { "epoch": 0.4556828934917464, "grad_norm": 0.5832261443138123, "learning_rate": 2.722531805278099e-05, "loss": 2.2168, "step": 130020 }, { "epoch": 0.4558931763221533, "grad_norm": 0.528374195098877, "learning_rate": 2.7214803911260644e-05, "loss": 2.2204, "step": 130080 }, { "epoch": 0.4561034591525602, "grad_norm": 0.5559998154640198, "learning_rate": 2.7204465005432305e-05, "loss": 2.2149, "step": 130140 }, { "epoch": 0.4563137419829671, "grad_norm": 0.5101924538612366, "learning_rate": 2.7193950863911967e-05, "loss": 2.2408, "step": 130200 }, { "epoch": 0.456524024813374, "grad_norm": 0.5665587186813354, "learning_rate": 2.718343672239162e-05, "loss": 2.2305, "step": 130260 }, { "epoch": 0.45673430764378087, "grad_norm": 0.601974606513977, "learning_rate": 2.7172922580871274e-05, "loss": 2.2291, "step": 130320 }, { "epoch": 0.4569445904741878, "grad_norm": 0.4964762032032013, "learning_rate": 2.716240843935093e-05, "loss": 2.2179, "step": 130380 }, { "epoch": 0.4571548733045947, "grad_norm": 0.5069715976715088, "learning_rate": 2.715189429783058e-05, "loss": 2.2339, "step": 130440 }, { "epoch": 0.45736515613500156, "grad_norm": 0.560584545135498, "learning_rate": 2.7141380156310236e-05, "loss": 2.2287, "step": 130500 }, { "epoch": 0.4575754389654085, "grad_norm": 0.5845340490341187, "learning_rate": 2.7130866014789895e-05, "loss": 2.2249, "step": 130560 }, { "epoch": 0.4577857217958154, "grad_norm": 0.5387807488441467, "learning_rate": 2.712035187326955e-05, "loss": 2.226, "step": 130620 }, { "epoch": 0.45799600462622225, "grad_norm": 0.5556718111038208, "learning_rate": 2.7109837731749206e-05, "loss": 2.2328, "step": 130680 }, { "epoch": 0.45820628745662917, "grad_norm": 0.5116903185844421, "learning_rate": 2.7099323590228857e-05, "loss": 2.228, "step": 130740 }, { "epoch": 0.4584165702870361, "grad_norm": 0.5489668846130371, "learning_rate": 2.7088809448708513e-05, "loss": 2.2253, "step": 130800 }, { "epoch": 0.45862685311744295, "grad_norm": 0.49988293647766113, "learning_rate": 2.7078295307188168e-05, "loss": 2.2123, "step": 130860 }, { "epoch": 0.45883713594784986, "grad_norm": 0.5553171634674072, "learning_rate": 2.706795640135983e-05, "loss": 2.2295, "step": 130920 }, { "epoch": 0.4590474187782568, "grad_norm": 0.5072939991950989, "learning_rate": 2.7057442259839484e-05, "loss": 2.2345, "step": 130980 }, { "epoch": 0.45925770160866364, "grad_norm": 0.5555667877197266, "learning_rate": 2.7046928118319142e-05, "loss": 2.2448, "step": 131040 }, { "epoch": 0.45946798443907055, "grad_norm": 0.5642482042312622, "learning_rate": 
2.7036413976798798e-05, "loss": 2.2164, "step": 131100 }, { "epoch": 0.45967826726947747, "grad_norm": 0.651848316192627, "learning_rate": 2.702589983527845e-05, "loss": 2.2258, "step": 131160 }, { "epoch": 0.45988855009988433, "grad_norm": 0.6740097403526306, "learning_rate": 2.7015385693758105e-05, "loss": 2.2263, "step": 131220 }, { "epoch": 0.46009883293029125, "grad_norm": 0.5537322759628296, "learning_rate": 2.700487155223776e-05, "loss": 2.2224, "step": 131280 }, { "epoch": 0.46030911576069816, "grad_norm": 0.5565202236175537, "learning_rate": 2.6994357410717412e-05, "loss": 2.2227, "step": 131340 }, { "epoch": 0.460519398591105, "grad_norm": 0.5631937384605408, "learning_rate": 2.6983843269197074e-05, "loss": 2.2206, "step": 131400 }, { "epoch": 0.46072968142151194, "grad_norm": 0.5715699195861816, "learning_rate": 2.6973329127676726e-05, "loss": 2.2145, "step": 131460 }, { "epoch": 0.46093996425191885, "grad_norm": 0.5436623096466064, "learning_rate": 2.696281498615638e-05, "loss": 2.2266, "step": 131520 }, { "epoch": 0.4611502470823257, "grad_norm": 0.5260350108146667, "learning_rate": 2.6952300844636037e-05, "loss": 2.2117, "step": 131580 }, { "epoch": 0.46136052991273263, "grad_norm": 0.49663183093070984, "learning_rate": 2.694178670311569e-05, "loss": 2.227, "step": 131640 }, { "epoch": 0.46157081274313955, "grad_norm": 0.5875915288925171, "learning_rate": 2.6931272561595344e-05, "loss": 2.2093, "step": 131700 }, { "epoch": 0.4617810955735464, "grad_norm": 0.5310595631599426, "learning_rate": 2.6920758420075002e-05, "loss": 2.227, "step": 131760 }, { "epoch": 0.4619913784039533, "grad_norm": 0.5796633958816528, "learning_rate": 2.6910244278554658e-05, "loss": 2.2256, "step": 131820 }, { "epoch": 0.46220166123436024, "grad_norm": 0.5720958113670349, "learning_rate": 2.6899730137034313e-05, "loss": 2.222, "step": 131880 }, { "epoch": 0.4624119440647671, "grad_norm": 0.5652265548706055, "learning_rate": 2.6889215995513968e-05, "loss": 2.2221, "step": 131940 }, { "epoch": 0.462622226895174, "grad_norm": 0.5959270000457764, "learning_rate": 2.687870185399362e-05, "loss": 2.2209, "step": 132000 }, { "epoch": 0.46283250972558093, "grad_norm": 0.573942244052887, "learning_rate": 2.6868187712473282e-05, "loss": 2.2271, "step": 132060 }, { "epoch": 0.4630427925559878, "grad_norm": 0.5455266237258911, "learning_rate": 2.6857673570952934e-05, "loss": 2.2122, "step": 132120 }, { "epoch": 0.4632530753863947, "grad_norm": 0.5603702664375305, "learning_rate": 2.684715942943259e-05, "loss": 2.2281, "step": 132180 }, { "epoch": 0.4634633582168016, "grad_norm": 0.548740565776825, "learning_rate": 2.6836645287912245e-05, "loss": 2.2262, "step": 132240 }, { "epoch": 0.4636736410472085, "grad_norm": 0.5343613624572754, "learning_rate": 2.6826131146391897e-05, "loss": 2.2154, "step": 132300 }, { "epoch": 0.4638839238776154, "grad_norm": 0.5475893020629883, "learning_rate": 2.6815617004871552e-05, "loss": 2.2279, "step": 132360 }, { "epoch": 0.4640942067080223, "grad_norm": 0.522315502166748, "learning_rate": 2.680510286335121e-05, "loss": 2.2222, "step": 132420 }, { "epoch": 0.4643044895384292, "grad_norm": 0.5225598812103271, "learning_rate": 2.6794588721830866e-05, "loss": 2.2285, "step": 132480 }, { "epoch": 0.4645147723688361, "grad_norm": 0.5249638557434082, "learning_rate": 2.678407458031052e-05, "loss": 2.2349, "step": 132540 }, { "epoch": 0.464725055199243, "grad_norm": 0.5643758773803711, "learning_rate": 2.6773560438790173e-05, "loss": 2.2221, "step": 132600 }, { "epoch": 
0.46493533802964987, "grad_norm": 0.4918893873691559, "learning_rate": 2.6763046297269828e-05, "loss": 2.2187, "step": 132660 }, { "epoch": 0.4651456208600568, "grad_norm": 0.5675564408302307, "learning_rate": 2.6752532155749483e-05, "loss": 2.2358, "step": 132720 }, { "epoch": 0.46535590369046365, "grad_norm": 0.5786041021347046, "learning_rate": 2.6742018014229142e-05, "loss": 2.2305, "step": 132780 }, { "epoch": 0.46556618652087056, "grad_norm": 0.6326382756233215, "learning_rate": 2.6731503872708797e-05, "loss": 2.2225, "step": 132840 }, { "epoch": 0.4657764693512775, "grad_norm": 0.5252036452293396, "learning_rate": 2.672098973118845e-05, "loss": 2.2237, "step": 132900 }, { "epoch": 0.46598675218168434, "grad_norm": 0.5439803004264832, "learning_rate": 2.6710475589668105e-05, "loss": 2.2233, "step": 132960 }, { "epoch": 0.46619703501209125, "grad_norm": 0.5557428598403931, "learning_rate": 2.669996144814776e-05, "loss": 2.2139, "step": 133020 }, { "epoch": 0.46640731784249817, "grad_norm": 0.5631593465805054, "learning_rate": 2.6689447306627412e-05, "loss": 2.2353, "step": 133080 }, { "epoch": 0.46661760067290503, "grad_norm": 0.5134449005126953, "learning_rate": 2.6679108400799076e-05, "loss": 2.2281, "step": 133140 }, { "epoch": 0.46682788350331195, "grad_norm": 0.554543137550354, "learning_rate": 2.6668594259278728e-05, "loss": 2.2387, "step": 133200 }, { "epoch": 0.46703816633371886, "grad_norm": 0.5604194402694702, "learning_rate": 2.665808011775839e-05, "loss": 2.2123, "step": 133260 }, { "epoch": 0.4672484491641257, "grad_norm": 0.5490676760673523, "learning_rate": 2.664756597623804e-05, "loss": 2.2075, "step": 133320 }, { "epoch": 0.46745873199453264, "grad_norm": 0.5634739995002747, "learning_rate": 2.6637051834717697e-05, "loss": 2.2261, "step": 133380 }, { "epoch": 0.46766901482493956, "grad_norm": 0.5604760646820068, "learning_rate": 2.6626537693197352e-05, "loss": 2.2322, "step": 133440 }, { "epoch": 0.4678792976553464, "grad_norm": 0.5978127121925354, "learning_rate": 2.6616023551677004e-05, "loss": 2.237, "step": 133500 }, { "epoch": 0.46808958048575333, "grad_norm": 0.6151342988014221, "learning_rate": 2.660550941015666e-05, "loss": 2.2133, "step": 133560 }, { "epoch": 0.46829986331616025, "grad_norm": 0.597916305065155, "learning_rate": 2.6594995268636318e-05, "loss": 2.2218, "step": 133620 }, { "epoch": 0.4685101461465671, "grad_norm": 0.4812368154525757, "learning_rate": 2.6584481127115973e-05, "loss": 2.2089, "step": 133680 }, { "epoch": 0.468720428976974, "grad_norm": 0.5443069338798523, "learning_rate": 2.657396698559563e-05, "loss": 2.2308, "step": 133740 }, { "epoch": 0.46893071180738094, "grad_norm": 0.5664808750152588, "learning_rate": 2.656345284407528e-05, "loss": 2.2107, "step": 133800 }, { "epoch": 0.4691409946377878, "grad_norm": 0.5802755355834961, "learning_rate": 2.6552938702554936e-05, "loss": 2.2307, "step": 133860 }, { "epoch": 0.4693512774681947, "grad_norm": 0.5749623775482178, "learning_rate": 2.654242456103459e-05, "loss": 2.2241, "step": 133920 }, { "epoch": 0.46956156029860163, "grad_norm": 0.5218759775161743, "learning_rate": 2.653191041951425e-05, "loss": 2.2253, "step": 133980 }, { "epoch": 0.4697718431290085, "grad_norm": 0.5462409257888794, "learning_rate": 2.6521396277993905e-05, "loss": 2.2294, "step": 134040 }, { "epoch": 0.4699821259594154, "grad_norm": 0.5934569835662842, "learning_rate": 2.6510882136473557e-05, "loss": 2.2173, "step": 134100 }, { "epoch": 0.4701924087898223, "grad_norm": 0.5049318671226501, "learning_rate": 
2.6500367994953212e-05, "loss": 2.2172, "step": 134160 }, { "epoch": 0.4704026916202292, "grad_norm": 0.5666798949241638, "learning_rate": 2.6489853853432867e-05, "loss": 2.2277, "step": 134220 }, { "epoch": 0.4706129744506361, "grad_norm": 0.5260395407676697, "learning_rate": 2.6479339711912526e-05, "loss": 2.2152, "step": 134280 }, { "epoch": 0.470823257281043, "grad_norm": 0.5611526966094971, "learning_rate": 2.646882557039218e-05, "loss": 2.2153, "step": 134340 }, { "epoch": 0.4710335401114499, "grad_norm": 0.5322962999343872, "learning_rate": 2.6458311428871833e-05, "loss": 2.2136, "step": 134400 }, { "epoch": 0.4712438229418568, "grad_norm": 0.5388842821121216, "learning_rate": 2.644779728735149e-05, "loss": 2.2204, "step": 134460 }, { "epoch": 0.4714541057722637, "grad_norm": 0.5794078707695007, "learning_rate": 2.6437283145831144e-05, "loss": 2.2324, "step": 134520 }, { "epoch": 0.47166438860267057, "grad_norm": 0.5857319235801697, "learning_rate": 2.64267690043108e-05, "loss": 2.2398, "step": 134580 }, { "epoch": 0.4718746714330775, "grad_norm": 0.5116488933563232, "learning_rate": 2.6416254862790458e-05, "loss": 2.2248, "step": 134640 }, { "epoch": 0.4720849542634844, "grad_norm": 0.5278642177581787, "learning_rate": 2.6405740721270113e-05, "loss": 2.2372, "step": 134700 }, { "epoch": 0.47229523709389126, "grad_norm": 0.5648896098136902, "learning_rate": 2.6395226579749765e-05, "loss": 2.22, "step": 134760 }, { "epoch": 0.4725055199242982, "grad_norm": 0.5393842458724976, "learning_rate": 2.638471243822942e-05, "loss": 2.2364, "step": 134820 }, { "epoch": 0.4727158027547051, "grad_norm": 0.558422863483429, "learning_rate": 2.6374198296709075e-05, "loss": 2.2243, "step": 134880 }, { "epoch": 0.47292608558511195, "grad_norm": 0.6029451489448547, "learning_rate": 2.6363684155188727e-05, "loss": 2.2318, "step": 134940 }, { "epoch": 0.47313636841551887, "grad_norm": 0.7082358002662659, "learning_rate": 2.635317001366839e-05, "loss": 2.2211, "step": 135000 }, { "epoch": 0.4733466512459258, "grad_norm": 0.551494300365448, "learning_rate": 2.634265587214804e-05, "loss": 2.2234, "step": 135060 }, { "epoch": 0.47355693407633265, "grad_norm": 0.4907706081867218, "learning_rate": 2.6332141730627697e-05, "loss": 2.2184, "step": 135120 }, { "epoch": 0.47376721690673956, "grad_norm": 0.5930429100990295, "learning_rate": 2.6321627589107352e-05, "loss": 2.2248, "step": 135180 }, { "epoch": 0.4739774997371465, "grad_norm": 0.6308527588844299, "learning_rate": 2.6311113447587004e-05, "loss": 2.2286, "step": 135240 }, { "epoch": 0.47418778256755334, "grad_norm": 0.5455154180526733, "learning_rate": 2.630059930606666e-05, "loss": 2.2261, "step": 135300 }, { "epoch": 0.47439806539796026, "grad_norm": 0.5316567420959473, "learning_rate": 2.6290085164546318e-05, "loss": 2.2183, "step": 135360 }, { "epoch": 0.47460834822836717, "grad_norm": 0.5488483309745789, "learning_rate": 2.6279571023025973e-05, "loss": 2.2377, "step": 135420 }, { "epoch": 0.47481863105877403, "grad_norm": 0.5488642454147339, "learning_rate": 2.6269056881505628e-05, "loss": 2.213, "step": 135480 }, { "epoch": 0.47502891388918095, "grad_norm": 0.5235707759857178, "learning_rate": 2.625854273998528e-05, "loss": 2.2269, "step": 135540 }, { "epoch": 0.47523919671958786, "grad_norm": 0.5823555588722229, "learning_rate": 2.6248028598464935e-05, "loss": 2.2304, "step": 135600 }, { "epoch": 0.4754494795499947, "grad_norm": 0.5386966466903687, "learning_rate": 2.623751445694459e-05, "loss": 2.2283, "step": 135660 }, { "epoch": 
0.47565976238040164, "grad_norm": 0.5936428904533386, "learning_rate": 2.622700031542425e-05, "loss": 2.2251, "step": 135720 }, { "epoch": 0.47587004521080856, "grad_norm": 0.5601207613945007, "learning_rate": 2.6216486173903905e-05, "loss": 2.235, "step": 135780 }, { "epoch": 0.4760803280412154, "grad_norm": 0.4968276023864746, "learning_rate": 2.6205972032383557e-05, "loss": 2.2109, "step": 135840 }, { "epoch": 0.47629061087162233, "grad_norm": 0.5630781650543213, "learning_rate": 2.6195457890863212e-05, "loss": 2.2279, "step": 135900 }, { "epoch": 0.47650089370202925, "grad_norm": 0.552478551864624, "learning_rate": 2.6184943749342867e-05, "loss": 2.2225, "step": 135960 }, { "epoch": 0.4767111765324361, "grad_norm": 0.5814388394355774, "learning_rate": 2.617442960782252e-05, "loss": 2.2248, "step": 136020 }, { "epoch": 0.476921459362843, "grad_norm": 0.5658822059631348, "learning_rate": 2.616391546630218e-05, "loss": 2.2234, "step": 136080 }, { "epoch": 0.47713174219324994, "grad_norm": 0.5445215106010437, "learning_rate": 2.6153401324781833e-05, "loss": 2.2142, "step": 136140 }, { "epoch": 0.4773420250236568, "grad_norm": 0.6274155378341675, "learning_rate": 2.6142887183261488e-05, "loss": 2.2332, "step": 136200 }, { "epoch": 0.4775523078540637, "grad_norm": 0.527101457118988, "learning_rate": 2.6132373041741143e-05, "loss": 2.2325, "step": 136260 }, { "epoch": 0.47776259068447063, "grad_norm": 0.5380088090896606, "learning_rate": 2.6121858900220795e-05, "loss": 2.2114, "step": 136320 }, { "epoch": 0.4779728735148775, "grad_norm": 0.5434974431991577, "learning_rate": 2.611134475870045e-05, "loss": 2.2182, "step": 136380 }, { "epoch": 0.4781831563452844, "grad_norm": 0.5429152846336365, "learning_rate": 2.610083061718011e-05, "loss": 2.2297, "step": 136440 }, { "epoch": 0.4783934391756913, "grad_norm": 0.5782210826873779, "learning_rate": 2.6090316475659765e-05, "loss": 2.2281, "step": 136500 }, { "epoch": 0.4786037220060982, "grad_norm": 0.5653987526893616, "learning_rate": 2.607980233413942e-05, "loss": 2.2234, "step": 136560 }, { "epoch": 0.4788140048365051, "grad_norm": 0.5530957579612732, "learning_rate": 2.6069288192619075e-05, "loss": 2.208, "step": 136620 }, { "epoch": 0.479024287666912, "grad_norm": 0.5284966230392456, "learning_rate": 2.6058774051098727e-05, "loss": 2.2094, "step": 136680 }, { "epoch": 0.4792345704973189, "grad_norm": 0.5516782999038696, "learning_rate": 2.6048259909578382e-05, "loss": 2.2212, "step": 136740 }, { "epoch": 0.4794448533277258, "grad_norm": 0.498627245426178, "learning_rate": 2.603774576805804e-05, "loss": 2.222, "step": 136800 }, { "epoch": 0.4796551361581327, "grad_norm": 0.5247315168380737, "learning_rate": 2.6027231626537696e-05, "loss": 2.2061, "step": 136860 }, { "epoch": 0.47986541898853957, "grad_norm": 0.6753693222999573, "learning_rate": 2.601671748501735e-05, "loss": 2.2286, "step": 136920 }, { "epoch": 0.4800757018189465, "grad_norm": 0.5473688244819641, "learning_rate": 2.6006203343497003e-05, "loss": 2.2231, "step": 136980 }, { "epoch": 0.4802859846493534, "grad_norm": 0.5400590300559998, "learning_rate": 2.599568920197666e-05, "loss": 2.2172, "step": 137040 }, { "epoch": 0.48049626747976026, "grad_norm": 0.5710771679878235, "learning_rate": 2.5985175060456314e-05, "loss": 2.2218, "step": 137100 }, { "epoch": 0.4807065503101672, "grad_norm": 0.5183876752853394, "learning_rate": 2.5974660918935973e-05, "loss": 2.226, "step": 137160 }, { "epoch": 0.4809168331405741, "grad_norm": 0.5513122081756592, "learning_rate": 
2.5964322013107633e-05, "loss": 2.2181, "step": 137220 }, { "epoch": 0.48112711597098096, "grad_norm": 0.4864659905433655, "learning_rate": 2.595380787158729e-05, "loss": 2.2214, "step": 137280 }, { "epoch": 0.4813373988013879, "grad_norm": 0.5089411735534668, "learning_rate": 2.5943293730066944e-05, "loss": 2.2298, "step": 137340 }, { "epoch": 0.4815476816317948, "grad_norm": 0.6495891809463501, "learning_rate": 2.5932779588546596e-05, "loss": 2.2157, "step": 137400 }, { "epoch": 0.48175796446220165, "grad_norm": 0.5468581914901733, "learning_rate": 2.592226544702625e-05, "loss": 2.2176, "step": 137460 }, { "epoch": 0.48196824729260856, "grad_norm": 0.49662095308303833, "learning_rate": 2.5911751305505906e-05, "loss": 2.2285, "step": 137520 }, { "epoch": 0.4821785301230155, "grad_norm": 0.5509756207466125, "learning_rate": 2.5901237163985565e-05, "loss": 2.2188, "step": 137580 }, { "epoch": 0.48238881295342234, "grad_norm": 0.4889494776725769, "learning_rate": 2.589072302246522e-05, "loss": 2.2104, "step": 137640 }, { "epoch": 0.48259909578382926, "grad_norm": 0.5467410087585449, "learning_rate": 2.5880208880944872e-05, "loss": 2.2241, "step": 137700 }, { "epoch": 0.4828093786142362, "grad_norm": 0.5617779493331909, "learning_rate": 2.5869694739424527e-05, "loss": 2.2173, "step": 137760 }, { "epoch": 0.48301966144464303, "grad_norm": 0.5608574748039246, "learning_rate": 2.5859180597904183e-05, "loss": 2.225, "step": 137820 }, { "epoch": 0.48322994427504995, "grad_norm": 0.5132883787155151, "learning_rate": 2.5848666456383834e-05, "loss": 2.2, "step": 137880 }, { "epoch": 0.48344022710545687, "grad_norm": 0.5200146436691284, "learning_rate": 2.5838152314863497e-05, "loss": 2.2331, "step": 137940 }, { "epoch": 0.4836505099358637, "grad_norm": 0.5218937993049622, "learning_rate": 2.582763817334315e-05, "loss": 2.2115, "step": 138000 }, { "epoch": 0.48386079276627064, "grad_norm": 0.5255389213562012, "learning_rate": 2.5817124031822804e-05, "loss": 2.2251, "step": 138060 }, { "epoch": 0.48407107559667756, "grad_norm": 0.5643746852874756, "learning_rate": 2.580660989030246e-05, "loss": 2.2332, "step": 138120 }, { "epoch": 0.4842813584270844, "grad_norm": 0.6352805495262146, "learning_rate": 2.579609574878211e-05, "loss": 2.2218, "step": 138180 }, { "epoch": 0.48449164125749133, "grad_norm": 0.559818685054779, "learning_rate": 2.5785756842953775e-05, "loss": 2.2327, "step": 138240 }, { "epoch": 0.48470192408789825, "grad_norm": 0.5820249319076538, "learning_rate": 2.5775242701433427e-05, "loss": 2.2132, "step": 138300 }, { "epoch": 0.4849122069183051, "grad_norm": 0.551038384437561, "learning_rate": 2.5764728559913082e-05, "loss": 2.2305, "step": 138360 }, { "epoch": 0.485122489748712, "grad_norm": 0.512161374092102, "learning_rate": 2.575421441839274e-05, "loss": 2.2239, "step": 138420 }, { "epoch": 0.4853327725791189, "grad_norm": 0.5664234161376953, "learning_rate": 2.5743700276872396e-05, "loss": 2.2149, "step": 138480 }, { "epoch": 0.4855430554095258, "grad_norm": 0.525309681892395, "learning_rate": 2.573318613535205e-05, "loss": 2.2167, "step": 138540 }, { "epoch": 0.4857533382399327, "grad_norm": 0.5458988547325134, "learning_rate": 2.5722671993831703e-05, "loss": 2.2209, "step": 138600 }, { "epoch": 0.4859636210703396, "grad_norm": 0.5710783004760742, "learning_rate": 2.571215785231136e-05, "loss": 2.2239, "step": 138660 }, { "epoch": 0.4861739039007465, "grad_norm": 0.537351131439209, "learning_rate": 2.5701643710791014e-05, "loss": 2.2072, "step": 138720 }, { "epoch": 
0.4863841867311534, "grad_norm": 0.5294988751411438, "learning_rate": 2.5691129569270672e-05, "loss": 2.2131, "step": 138780 }, { "epoch": 0.48659446956156027, "grad_norm": 0.4968686103820801, "learning_rate": 2.5680615427750328e-05, "loss": 2.2291, "step": 138840 }, { "epoch": 0.4868047523919672, "grad_norm": 0.5579273700714111, "learning_rate": 2.567010128622998e-05, "loss": 2.2172, "step": 138900 }, { "epoch": 0.4870150352223741, "grad_norm": 0.5695374608039856, "learning_rate": 2.5659587144709635e-05, "loss": 2.2108, "step": 138960 }, { "epoch": 0.48722531805278096, "grad_norm": 0.5216060280799866, "learning_rate": 2.564907300318929e-05, "loss": 2.2162, "step": 139020 }, { "epoch": 0.4874356008831879, "grad_norm": 0.491592139005661, "learning_rate": 2.563855886166895e-05, "loss": 2.2196, "step": 139080 }, { "epoch": 0.4876458837135948, "grad_norm": 0.6150323152542114, "learning_rate": 2.5628044720148604e-05, "loss": 2.205, "step": 139140 }, { "epoch": 0.48785616654400166, "grad_norm": 0.5466335415840149, "learning_rate": 2.5617705814320265e-05, "loss": 2.2297, "step": 139200 }, { "epoch": 0.4880664493744086, "grad_norm": 0.6138894557952881, "learning_rate": 2.560719167279992e-05, "loss": 2.2275, "step": 139260 }, { "epoch": 0.4882767322048155, "grad_norm": 0.5531201362609863, "learning_rate": 2.559667753127957e-05, "loss": 2.2157, "step": 139320 }, { "epoch": 0.48848701503522235, "grad_norm": 0.5969268083572388, "learning_rate": 2.5586163389759227e-05, "loss": 2.2197, "step": 139380 }, { "epoch": 0.48869729786562927, "grad_norm": 0.5635274648666382, "learning_rate": 2.5575649248238882e-05, "loss": 2.2253, "step": 139440 }, { "epoch": 0.4889075806960362, "grad_norm": 0.6807565689086914, "learning_rate": 2.5565135106718534e-05, "loss": 2.2206, "step": 139500 }, { "epoch": 0.48911786352644304, "grad_norm": 0.5284523963928223, "learning_rate": 2.5554620965198196e-05, "loss": 2.2179, "step": 139560 }, { "epoch": 0.48932814635684996, "grad_norm": 0.614186704158783, "learning_rate": 2.5544106823677848e-05, "loss": 2.2238, "step": 139620 }, { "epoch": 0.4895384291872569, "grad_norm": 0.48403292894363403, "learning_rate": 2.5533592682157503e-05, "loss": 2.2155, "step": 139680 }, { "epoch": 0.48974871201766373, "grad_norm": 0.536378026008606, "learning_rate": 2.552307854063716e-05, "loss": 2.2065, "step": 139740 }, { "epoch": 0.48995899484807065, "grad_norm": 0.5436242818832397, "learning_rate": 2.551256439911681e-05, "loss": 2.2155, "step": 139800 }, { "epoch": 0.49016927767847757, "grad_norm": 0.5471640825271606, "learning_rate": 2.5502050257596466e-05, "loss": 2.2185, "step": 139860 }, { "epoch": 0.4903795605088844, "grad_norm": 0.5677446722984314, "learning_rate": 2.5491536116076124e-05, "loss": 2.2149, "step": 139920 }, { "epoch": 0.49058984333929134, "grad_norm": 0.5640824437141418, "learning_rate": 2.548102197455578e-05, "loss": 2.219, "step": 139980 }, { "epoch": 0.49080012616969826, "grad_norm": 0.54488605260849, "learning_rate": 2.5470507833035435e-05, "loss": 2.2243, "step": 140040 }, { "epoch": 0.4910104090001051, "grad_norm": 0.587646484375, "learning_rate": 2.5459993691515087e-05, "loss": 2.2201, "step": 140100 }, { "epoch": 0.49122069183051204, "grad_norm": 0.5216631293296814, "learning_rate": 2.5449479549994742e-05, "loss": 2.2111, "step": 140160 }, { "epoch": 0.49143097466091895, "grad_norm": 0.5511109828948975, "learning_rate": 2.5438965408474397e-05, "loss": 2.2034, "step": 140220 }, { "epoch": 0.4916412574913258, "grad_norm": 0.5907667875289917, "learning_rate": 
2.5428451266954056e-05, "loss": 2.2101, "step": 140280 }, { "epoch": 0.4918515403217327, "grad_norm": 0.6119207739830017, "learning_rate": 2.541793712543371e-05, "loss": 2.2313, "step": 140340 }, { "epoch": 0.49206182315213964, "grad_norm": 0.5808233618736267, "learning_rate": 2.5407422983913363e-05, "loss": 2.2082, "step": 140400 }, { "epoch": 0.4922721059825465, "grad_norm": 0.5466307401657104, "learning_rate": 2.539690884239302e-05, "loss": 2.2109, "step": 140460 }, { "epoch": 0.4924823888129534, "grad_norm": 0.5470625758171082, "learning_rate": 2.5386394700872674e-05, "loss": 2.2139, "step": 140520 }, { "epoch": 0.49269267164336034, "grad_norm": 0.5294373035430908, "learning_rate": 2.5375880559352326e-05, "loss": 2.2275, "step": 140580 }, { "epoch": 0.4929029544737672, "grad_norm": 0.5608066320419312, "learning_rate": 2.5365366417831988e-05, "loss": 2.2222, "step": 140640 }, { "epoch": 0.4931132373041741, "grad_norm": 0.5671436190605164, "learning_rate": 2.535485227631164e-05, "loss": 2.2208, "step": 140700 }, { "epoch": 0.49332352013458103, "grad_norm": 0.5754434466362, "learning_rate": 2.5344338134791295e-05, "loss": 2.2126, "step": 140760 }, { "epoch": 0.4935338029649879, "grad_norm": 0.5340360403060913, "learning_rate": 2.533382399327095e-05, "loss": 2.2138, "step": 140820 }, { "epoch": 0.4937440857953948, "grad_norm": 0.5738496780395508, "learning_rate": 2.5323309851750606e-05, "loss": 2.2305, "step": 140880 }, { "epoch": 0.4939543686258017, "grad_norm": 0.49853968620300293, "learning_rate": 2.5312795710230257e-05, "loss": 2.1923, "step": 140940 }, { "epoch": 0.4941646514562086, "grad_norm": 0.5699918866157532, "learning_rate": 2.530228156870992e-05, "loss": 2.2172, "step": 141000 }, { "epoch": 0.4943749342866155, "grad_norm": 0.6300356984138489, "learning_rate": 2.529176742718957e-05, "loss": 2.217, "step": 141060 }, { "epoch": 0.4945852171170224, "grad_norm": 0.5313805937767029, "learning_rate": 2.5281253285669227e-05, "loss": 2.2302, "step": 141120 }, { "epoch": 0.4947954999474293, "grad_norm": 0.6249646544456482, "learning_rate": 2.5270739144148882e-05, "loss": 2.2078, "step": 141180 }, { "epoch": 0.4950057827778362, "grad_norm": 0.5782462954521179, "learning_rate": 2.5260225002628534e-05, "loss": 2.2239, "step": 141240 }, { "epoch": 0.4952160656082431, "grad_norm": 0.5095561146736145, "learning_rate": 2.524971086110819e-05, "loss": 2.2215, "step": 141300 }, { "epoch": 0.49542634843864997, "grad_norm": 0.5634046196937561, "learning_rate": 2.5239196719587848e-05, "loss": 2.2081, "step": 141360 }, { "epoch": 0.4956366312690569, "grad_norm": 0.5690566301345825, "learning_rate": 2.5228682578067503e-05, "loss": 2.2248, "step": 141420 }, { "epoch": 0.4958469140994638, "grad_norm": 0.5184747576713562, "learning_rate": 2.521816843654716e-05, "loss": 2.2148, "step": 141480 }, { "epoch": 0.49605719692987066, "grad_norm": 0.5188051462173462, "learning_rate": 2.520765429502681e-05, "loss": 2.2166, "step": 141540 }, { "epoch": 0.4962674797602776, "grad_norm": 0.5549147725105286, "learning_rate": 2.5197140153506466e-05, "loss": 2.2322, "step": 141600 }, { "epoch": 0.4964777625906845, "grad_norm": 0.5586085319519043, "learning_rate": 2.5186626011986124e-05, "loss": 2.2367, "step": 141660 }, { "epoch": 0.49668804542109135, "grad_norm": 0.5334410667419434, "learning_rate": 2.517611187046578e-05, "loss": 2.2272, "step": 141720 }, { "epoch": 0.49689832825149827, "grad_norm": 0.560259222984314, "learning_rate": 2.5165597728945435e-05, "loss": 2.2138, "step": 141780 }, { "epoch": 
0.4971086110819052, "grad_norm": 0.5895169377326965, "learning_rate": 2.5155083587425087e-05, "loss": 2.2213, "step": 141840 }, { "epoch": 0.49731889391231204, "grad_norm": 0.5676740407943726, "learning_rate": 2.5144569445904742e-05, "loss": 2.2199, "step": 141900 }, { "epoch": 0.49752917674271896, "grad_norm": 0.507685661315918, "learning_rate": 2.5134055304384397e-05, "loss": 2.2129, "step": 141960 }, { "epoch": 0.4977394595731259, "grad_norm": 0.5849261283874512, "learning_rate": 2.5123541162864056e-05, "loss": 2.2321, "step": 142020 }, { "epoch": 0.49794974240353274, "grad_norm": 0.5333006978034973, "learning_rate": 2.511302702134371e-05, "loss": 2.2164, "step": 142080 }, { "epoch": 0.49816002523393965, "grad_norm": 0.5627948045730591, "learning_rate": 2.5102512879823363e-05, "loss": 2.2245, "step": 142140 }, { "epoch": 0.49837030806434657, "grad_norm": 0.5709524154663086, "learning_rate": 2.509199873830302e-05, "loss": 2.2225, "step": 142200 }, { "epoch": 0.49858059089475343, "grad_norm": 0.5809540748596191, "learning_rate": 2.5081484596782674e-05, "loss": 2.2103, "step": 142260 }, { "epoch": 0.49879087372516034, "grad_norm": 0.5410124659538269, "learning_rate": 2.5070970455262326e-05, "loss": 2.2285, "step": 142320 }, { "epoch": 0.49900115655556726, "grad_norm": 0.5636032819747925, "learning_rate": 2.5060456313741988e-05, "loss": 2.2171, "step": 142380 }, { "epoch": 0.4992114393859741, "grad_norm": 0.5565786361694336, "learning_rate": 2.504994217222164e-05, "loss": 2.2195, "step": 142440 }, { "epoch": 0.49942172221638104, "grad_norm": 0.5169489979743958, "learning_rate": 2.5039428030701295e-05, "loss": 2.232, "step": 142500 }, { "epoch": 0.49963200504678795, "grad_norm": 0.5828045010566711, "learning_rate": 2.502891388918095e-05, "loss": 2.226, "step": 142560 }, { "epoch": 0.4998422878771948, "grad_norm": 0.5285956859588623, "learning_rate": 2.5018399747660602e-05, "loss": 2.2235, "step": 142620 }, { "epoch": 0.5000525707076017, "grad_norm": 0.5771108865737915, "learning_rate": 2.5007885606140257e-05, "loss": 2.2077, "step": 142680 }, { "epoch": 0.5002628535380086, "grad_norm": 0.5183426737785339, "learning_rate": 2.4997371464619916e-05, "loss": 2.2273, "step": 142740 }, { "epoch": 0.5004731363684155, "grad_norm": 0.5981959700584412, "learning_rate": 2.4986857323099568e-05, "loss": 2.2246, "step": 142800 }, { "epoch": 0.5006834191988224, "grad_norm": 0.5162586569786072, "learning_rate": 2.4976343181579226e-05, "loss": 2.215, "step": 142860 }, { "epoch": 0.5008937020292293, "grad_norm": 0.5488058924674988, "learning_rate": 2.4965829040058882e-05, "loss": 2.2118, "step": 142920 }, { "epoch": 0.5011039848596363, "grad_norm": 0.5368149280548096, "learning_rate": 2.4955314898538534e-05, "loss": 2.216, "step": 142980 }, { "epoch": 0.5013142676900431, "grad_norm": 0.5487305521965027, "learning_rate": 2.4944800757018192e-05, "loss": 2.211, "step": 143040 }, { "epoch": 0.50152455052045, "grad_norm": 0.5786258578300476, "learning_rate": 2.4934286615497844e-05, "loss": 2.2345, "step": 143100 }, { "epoch": 0.5017348333508569, "grad_norm": 0.549926221370697, "learning_rate": 2.49237724739775e-05, "loss": 2.2161, "step": 143160 }, { "epoch": 0.5019451161812638, "grad_norm": 0.5187004208564758, "learning_rate": 2.4913258332457158e-05, "loss": 2.233, "step": 143220 }, { "epoch": 0.5021553990116707, "grad_norm": 0.5308973789215088, "learning_rate": 2.490274419093681e-05, "loss": 2.2098, "step": 143280 }, { "epoch": 0.5023656818420776, "grad_norm": 0.5926359295845032, "learning_rate": 
2.4892230049416465e-05, "loss": 2.2082, "step": 143340 }, { "epoch": 0.5025759646724844, "grad_norm": 0.5576338768005371, "learning_rate": 2.488171590789612e-05, "loss": 2.2275, "step": 143400 }, { "epoch": 0.5027862475028914, "grad_norm": 0.5443142652511597, "learning_rate": 2.4871201766375776e-05, "loss": 2.2097, "step": 143460 }, { "epoch": 0.5029965303332983, "grad_norm": 0.5324608683586121, "learning_rate": 2.486068762485543e-05, "loss": 2.2276, "step": 143520 }, { "epoch": 0.5032068131637052, "grad_norm": 0.6219227313995361, "learning_rate": 2.4850173483335086e-05, "loss": 2.2128, "step": 143580 }, { "epoch": 0.5034170959941121, "grad_norm": 0.5511086583137512, "learning_rate": 2.483965934181474e-05, "loss": 2.2231, "step": 143640 }, { "epoch": 0.503627378824519, "grad_norm": 0.5457072257995605, "learning_rate": 2.4829145200294397e-05, "loss": 2.2117, "step": 143700 }, { "epoch": 0.5038376616549258, "grad_norm": 0.5231776237487793, "learning_rate": 2.4818631058774052e-05, "loss": 2.2244, "step": 143760 }, { "epoch": 0.5040479444853327, "grad_norm": 0.5896267890930176, "learning_rate": 2.4808116917253708e-05, "loss": 2.2073, "step": 143820 }, { "epoch": 0.5042582273157397, "grad_norm": 0.5525411367416382, "learning_rate": 2.4797602775733363e-05, "loss": 2.2325, "step": 143880 }, { "epoch": 0.5044685101461466, "grad_norm": 0.5126237869262695, "learning_rate": 2.4787088634213018e-05, "loss": 2.2142, "step": 143940 }, { "epoch": 0.5046787929765535, "grad_norm": 0.5332605242729187, "learning_rate": 2.4776574492692673e-05, "loss": 2.2041, "step": 144000 }, { "epoch": 0.5048890758069604, "grad_norm": 0.5159221291542053, "learning_rate": 2.4766060351172325e-05, "loss": 2.2258, "step": 144060 }, { "epoch": 0.5050993586373672, "grad_norm": 0.546165406703949, "learning_rate": 2.4755546209651984e-05, "loss": 2.2118, "step": 144120 }, { "epoch": 0.5053096414677741, "grad_norm": 0.6250487565994263, "learning_rate": 2.474503206813164e-05, "loss": 2.2144, "step": 144180 }, { "epoch": 0.505519924298181, "grad_norm": 0.5558093190193176, "learning_rate": 2.4734517926611294e-05, "loss": 2.2113, "step": 144240 }, { "epoch": 0.505730207128588, "grad_norm": 0.5311017036437988, "learning_rate": 2.472400378509095e-05, "loss": 2.2064, "step": 144300 }, { "epoch": 0.5059404899589949, "grad_norm": 0.5303267240524292, "learning_rate": 2.47134896435706e-05, "loss": 2.2178, "step": 144360 }, { "epoch": 0.5061507727894018, "grad_norm": 0.5508550405502319, "learning_rate": 2.470297550205026e-05, "loss": 2.2266, "step": 144420 }, { "epoch": 0.5063610556198086, "grad_norm": 0.5501142144203186, "learning_rate": 2.4692461360529916e-05, "loss": 2.2181, "step": 144480 }, { "epoch": 0.5065713384502155, "grad_norm": 0.5321212410926819, "learning_rate": 2.4681947219009567e-05, "loss": 2.2072, "step": 144540 }, { "epoch": 0.5067816212806224, "grad_norm": 0.598643958568573, "learning_rate": 2.4671433077489226e-05, "loss": 2.2166, "step": 144600 }, { "epoch": 0.5069919041110293, "grad_norm": 0.5446153879165649, "learning_rate": 2.4660918935968878e-05, "loss": 2.2168, "step": 144660 }, { "epoch": 0.5072021869414363, "grad_norm": 0.5070498585700989, "learning_rate": 2.4650404794448533e-05, "loss": 2.2242, "step": 144720 }, { "epoch": 0.5074124697718432, "grad_norm": 0.5821142792701721, "learning_rate": 2.4639890652928192e-05, "loss": 2.2081, "step": 144780 }, { "epoch": 0.50762275260225, "grad_norm": 0.6096354126930237, "learning_rate": 2.4629376511407844e-05, "loss": 2.2235, "step": 144840 }, { "epoch": 0.5078330354326569, 
"grad_norm": 0.5673126578330994, "learning_rate": 2.46188623698875e-05, "loss": 2.2095, "step": 144900 }, { "epoch": 0.5080433182630638, "grad_norm": 0.6011648774147034, "learning_rate": 2.4608348228367158e-05, "loss": 2.2172, "step": 144960 }, { "epoch": 0.5082536010934707, "grad_norm": 0.5500099062919617, "learning_rate": 2.459783408684681e-05, "loss": 2.2266, "step": 145020 }, { "epoch": 0.5084638839238776, "grad_norm": 0.6012193560600281, "learning_rate": 2.4587319945326465e-05, "loss": 2.2244, "step": 145080 }, { "epoch": 0.5086741667542846, "grad_norm": 0.5006005764007568, "learning_rate": 2.457680580380612e-05, "loss": 2.2187, "step": 145140 }, { "epoch": 0.5088844495846914, "grad_norm": 0.5464049577713013, "learning_rate": 2.456646689797778e-05, "loss": 2.2264, "step": 145200 }, { "epoch": 0.5090947324150983, "grad_norm": 0.5614712238311768, "learning_rate": 2.4555952756457436e-05, "loss": 2.223, "step": 145260 }, { "epoch": 0.5093050152455052, "grad_norm": 0.5789541602134705, "learning_rate": 2.454543861493709e-05, "loss": 2.1983, "step": 145320 }, { "epoch": 0.5095152980759121, "grad_norm": 0.5474076271057129, "learning_rate": 2.4534924473416747e-05, "loss": 2.2146, "step": 145380 }, { "epoch": 0.509725580906319, "grad_norm": 0.49787166714668274, "learning_rate": 2.4524410331896402e-05, "loss": 2.2116, "step": 145440 }, { "epoch": 0.509935863736726, "grad_norm": 0.5263041853904724, "learning_rate": 2.4513896190376057e-05, "loss": 2.2207, "step": 145500 }, { "epoch": 0.5101461465671328, "grad_norm": 0.5450867414474487, "learning_rate": 2.4503382048855712e-05, "loss": 2.2196, "step": 145560 }, { "epoch": 0.5103564293975397, "grad_norm": 0.5154622793197632, "learning_rate": 2.4492867907335368e-05, "loss": 2.2239, "step": 145620 }, { "epoch": 0.5105667122279466, "grad_norm": 0.5420225858688354, "learning_rate": 2.4482353765815023e-05, "loss": 2.2192, "step": 145680 }, { "epoch": 0.5107769950583535, "grad_norm": 0.544823944568634, "learning_rate": 2.4471839624294675e-05, "loss": 2.2148, "step": 145740 }, { "epoch": 0.5109872778887604, "grad_norm": 0.5374436378479004, "learning_rate": 2.4461325482774334e-05, "loss": 2.2034, "step": 145800 }, { "epoch": 0.5111975607191673, "grad_norm": 0.5344963073730469, "learning_rate": 2.445081134125399e-05, "loss": 2.2237, "step": 145860 }, { "epoch": 0.5114078435495741, "grad_norm": 0.5483312010765076, "learning_rate": 2.444029719973364e-05, "loss": 2.2139, "step": 145920 }, { "epoch": 0.511618126379981, "grad_norm": 0.5935961604118347, "learning_rate": 2.44297830582133e-05, "loss": 2.2039, "step": 145980 }, { "epoch": 0.511828409210388, "grad_norm": 0.4969542920589447, "learning_rate": 2.441926891669295e-05, "loss": 2.2228, "step": 146040 }, { "epoch": 0.5120386920407949, "grad_norm": 0.5557306408882141, "learning_rate": 2.4408754775172607e-05, "loss": 2.2329, "step": 146100 }, { "epoch": 0.5122489748712018, "grad_norm": 0.6203109622001648, "learning_rate": 2.4398240633652265e-05, "loss": 2.2258, "step": 146160 }, { "epoch": 0.5124592577016087, "grad_norm": 0.5021337270736694, "learning_rate": 2.4387726492131917e-05, "loss": 2.2227, "step": 146220 }, { "epoch": 0.5126695405320155, "grad_norm": 0.5517904162406921, "learning_rate": 2.4377212350611572e-05, "loss": 2.213, "step": 146280 }, { "epoch": 0.5128798233624224, "grad_norm": 0.5719086527824402, "learning_rate": 2.4366698209091228e-05, "loss": 2.2296, "step": 146340 }, { "epoch": 0.5130901061928294, "grad_norm": 0.6046878695487976, "learning_rate": 2.4356184067570883e-05, "loss": 2.2074, 
"step": 146400 }, { "epoch": 0.5133003890232363, "grad_norm": 0.5404784679412842, "learning_rate": 2.4345669926050538e-05, "loss": 2.231, "step": 146460 }, { "epoch": 0.5135106718536432, "grad_norm": 0.49592384696006775, "learning_rate": 2.4335155784530194e-05, "loss": 2.2109, "step": 146520 }, { "epoch": 0.5137209546840501, "grad_norm": 0.5282294750213623, "learning_rate": 2.432464164300985e-05, "loss": 2.2199, "step": 146580 }, { "epoch": 0.5139312375144569, "grad_norm": 0.5357757806777954, "learning_rate": 2.4314127501489508e-05, "loss": 2.2123, "step": 146640 }, { "epoch": 0.5141415203448638, "grad_norm": 0.5113288760185242, "learning_rate": 2.430361335996916e-05, "loss": 2.2078, "step": 146700 }, { "epoch": 0.5143518031752707, "grad_norm": 0.5509752631187439, "learning_rate": 2.4293099218448815e-05, "loss": 2.2105, "step": 146760 }, { "epoch": 0.5145620860056777, "grad_norm": 0.5632410049438477, "learning_rate": 2.428258507692847e-05, "loss": 2.2158, "step": 146820 }, { "epoch": 0.5147723688360846, "grad_norm": 0.525120198726654, "learning_rate": 2.4272070935408125e-05, "loss": 2.2228, "step": 146880 }, { "epoch": 0.5149826516664914, "grad_norm": 0.5465018153190613, "learning_rate": 2.426155679388778e-05, "loss": 2.201, "step": 146940 }, { "epoch": 0.5151929344968983, "grad_norm": 0.5666688084602356, "learning_rate": 2.4251042652367436e-05, "loss": 2.199, "step": 147000 }, { "epoch": 0.5154032173273052, "grad_norm": 0.5029938220977783, "learning_rate": 2.424052851084709e-05, "loss": 2.21, "step": 147060 }, { "epoch": 0.5156135001577121, "grad_norm": 0.5832599401473999, "learning_rate": 2.4230014369326746e-05, "loss": 2.2302, "step": 147120 }, { "epoch": 0.515823782988119, "grad_norm": 0.5170128345489502, "learning_rate": 2.4219675463498407e-05, "loss": 2.2179, "step": 147180 }, { "epoch": 0.516034065818526, "grad_norm": 0.6044071316719055, "learning_rate": 2.4209161321978062e-05, "loss": 2.2212, "step": 147240 }, { "epoch": 0.5162443486489328, "grad_norm": 0.5323086977005005, "learning_rate": 2.4198647180457717e-05, "loss": 2.2144, "step": 147300 }, { "epoch": 0.5164546314793397, "grad_norm": 0.5192455649375916, "learning_rate": 2.4188133038937373e-05, "loss": 2.2064, "step": 147360 }, { "epoch": 0.5166649143097466, "grad_norm": 0.5478724241256714, "learning_rate": 2.4177618897417025e-05, "loss": 2.2047, "step": 147420 }, { "epoch": 0.5168751971401535, "grad_norm": 0.5389701724052429, "learning_rate": 2.4167104755896683e-05, "loss": 2.2104, "step": 147480 }, { "epoch": 0.5170854799705604, "grad_norm": 0.5311550498008728, "learning_rate": 2.415659061437634e-05, "loss": 2.214, "step": 147540 }, { "epoch": 0.5172957628009673, "grad_norm": 0.6084071397781372, "learning_rate": 2.414607647285599e-05, "loss": 2.2091, "step": 147600 }, { "epoch": 0.5175060456313741, "grad_norm": 0.5623484253883362, "learning_rate": 2.413556233133565e-05, "loss": 2.2095, "step": 147660 }, { "epoch": 0.5177163284617811, "grad_norm": 0.509027898311615, "learning_rate": 2.41250481898153e-05, "loss": 2.2212, "step": 147720 }, { "epoch": 0.517926611292188, "grad_norm": 0.5712621808052063, "learning_rate": 2.4114534048294956e-05, "loss": 2.2106, "step": 147780 }, { "epoch": 0.5181368941225949, "grad_norm": 0.5070745348930359, "learning_rate": 2.4104019906774615e-05, "loss": 2.2146, "step": 147840 }, { "epoch": 0.5183471769530018, "grad_norm": 0.5508371591567993, "learning_rate": 2.4093505765254267e-05, "loss": 2.2136, "step": 147900 }, { "epoch": 0.5185574597834087, "grad_norm": 0.5198752284049988, "learning_rate": 
2.4082991623733922e-05, "loss": 2.2137, "step": 147960 }, { "epoch": 0.5187677426138155, "grad_norm": 0.555348813533783, "learning_rate": 2.4072477482213577e-05, "loss": 2.2097, "step": 148020 }, { "epoch": 0.5189780254442224, "grad_norm": 0.5508757829666138, "learning_rate": 2.4061963340693233e-05, "loss": 2.2097, "step": 148080 }, { "epoch": 0.5191883082746294, "grad_norm": 0.5607093572616577, "learning_rate": 2.4051449199172888e-05, "loss": 2.2048, "step": 148140 }, { "epoch": 0.5193985911050363, "grad_norm": 0.5992648601531982, "learning_rate": 2.4040935057652543e-05, "loss": 2.2078, "step": 148200 }, { "epoch": 0.5196088739354432, "grad_norm": 0.5487757921218872, "learning_rate": 2.40304209161322e-05, "loss": 2.2259, "step": 148260 }, { "epoch": 0.5198191567658501, "grad_norm": 0.5373034477233887, "learning_rate": 2.4019906774611854e-05, "loss": 2.219, "step": 148320 }, { "epoch": 0.5200294395962569, "grad_norm": 0.502025306224823, "learning_rate": 2.400939263309151e-05, "loss": 2.2192, "step": 148380 }, { "epoch": 0.5202397224266638, "grad_norm": 0.5354958176612854, "learning_rate": 2.3998878491571164e-05, "loss": 2.2023, "step": 148440 }, { "epoch": 0.5204500052570707, "grad_norm": 0.5596054196357727, "learning_rate": 2.398836435005082e-05, "loss": 2.2085, "step": 148500 }, { "epoch": 0.5206602880874777, "grad_norm": 0.5417528748512268, "learning_rate": 2.3977850208530475e-05, "loss": 2.2219, "step": 148560 }, { "epoch": 0.5208705709178846, "grad_norm": 0.636630117893219, "learning_rate": 2.396733606701013e-05, "loss": 2.2194, "step": 148620 }, { "epoch": 0.5210808537482915, "grad_norm": 0.4849955439567566, "learning_rate": 2.3956821925489782e-05, "loss": 2.2215, "step": 148680 }, { "epoch": 0.5212911365786983, "grad_norm": 0.5673374533653259, "learning_rate": 2.394630778396944e-05, "loss": 2.2132, "step": 148740 }, { "epoch": 0.5215014194091052, "grad_norm": 0.563437283039093, "learning_rate": 2.3935793642449096e-05, "loss": 2.1998, "step": 148800 }, { "epoch": 0.5217117022395121, "grad_norm": 0.5512784123420715, "learning_rate": 2.392527950092875e-05, "loss": 2.212, "step": 148860 }, { "epoch": 0.521921985069919, "grad_norm": 0.5081817507743835, "learning_rate": 2.3914765359408407e-05, "loss": 2.2078, "step": 148920 }, { "epoch": 0.522132267900326, "grad_norm": 0.5619904398918152, "learning_rate": 2.390425121788806e-05, "loss": 2.2137, "step": 148980 }, { "epoch": 0.5223425507307329, "grad_norm": 0.5273348093032837, "learning_rate": 2.3893737076367717e-05, "loss": 2.2204, "step": 149040 }, { "epoch": 0.5225528335611397, "grad_norm": 0.5501142740249634, "learning_rate": 2.3883222934847372e-05, "loss": 2.2188, "step": 149100 }, { "epoch": 0.5227631163915466, "grad_norm": 0.5600405335426331, "learning_rate": 2.3872708793327024e-05, "loss": 2.2064, "step": 149160 }, { "epoch": 0.5229733992219535, "grad_norm": 0.5650786757469177, "learning_rate": 2.3862194651806683e-05, "loss": 2.2123, "step": 149220 }, { "epoch": 0.5231836820523604, "grad_norm": 0.49991318583488464, "learning_rate": 2.3851680510286338e-05, "loss": 2.2215, "step": 149280 }, { "epoch": 0.5233939648827673, "grad_norm": 0.5382364392280579, "learning_rate": 2.384116636876599e-05, "loss": 2.2115, "step": 149340 }, { "epoch": 0.5236042477131743, "grad_norm": 0.5215093493461609, "learning_rate": 2.383065222724565e-05, "loss": 2.2176, "step": 149400 }, { "epoch": 0.5238145305435811, "grad_norm": 0.5873035192489624, "learning_rate": 2.38201380857253e-05, "loss": 2.2033, "step": 149460 }, { "epoch": 0.524024813373988, 
"grad_norm": 0.5551034808158875, "learning_rate": 2.3809623944204956e-05, "loss": 2.2174, "step": 149520 }, { "epoch": 0.5242350962043949, "grad_norm": 0.5522902607917786, "learning_rate": 2.3799109802684615e-05, "loss": 2.202, "step": 149580 }, { "epoch": 0.5244453790348018, "grad_norm": 0.6057192087173462, "learning_rate": 2.3788595661164267e-05, "loss": 2.2006, "step": 149640 }, { "epoch": 0.5246556618652087, "grad_norm": 0.5531818270683289, "learning_rate": 2.3778081519643922e-05, "loss": 2.2174, "step": 149700 }, { "epoch": 0.5248659446956156, "grad_norm": 0.5004293322563171, "learning_rate": 2.3767567378123577e-05, "loss": 2.2027, "step": 149760 }, { "epoch": 0.5250762275260225, "grad_norm": 0.5355766415596008, "learning_rate": 2.3757053236603232e-05, "loss": 2.2172, "step": 149820 }, { "epoch": 0.5252865103564294, "grad_norm": 0.5332327485084534, "learning_rate": 2.3746539095082888e-05, "loss": 2.2174, "step": 149880 }, { "epoch": 0.5254967931868363, "grad_norm": 0.5604259371757507, "learning_rate": 2.3736024953562543e-05, "loss": 2.2165, "step": 149940 }, { "epoch": 0.5257070760172432, "grad_norm": 0.5731934905052185, "learning_rate": 2.3725510812042198e-05, "loss": 2.2177, "step": 150000 }, { "epoch": 0.5259173588476501, "grad_norm": 0.5286061763763428, "learning_rate": 2.3714996670521854e-05, "loss": 2.1979, "step": 150060 }, { "epoch": 0.526127641678057, "grad_norm": 0.5667920112609863, "learning_rate": 2.370448252900151e-05, "loss": 2.2066, "step": 150120 }, { "epoch": 0.5263379245084638, "grad_norm": 0.5224534869194031, "learning_rate": 2.3693968387481164e-05, "loss": 2.199, "step": 150180 }, { "epoch": 0.5265482073388708, "grad_norm": 0.5209370851516724, "learning_rate": 2.368345424596082e-05, "loss": 2.2104, "step": 150240 }, { "epoch": 0.5267584901692777, "grad_norm": 0.4856961965560913, "learning_rate": 2.3672940104440475e-05, "loss": 2.2186, "step": 150300 }, { "epoch": 0.5269687729996846, "grad_norm": 0.5362430810928345, "learning_rate": 2.366242596292013e-05, "loss": 2.2201, "step": 150360 }, { "epoch": 0.5271790558300915, "grad_norm": 0.5632970929145813, "learning_rate": 2.3651911821399782e-05, "loss": 2.2002, "step": 150420 }, { "epoch": 0.5273893386604984, "grad_norm": 0.5169422626495361, "learning_rate": 2.364139767987944e-05, "loss": 2.2105, "step": 150480 }, { "epoch": 0.5275996214909052, "grad_norm": 0.5175157785415649, "learning_rate": 2.3630883538359096e-05, "loss": 2.2193, "step": 150540 }, { "epoch": 0.5278099043213121, "grad_norm": 0.5866751074790955, "learning_rate": 2.3620544632530756e-05, "loss": 2.2127, "step": 150600 }, { "epoch": 0.5280201871517191, "grad_norm": 0.5538196563720703, "learning_rate": 2.3610030491010408e-05, "loss": 2.2139, "step": 150660 }, { "epoch": 0.528230469982126, "grad_norm": 0.5771428942680359, "learning_rate": 2.3599516349490063e-05, "loss": 2.2089, "step": 150720 }, { "epoch": 0.5284407528125329, "grad_norm": 0.5791342258453369, "learning_rate": 2.3589002207969722e-05, "loss": 2.2157, "step": 150780 }, { "epoch": 0.5286510356429398, "grad_norm": 0.5100274085998535, "learning_rate": 2.3578488066449374e-05, "loss": 2.2075, "step": 150840 }, { "epoch": 0.5288613184733466, "grad_norm": 0.5530140399932861, "learning_rate": 2.356797392492903e-05, "loss": 2.2152, "step": 150900 }, { "epoch": 0.5290716013037535, "grad_norm": 0.590545654296875, "learning_rate": 2.3557459783408688e-05, "loss": 2.2085, "step": 150960 }, { "epoch": 0.5292818841341604, "grad_norm": 0.5271270275115967, "learning_rate": 2.354694564188834e-05, "loss": 2.2225, 
"step": 151020 }, { "epoch": 0.5294921669645674, "grad_norm": 0.5703055262565613, "learning_rate": 2.3536431500367995e-05, "loss": 2.228, "step": 151080 }, { "epoch": 0.5297024497949743, "grad_norm": 0.5086321830749512, "learning_rate": 2.352591735884765e-05, "loss": 2.2178, "step": 151140 }, { "epoch": 0.5299127326253812, "grad_norm": 0.5059917569160461, "learning_rate": 2.3515403217327306e-05, "loss": 2.2016, "step": 151200 }, { "epoch": 0.530123015455788, "grad_norm": 0.5583618879318237, "learning_rate": 2.3504889075806964e-05, "loss": 2.229, "step": 151260 }, { "epoch": 0.5303332982861949, "grad_norm": 0.5437110662460327, "learning_rate": 2.3494374934286616e-05, "loss": 2.1988, "step": 151320 }, { "epoch": 0.5305435811166018, "grad_norm": 0.5776737332344055, "learning_rate": 2.348386079276627e-05, "loss": 2.2158, "step": 151380 }, { "epoch": 0.5307538639470087, "grad_norm": 0.5363079309463501, "learning_rate": 2.3473346651245927e-05, "loss": 2.2242, "step": 151440 }, { "epoch": 0.5309641467774157, "grad_norm": 0.5857604742050171, "learning_rate": 2.3462832509725582e-05, "loss": 2.214, "step": 151500 }, { "epoch": 0.5311744296078226, "grad_norm": 0.5153557062149048, "learning_rate": 2.3452318368205237e-05, "loss": 2.2025, "step": 151560 }, { "epoch": 0.5313847124382294, "grad_norm": 0.6134850978851318, "learning_rate": 2.3441804226684893e-05, "loss": 2.2091, "step": 151620 }, { "epoch": 0.5315949952686363, "grad_norm": 0.5441007018089294, "learning_rate": 2.3431290085164548e-05, "loss": 2.2208, "step": 151680 }, { "epoch": 0.5318052780990432, "grad_norm": 0.565891444683075, "learning_rate": 2.3420775943644203e-05, "loss": 2.2169, "step": 151740 }, { "epoch": 0.5320155609294501, "grad_norm": 0.5420317053794861, "learning_rate": 2.341026180212386e-05, "loss": 2.2207, "step": 151800 }, { "epoch": 0.532225843759857, "grad_norm": 0.5484797954559326, "learning_rate": 2.3399747660603514e-05, "loss": 2.2072, "step": 151860 }, { "epoch": 0.532436126590264, "grad_norm": 0.559556245803833, "learning_rate": 2.338923351908317e-05, "loss": 2.2114, "step": 151920 }, { "epoch": 0.5326464094206708, "grad_norm": 0.5181391835212708, "learning_rate": 2.3378719377562824e-05, "loss": 2.2165, "step": 151980 }, { "epoch": 0.5328566922510777, "grad_norm": 0.5498536825180054, "learning_rate": 2.336820523604248e-05, "loss": 2.2114, "step": 152040 }, { "epoch": 0.5330669750814846, "grad_norm": 0.559701144695282, "learning_rate": 2.335769109452213e-05, "loss": 2.2195, "step": 152100 }, { "epoch": 0.5332772579118915, "grad_norm": 0.5087351202964783, "learning_rate": 2.334717695300179e-05, "loss": 2.2114, "step": 152160 }, { "epoch": 0.5334875407422984, "grad_norm": 0.590671718120575, "learning_rate": 2.3336662811481445e-05, "loss": 2.2161, "step": 152220 }, { "epoch": 0.5336978235727053, "grad_norm": 0.5296396017074585, "learning_rate": 2.3326148669961097e-05, "loss": 2.2173, "step": 152280 }, { "epoch": 0.5339081064031121, "grad_norm": 0.4890645742416382, "learning_rate": 2.3315634528440756e-05, "loss": 2.2094, "step": 152340 }, { "epoch": 0.5341183892335191, "grad_norm": 0.5311561822891235, "learning_rate": 2.3305120386920408e-05, "loss": 2.2167, "step": 152400 }, { "epoch": 0.534328672063926, "grad_norm": 0.5763415694236755, "learning_rate": 2.3294606245400063e-05, "loss": 2.2274, "step": 152460 }, { "epoch": 0.5345389548943329, "grad_norm": 0.5071849822998047, "learning_rate": 2.3284092103879722e-05, "loss": 2.1984, "step": 152520 }, { "epoch": 0.5347492377247398, "grad_norm": 0.5014827847480774, 
"learning_rate": 2.3273577962359374e-05, "loss": 2.228, "step": 152580 }, { "epoch": 0.5349595205551466, "grad_norm": 0.562168538570404, "learning_rate": 2.326306382083903e-05, "loss": 2.1957, "step": 152640 }, { "epoch": 0.5351698033855535, "grad_norm": 0.5440489649772644, "learning_rate": 2.3252549679318684e-05, "loss": 2.2289, "step": 152700 }, { "epoch": 0.5353800862159604, "grad_norm": 0.5407817959785461, "learning_rate": 2.324203553779834e-05, "loss": 2.2165, "step": 152760 }, { "epoch": 0.5355903690463674, "grad_norm": 0.5259541869163513, "learning_rate": 2.3231521396277995e-05, "loss": 2.2268, "step": 152820 }, { "epoch": 0.5358006518767743, "grad_norm": 0.5506186485290527, "learning_rate": 2.322100725475765e-05, "loss": 2.2008, "step": 152880 }, { "epoch": 0.5360109347071812, "grad_norm": 0.5958437323570251, "learning_rate": 2.3210493113237305e-05, "loss": 2.2186, "step": 152940 }, { "epoch": 0.536221217537588, "grad_norm": 0.5881892442703247, "learning_rate": 2.319997897171696e-05, "loss": 2.224, "step": 153000 }, { "epoch": 0.5364315003679949, "grad_norm": 0.5412174463272095, "learning_rate": 2.3189464830196616e-05, "loss": 2.2077, "step": 153060 }, { "epoch": 0.5366417831984018, "grad_norm": 0.5946020483970642, "learning_rate": 2.317895068867627e-05, "loss": 2.2145, "step": 153120 }, { "epoch": 0.5368520660288087, "grad_norm": 0.5380927920341492, "learning_rate": 2.3168436547155927e-05, "loss": 2.2029, "step": 153180 }, { "epoch": 0.5370623488592157, "grad_norm": 0.5236719250679016, "learning_rate": 2.3157922405635582e-05, "loss": 2.22, "step": 153240 }, { "epoch": 0.5372726316896226, "grad_norm": 0.6787263751029968, "learning_rate": 2.3147408264115237e-05, "loss": 2.2274, "step": 153300 }, { "epoch": 0.5374829145200294, "grad_norm": 0.5436735153198242, "learning_rate": 2.3137069358286898e-05, "loss": 2.2076, "step": 153360 }, { "epoch": 0.5376931973504363, "grad_norm": 0.5597900748252869, "learning_rate": 2.3126555216766553e-05, "loss": 2.2129, "step": 153420 }, { "epoch": 0.5379034801808432, "grad_norm": 0.5178171992301941, "learning_rate": 2.3116041075246205e-05, "loss": 2.215, "step": 153480 }, { "epoch": 0.5381137630112501, "grad_norm": 0.5543037056922913, "learning_rate": 2.3105526933725863e-05, "loss": 2.2151, "step": 153540 }, { "epoch": 0.538324045841657, "grad_norm": 0.6140149235725403, "learning_rate": 2.3095012792205515e-05, "loss": 2.1995, "step": 153600 }, { "epoch": 0.538534328672064, "grad_norm": 0.5255257487297058, "learning_rate": 2.3084498650685174e-05, "loss": 2.225, "step": 153660 }, { "epoch": 0.5387446115024708, "grad_norm": 0.5231916904449463, "learning_rate": 2.307398450916483e-05, "loss": 2.2208, "step": 153720 }, { "epoch": 0.5389548943328777, "grad_norm": 0.5627821087837219, "learning_rate": 2.306347036764448e-05, "loss": 2.2165, "step": 153780 }, { "epoch": 0.5391651771632846, "grad_norm": 0.5242416262626648, "learning_rate": 2.305295622612414e-05, "loss": 2.1959, "step": 153840 }, { "epoch": 0.5393754599936915, "grad_norm": 0.5642799735069275, "learning_rate": 2.3042442084603795e-05, "loss": 2.2203, "step": 153900 }, { "epoch": 0.5395857428240984, "grad_norm": 0.5174145698547363, "learning_rate": 2.3031927943083447e-05, "loss": 2.2069, "step": 153960 }, { "epoch": 0.5397960256545054, "grad_norm": 0.5788621306419373, "learning_rate": 2.3021413801563106e-05, "loss": 2.2111, "step": 154020 }, { "epoch": 0.5400063084849122, "grad_norm": 0.5217883586883545, "learning_rate": 2.3010899660042758e-05, "loss": 2.2185, "step": 154080 }, { "epoch": 
0.5402165913153191, "grad_norm": 0.5840219855308533, "learning_rate": 2.3000385518522413e-05, "loss": 2.2113, "step": 154140 }, { "epoch": 0.540426874145726, "grad_norm": 0.5109094977378845, "learning_rate": 2.298987137700207e-05, "loss": 2.2095, "step": 154200 }, { "epoch": 0.5406371569761329, "grad_norm": 0.5547183156013489, "learning_rate": 2.2979357235481723e-05, "loss": 2.1991, "step": 154260 }, { "epoch": 0.5408474398065398, "grad_norm": 0.5175747275352478, "learning_rate": 2.296884309396138e-05, "loss": 2.2171, "step": 154320 }, { "epoch": 0.5410577226369467, "grad_norm": 0.5823888778686523, "learning_rate": 2.2958328952441034e-05, "loss": 2.2003, "step": 154380 }, { "epoch": 0.5412680054673535, "grad_norm": 0.5631999969482422, "learning_rate": 2.294781481092069e-05, "loss": 2.2135, "step": 154440 }, { "epoch": 0.5414782882977605, "grad_norm": 0.7047464847564697, "learning_rate": 2.2937300669400345e-05, "loss": 2.2136, "step": 154500 }, { "epoch": 0.5416885711281674, "grad_norm": 0.5780332088470459, "learning_rate": 2.292678652788e-05, "loss": 2.2104, "step": 154560 }, { "epoch": 0.5418988539585743, "grad_norm": 0.5036734938621521, "learning_rate": 2.2916272386359655e-05, "loss": 2.2023, "step": 154620 }, { "epoch": 0.5421091367889812, "grad_norm": 0.5215955972671509, "learning_rate": 2.290575824483931e-05, "loss": 2.196, "step": 154680 }, { "epoch": 0.5423194196193881, "grad_norm": 0.5139001607894897, "learning_rate": 2.2895244103318966e-05, "loss": 2.2188, "step": 154740 }, { "epoch": 0.5425297024497949, "grad_norm": 0.5407582521438599, "learning_rate": 2.288472996179862e-05, "loss": 2.2033, "step": 154800 }, { "epoch": 0.5427399852802018, "grad_norm": 0.6017780900001526, "learning_rate": 2.2874215820278276e-05, "loss": 2.2156, "step": 154860 }, { "epoch": 0.5429502681106088, "grad_norm": 0.6027617454528809, "learning_rate": 2.286370167875793e-05, "loss": 2.2104, "step": 154920 }, { "epoch": 0.5431605509410157, "grad_norm": 0.5565388798713684, "learning_rate": 2.2853187537237587e-05, "loss": 2.2037, "step": 154980 }, { "epoch": 0.5433708337714226, "grad_norm": 0.5749856233596802, "learning_rate": 2.284267339571724e-05, "loss": 2.2239, "step": 155040 }, { "epoch": 0.5435811166018295, "grad_norm": 0.5343650579452515, "learning_rate": 2.2832159254196897e-05, "loss": 2.1944, "step": 155100 }, { "epoch": 0.5437913994322363, "grad_norm": 0.5500574707984924, "learning_rate": 2.2821645112676553e-05, "loss": 2.2129, "step": 155160 }, { "epoch": 0.5440016822626432, "grad_norm": 0.5807549953460693, "learning_rate": 2.2811130971156205e-05, "loss": 2.2116, "step": 155220 }, { "epoch": 0.5442119650930501, "grad_norm": 0.5436511635780334, "learning_rate": 2.2800616829635863e-05, "loss": 2.2123, "step": 155280 }, { "epoch": 0.5444222479234571, "grad_norm": 0.5555745363235474, "learning_rate": 2.2790102688115515e-05, "loss": 2.2056, "step": 155340 }, { "epoch": 0.544632530753864, "grad_norm": 0.5600705742835999, "learning_rate": 2.277958854659517e-05, "loss": 2.2064, "step": 155400 }, { "epoch": 0.5448428135842709, "grad_norm": 0.5020048022270203, "learning_rate": 2.276907440507483e-05, "loss": 2.2199, "step": 155460 }, { "epoch": 0.5450530964146777, "grad_norm": 0.5523339509963989, "learning_rate": 2.275856026355448e-05, "loss": 2.2202, "step": 155520 }, { "epoch": 0.5452633792450846, "grad_norm": 0.6014343500137329, "learning_rate": 2.2748046122034136e-05, "loss": 2.2138, "step": 155580 }, { "epoch": 0.5454736620754915, "grad_norm": 0.5882730484008789, "learning_rate": 2.273753198051379e-05, 
"loss": 2.2182, "step": 155640 }, { "epoch": 0.5456839449058984, "grad_norm": 0.5294257998466492, "learning_rate": 2.2727017838993447e-05, "loss": 2.2024, "step": 155700 }, { "epoch": 0.5458942277363054, "grad_norm": 0.4940982460975647, "learning_rate": 2.2716503697473102e-05, "loss": 2.2176, "step": 155760 }, { "epoch": 0.5461045105667123, "grad_norm": 0.6698052883148193, "learning_rate": 2.2705989555952757e-05, "loss": 2.2183, "step": 155820 }, { "epoch": 0.5463147933971191, "grad_norm": 0.5630810260772705, "learning_rate": 2.2695475414432413e-05, "loss": 2.2259, "step": 155880 }, { "epoch": 0.546525076227526, "grad_norm": 0.6864802241325378, "learning_rate": 2.2684961272912068e-05, "loss": 2.2025, "step": 155940 }, { "epoch": 0.5467353590579329, "grad_norm": 0.5602729320526123, "learning_rate": 2.2674447131391723e-05, "loss": 2.2127, "step": 156000 }, { "epoch": 0.5469456418883398, "grad_norm": 0.5600816607475281, "learning_rate": 2.266393298987138e-05, "loss": 2.2099, "step": 156060 }, { "epoch": 0.5471559247187467, "grad_norm": 0.5645787715911865, "learning_rate": 2.265359408404304e-05, "loss": 2.2127, "step": 156120 }, { "epoch": 0.5473662075491537, "grad_norm": 0.59485924243927, "learning_rate": 2.2643079942522694e-05, "loss": 2.2066, "step": 156180 }, { "epoch": 0.5475764903795605, "grad_norm": 0.5252460837364197, "learning_rate": 2.263256580100235e-05, "loss": 2.2002, "step": 156240 }, { "epoch": 0.5477867732099674, "grad_norm": 0.5992864966392517, "learning_rate": 2.2622051659482005e-05, "loss": 2.218, "step": 156300 }, { "epoch": 0.5479970560403743, "grad_norm": 0.5336859226226807, "learning_rate": 2.261153751796166e-05, "loss": 2.2217, "step": 156360 }, { "epoch": 0.5482073388707812, "grad_norm": 0.5347955226898193, "learning_rate": 2.2601023376441315e-05, "loss": 2.2083, "step": 156420 }, { "epoch": 0.5484176217011881, "grad_norm": 0.5723451972007751, "learning_rate": 2.259050923492097e-05, "loss": 2.2145, "step": 156480 }, { "epoch": 0.548627904531595, "grad_norm": 0.5852446556091309, "learning_rate": 2.2579995093400626e-05, "loss": 2.2054, "step": 156540 }, { "epoch": 0.5488381873620019, "grad_norm": 0.5354224443435669, "learning_rate": 2.256948095188028e-05, "loss": 2.199, "step": 156600 }, { "epoch": 0.5490484701924088, "grad_norm": 0.5295231938362122, "learning_rate": 2.2558966810359937e-05, "loss": 2.2171, "step": 156660 }, { "epoch": 0.5492587530228157, "grad_norm": 0.5262629985809326, "learning_rate": 2.254845266883959e-05, "loss": 2.2272, "step": 156720 }, { "epoch": 0.5494690358532226, "grad_norm": 0.5583018660545349, "learning_rate": 2.2537938527319247e-05, "loss": 2.2085, "step": 156780 }, { "epoch": 0.5496793186836295, "grad_norm": 0.6145007610321045, "learning_rate": 2.2527424385798902e-05, "loss": 2.1992, "step": 156840 }, { "epoch": 0.5498896015140364, "grad_norm": 0.4931606650352478, "learning_rate": 2.2516910244278554e-05, "loss": 2.2027, "step": 156900 }, { "epoch": 0.5500998843444432, "grad_norm": 0.568114697933197, "learning_rate": 2.2506396102758213e-05, "loss": 2.2207, "step": 156960 }, { "epoch": 0.5503101671748502, "grad_norm": 0.5709725022315979, "learning_rate": 2.2495881961237865e-05, "loss": 2.2085, "step": 157020 }, { "epoch": 0.5505204500052571, "grad_norm": 0.5881896615028381, "learning_rate": 2.248536781971752e-05, "loss": 2.2058, "step": 157080 }, { "epoch": 0.550730732835664, "grad_norm": 0.5648079514503479, "learning_rate": 2.247485367819718e-05, "loss": 2.2209, "step": 157140 }, { "epoch": 0.5509410156660709, "grad_norm": 
0.6545466184616089, "learning_rate": 2.246433953667683e-05, "loss": 2.2135, "step": 157200 }, { "epoch": 0.5511512984964778, "grad_norm": 0.5051990151405334, "learning_rate": 2.2453825395156486e-05, "loss": 2.1934, "step": 157260 }, { "epoch": 0.5513615813268846, "grad_norm": 0.5131949186325073, "learning_rate": 2.244331125363614e-05, "loss": 2.2062, "step": 157320 }, { "epoch": 0.5515718641572915, "grad_norm": 0.5909935832023621, "learning_rate": 2.2432797112115796e-05, "loss": 2.2123, "step": 157380 }, { "epoch": 0.5517821469876985, "grad_norm": 0.5474815368652344, "learning_rate": 2.2422282970595452e-05, "loss": 2.201, "step": 157440 }, { "epoch": 0.5519924298181054, "grad_norm": 0.5805018544197083, "learning_rate": 2.2411768829075107e-05, "loss": 2.2214, "step": 157500 }, { "epoch": 0.5522027126485123, "grad_norm": 0.6290664672851562, "learning_rate": 2.2401254687554762e-05, "loss": 2.2116, "step": 157560 }, { "epoch": 0.5524129954789192, "grad_norm": 0.5544005036354065, "learning_rate": 2.2390740546034418e-05, "loss": 2.2184, "step": 157620 }, { "epoch": 0.552623278309326, "grad_norm": 0.591930627822876, "learning_rate": 2.2380226404514073e-05, "loss": 2.2026, "step": 157680 }, { "epoch": 0.5528335611397329, "grad_norm": 0.5133166909217834, "learning_rate": 2.2369712262993728e-05, "loss": 2.2067, "step": 157740 }, { "epoch": 0.5530438439701398, "grad_norm": 0.5512820482254028, "learning_rate": 2.2359198121473383e-05, "loss": 2.2148, "step": 157800 }, { "epoch": 0.5532541268005468, "grad_norm": 0.5863820314407349, "learning_rate": 2.234868397995304e-05, "loss": 2.2151, "step": 157860 }, { "epoch": 0.5534644096309537, "grad_norm": 0.510724663734436, "learning_rate": 2.2338169838432694e-05, "loss": 2.2123, "step": 157920 }, { "epoch": 0.5536746924613606, "grad_norm": 0.522894024848938, "learning_rate": 2.2327655696912346e-05, "loss": 2.2078, "step": 157980 }, { "epoch": 0.5538849752917674, "grad_norm": 0.5410124659538269, "learning_rate": 2.2317141555392005e-05, "loss": 2.2038, "step": 158040 }, { "epoch": 0.5540952581221743, "grad_norm": 0.5146405696868896, "learning_rate": 2.230662741387166e-05, "loss": 2.1995, "step": 158100 }, { "epoch": 0.5543055409525812, "grad_norm": 0.5585925579071045, "learning_rate": 2.2296113272351312e-05, "loss": 2.219, "step": 158160 }, { "epoch": 0.5545158237829881, "grad_norm": 0.530822217464447, "learning_rate": 2.228559913083097e-05, "loss": 2.2088, "step": 158220 }, { "epoch": 0.554726106613395, "grad_norm": 0.5129376649856567, "learning_rate": 2.2275084989310622e-05, "loss": 2.2021, "step": 158280 }, { "epoch": 0.5549363894438019, "grad_norm": 0.5571216344833374, "learning_rate": 2.2264570847790278e-05, "loss": 2.2075, "step": 158340 }, { "epoch": 0.5551466722742088, "grad_norm": 0.5772547125816345, "learning_rate": 2.2254231941961938e-05, "loss": 2.2175, "step": 158400 }, { "epoch": 0.5553569551046157, "grad_norm": 0.508144199848175, "learning_rate": 2.2243717800441597e-05, "loss": 2.2162, "step": 158460 }, { "epoch": 0.5555672379350226, "grad_norm": 0.6046881079673767, "learning_rate": 2.2233203658921252e-05, "loss": 2.1968, "step": 158520 }, { "epoch": 0.5557775207654295, "grad_norm": 0.530588686466217, "learning_rate": 2.2222689517400904e-05, "loss": 2.2059, "step": 158580 }, { "epoch": 0.5559878035958364, "grad_norm": 0.5466212034225464, "learning_rate": 2.2212175375880563e-05, "loss": 2.2118, "step": 158640 }, { "epoch": 0.5561980864262432, "grad_norm": 0.5684359073638916, "learning_rate": 2.2201661234360214e-05, "loss": 2.2001, "step": 158700 }, 
{ "epoch": 0.5564083692566502, "grad_norm": 0.5453093647956848, "learning_rate": 2.219114709283987e-05, "loss": 2.1946, "step": 158760 }, { "epoch": 0.5566186520870571, "grad_norm": 0.5468719601631165, "learning_rate": 2.218063295131953e-05, "loss": 2.2095, "step": 158820 }, { "epoch": 0.556828934917464, "grad_norm": 0.5114817023277283, "learning_rate": 2.217011880979918e-05, "loss": 2.204, "step": 158880 }, { "epoch": 0.5570392177478709, "grad_norm": 0.5197003483772278, "learning_rate": 2.2159604668278836e-05, "loss": 2.204, "step": 158940 }, { "epoch": 0.5572495005782778, "grad_norm": 0.5789101719856262, "learning_rate": 2.214909052675849e-05, "loss": 2.2304, "step": 159000 }, { "epoch": 0.5574597834086846, "grad_norm": 0.5665631890296936, "learning_rate": 2.2138576385238146e-05, "loss": 2.2046, "step": 159060 }, { "epoch": 0.5576700662390915, "grad_norm": 0.5702418684959412, "learning_rate": 2.21280622437178e-05, "loss": 2.2051, "step": 159120 }, { "epoch": 0.5578803490694985, "grad_norm": 0.5206588506698608, "learning_rate": 2.2117548102197457e-05, "loss": 2.1996, "step": 159180 }, { "epoch": 0.5580906318999054, "grad_norm": 0.5394989848136902, "learning_rate": 2.2107033960677112e-05, "loss": 2.1917, "step": 159240 }, { "epoch": 0.5583009147303123, "grad_norm": 0.5701467394828796, "learning_rate": 2.2096519819156767e-05, "loss": 2.195, "step": 159300 }, { "epoch": 0.5585111975607192, "grad_norm": 0.5451265573501587, "learning_rate": 2.2086005677636423e-05, "loss": 2.2014, "step": 159360 }, { "epoch": 0.558721480391126, "grad_norm": 0.5584667325019836, "learning_rate": 2.2075491536116078e-05, "loss": 2.2138, "step": 159420 }, { "epoch": 0.5589317632215329, "grad_norm": 0.5295857787132263, "learning_rate": 2.2064977394595733e-05, "loss": 2.2191, "step": 159480 }, { "epoch": 0.5591420460519398, "grad_norm": 0.678843080997467, "learning_rate": 2.205446325307539e-05, "loss": 2.2222, "step": 159540 }, { "epoch": 0.5593523288823468, "grad_norm": 0.5450401306152344, "learning_rate": 2.2043949111555044e-05, "loss": 2.2098, "step": 159600 }, { "epoch": 0.5595626117127537, "grad_norm": 0.5541589856147766, "learning_rate": 2.2033434970034696e-05, "loss": 2.2096, "step": 159660 }, { "epoch": 0.5597728945431606, "grad_norm": 0.5362442135810852, "learning_rate": 2.2022920828514354e-05, "loss": 2.1889, "step": 159720 }, { "epoch": 0.5599831773735674, "grad_norm": 0.5870858430862427, "learning_rate": 2.201240668699401e-05, "loss": 2.2146, "step": 159780 }, { "epoch": 0.5601934602039743, "grad_norm": 0.5245428085327148, "learning_rate": 2.200189254547366e-05, "loss": 2.2165, "step": 159840 }, { "epoch": 0.5604037430343812, "grad_norm": 0.5324500203132629, "learning_rate": 2.199137840395332e-05, "loss": 2.2096, "step": 159900 }, { "epoch": 0.5606140258647881, "grad_norm": 0.5151851773262024, "learning_rate": 2.1980864262432972e-05, "loss": 2.2044, "step": 159960 }, { "epoch": 0.5608243086951951, "grad_norm": 0.532479465007782, "learning_rate": 2.1970350120912627e-05, "loss": 2.2112, "step": 160020 }, { "epoch": 0.561034591525602, "grad_norm": 0.5969590544700623, "learning_rate": 2.1959835979392286e-05, "loss": 2.2185, "step": 160080 }, { "epoch": 0.5612448743560088, "grad_norm": 0.5450465083122253, "learning_rate": 2.1949321837871938e-05, "loss": 2.204, "step": 160140 }, { "epoch": 0.5614551571864157, "grad_norm": 0.5737215280532837, "learning_rate": 2.1938807696351593e-05, "loss": 2.2075, "step": 160200 }, { "epoch": 0.5616654400168226, "grad_norm": 0.5520704984664917, "learning_rate": 
2.1928293554831252e-05, "loss": 2.1986, "step": 160260 }, { "epoch": 0.5618757228472295, "grad_norm": 0.5581754446029663, "learning_rate": 2.1917779413310904e-05, "loss": 2.2046, "step": 160320 }, { "epoch": 0.5620860056776364, "grad_norm": 0.556607186794281, "learning_rate": 2.190726527179056e-05, "loss": 2.2018, "step": 160380 }, { "epoch": 0.5622962885080434, "grad_norm": 0.5431088209152222, "learning_rate": 2.1896751130270214e-05, "loss": 2.2187, "step": 160440 }, { "epoch": 0.5625065713384502, "grad_norm": 0.5602589249610901, "learning_rate": 2.188623698874987e-05, "loss": 2.2032, "step": 160500 }, { "epoch": 0.5627168541688571, "grad_norm": 0.5283489227294922, "learning_rate": 2.1875722847229525e-05, "loss": 2.1863, "step": 160560 }, { "epoch": 0.562927136999264, "grad_norm": 0.6582157015800476, "learning_rate": 2.186520870570918e-05, "loss": 2.1946, "step": 160620 }, { "epoch": 0.5631374198296709, "grad_norm": 0.5504642128944397, "learning_rate": 2.1854694564188835e-05, "loss": 2.2088, "step": 160680 }, { "epoch": 0.5633477026600778, "grad_norm": 0.5475605726242065, "learning_rate": 2.184418042266849e-05, "loss": 2.2134, "step": 160740 }, { "epoch": 0.5635579854904847, "grad_norm": 0.5584597587585449, "learning_rate": 2.1833666281148146e-05, "loss": 2.2034, "step": 160800 }, { "epoch": 0.5637682683208916, "grad_norm": 0.6042028069496155, "learning_rate": 2.18231521396278e-05, "loss": 2.2178, "step": 160860 }, { "epoch": 0.5639785511512985, "grad_norm": 0.5251367092132568, "learning_rate": 2.1812637998107453e-05, "loss": 2.1982, "step": 160920 }, { "epoch": 0.5641888339817054, "grad_norm": 0.518165111541748, "learning_rate": 2.1802123856587112e-05, "loss": 2.2248, "step": 160980 }, { "epoch": 0.5643991168121123, "grad_norm": 0.5064350366592407, "learning_rate": 2.1791609715066767e-05, "loss": 2.2202, "step": 161040 }, { "epoch": 0.5646093996425192, "grad_norm": 0.5454937219619751, "learning_rate": 2.178109557354642e-05, "loss": 2.2062, "step": 161100 }, { "epoch": 0.5648196824729261, "grad_norm": 0.592663049697876, "learning_rate": 2.1770581432026078e-05, "loss": 2.2157, "step": 161160 }, { "epoch": 0.5650299653033329, "grad_norm": 0.5274229049682617, "learning_rate": 2.1760067290505733e-05, "loss": 2.2065, "step": 161220 }, { "epoch": 0.5652402481337399, "grad_norm": 0.5737342834472656, "learning_rate": 2.1749553148985385e-05, "loss": 2.1895, "step": 161280 }, { "epoch": 0.5654505309641468, "grad_norm": 0.5299097895622253, "learning_rate": 2.1739039007465043e-05, "loss": 2.2206, "step": 161340 }, { "epoch": 0.5656608137945537, "grad_norm": 0.547879159450531, "learning_rate": 2.1728524865944695e-05, "loss": 2.2083, "step": 161400 }, { "epoch": 0.5658710966249606, "grad_norm": 0.548787534236908, "learning_rate": 2.171801072442435e-05, "loss": 2.2084, "step": 161460 }, { "epoch": 0.5660813794553675, "grad_norm": 0.5963943600654602, "learning_rate": 2.170749658290401e-05, "loss": 2.2096, "step": 161520 }, { "epoch": 0.5662916622857743, "grad_norm": 0.5302314162254333, "learning_rate": 2.169698244138366e-05, "loss": 2.2034, "step": 161580 }, { "epoch": 0.5665019451161812, "grad_norm": 0.531383216381073, "learning_rate": 2.1686468299863316e-05, "loss": 2.1979, "step": 161640 }, { "epoch": 0.5667122279465882, "grad_norm": 0.5811280012130737, "learning_rate": 2.1675954158342972e-05, "loss": 2.2139, "step": 161700 }, { "epoch": 0.5669225107769951, "grad_norm": 0.5571404695510864, "learning_rate": 2.1665440016822627e-05, "loss": 2.21, "step": 161760 }, { "epoch": 0.567132793607402, 
"grad_norm": 0.5254374742507935, "learning_rate": 2.1654925875302282e-05, "loss": 2.2137, "step": 161820 }, { "epoch": 0.5673430764378089, "grad_norm": 0.5230144262313843, "learning_rate": 2.1644411733781938e-05, "loss": 2.227, "step": 161880 }, { "epoch": 0.5675533592682157, "grad_norm": 0.524534285068512, "learning_rate": 2.1633897592261593e-05, "loss": 2.2066, "step": 161940 }, { "epoch": 0.5677636420986226, "grad_norm": 0.5795220136642456, "learning_rate": 2.1623383450741248e-05, "loss": 2.21, "step": 162000 }, { "epoch": 0.5679739249290295, "grad_norm": 0.5567732453346252, "learning_rate": 2.1612869309220903e-05, "loss": 2.2213, "step": 162060 }, { "epoch": 0.5681842077594365, "grad_norm": 0.528534471988678, "learning_rate": 2.160235516770056e-05, "loss": 2.196, "step": 162120 }, { "epoch": 0.5683944905898434, "grad_norm": 0.5799056887626648, "learning_rate": 2.1591841026180214e-05, "loss": 2.2136, "step": 162180 }, { "epoch": 0.5686047734202503, "grad_norm": 0.5151017308235168, "learning_rate": 2.158132688465987e-05, "loss": 2.2089, "step": 162240 }, { "epoch": 0.5688150562506571, "grad_norm": 0.6659327745437622, "learning_rate": 2.1570812743139524e-05, "loss": 2.2154, "step": 162300 }, { "epoch": 0.569025339081064, "grad_norm": 0.5421218276023865, "learning_rate": 2.1560473837311185e-05, "loss": 2.2131, "step": 162360 }, { "epoch": 0.5692356219114709, "grad_norm": 0.5917002558708191, "learning_rate": 2.154995969579084e-05, "loss": 2.2086, "step": 162420 }, { "epoch": 0.5694459047418778, "grad_norm": 0.5525832772254944, "learning_rate": 2.1539445554270496e-05, "loss": 2.2043, "step": 162480 }, { "epoch": 0.5696561875722848, "grad_norm": 0.5710530281066895, "learning_rate": 2.152893141275015e-05, "loss": 2.2084, "step": 162540 }, { "epoch": 0.5698664704026917, "grad_norm": 0.5232380032539368, "learning_rate": 2.1518417271229803e-05, "loss": 2.2021, "step": 162600 }, { "epoch": 0.5700767532330985, "grad_norm": 0.5594772696495056, "learning_rate": 2.150790312970946e-05, "loss": 2.2137, "step": 162660 }, { "epoch": 0.5702870360635054, "grad_norm": 0.4988769292831421, "learning_rate": 2.1497388988189117e-05, "loss": 2.1901, "step": 162720 }, { "epoch": 0.5704973188939123, "grad_norm": 0.5252644419670105, "learning_rate": 2.148687484666877e-05, "loss": 2.2049, "step": 162780 }, { "epoch": 0.5707076017243192, "grad_norm": 0.5093538165092468, "learning_rate": 2.1476360705148427e-05, "loss": 2.1893, "step": 162840 }, { "epoch": 0.5709178845547261, "grad_norm": 0.6334115266799927, "learning_rate": 2.1465846563628083e-05, "loss": 2.2228, "step": 162900 }, { "epoch": 0.5711281673851331, "grad_norm": 0.5815603733062744, "learning_rate": 2.1455332422107734e-05, "loss": 2.2282, "step": 162960 }, { "epoch": 0.5713384502155399, "grad_norm": 0.5428621172904968, "learning_rate": 2.1444818280587393e-05, "loss": 2.195, "step": 163020 }, { "epoch": 0.5715487330459468, "grad_norm": 0.5853855013847351, "learning_rate": 2.1434304139067045e-05, "loss": 2.2075, "step": 163080 }, { "epoch": 0.5717590158763537, "grad_norm": 0.5330763459205627, "learning_rate": 2.14237899975467e-05, "loss": 2.2133, "step": 163140 }, { "epoch": 0.5719692987067606, "grad_norm": 0.6086885333061218, "learning_rate": 2.141327585602636e-05, "loss": 2.2198, "step": 163200 }, { "epoch": 0.5721795815371675, "grad_norm": 0.5251525044441223, "learning_rate": 2.140276171450601e-05, "loss": 2.2149, "step": 163260 }, { "epoch": 0.5723898643675744, "grad_norm": 0.5404868721961975, "learning_rate": 2.1392247572985666e-05, "loss": 2.1866, "step": 
163320 }, { "epoch": 0.5726001471979812, "grad_norm": 0.5327269434928894, "learning_rate": 2.138173343146532e-05, "loss": 2.2018, "step": 163380 }, { "epoch": 0.5728104300283882, "grad_norm": 0.5136888027191162, "learning_rate": 2.1371219289944977e-05, "loss": 2.2092, "step": 163440 }, { "epoch": 0.5730207128587951, "grad_norm": 0.5119737386703491, "learning_rate": 2.1360705148424632e-05, "loss": 2.1987, "step": 163500 }, { "epoch": 0.573230995689202, "grad_norm": 0.519402027130127, "learning_rate": 2.1350191006904287e-05, "loss": 2.2073, "step": 163560 }, { "epoch": 0.5734412785196089, "grad_norm": 0.5646945238113403, "learning_rate": 2.1339676865383943e-05, "loss": 2.2054, "step": 163620 }, { "epoch": 0.5736515613500158, "grad_norm": 0.5275649428367615, "learning_rate": 2.1329162723863598e-05, "loss": 2.2091, "step": 163680 }, { "epoch": 0.5738618441804226, "grad_norm": 0.5596910715103149, "learning_rate": 2.1318648582343253e-05, "loss": 2.2094, "step": 163740 }, { "epoch": 0.5740721270108295, "grad_norm": 0.5533624887466431, "learning_rate": 2.130813444082291e-05, "loss": 2.1974, "step": 163800 }, { "epoch": 0.5742824098412365, "grad_norm": 0.5841308236122131, "learning_rate": 2.1297620299302564e-05, "loss": 2.2073, "step": 163860 }, { "epoch": 0.5744926926716434, "grad_norm": 0.5480549931526184, "learning_rate": 2.128710615778222e-05, "loss": 2.2091, "step": 163920 }, { "epoch": 0.5747029755020503, "grad_norm": 0.5628250241279602, "learning_rate": 2.1276592016261874e-05, "loss": 2.1969, "step": 163980 }, { "epoch": 0.5749132583324571, "grad_norm": 0.5552694797515869, "learning_rate": 2.1266077874741526e-05, "loss": 2.2012, "step": 164040 }, { "epoch": 0.575123541162864, "grad_norm": 0.5316144824028015, "learning_rate": 2.1255563733221185e-05, "loss": 2.2024, "step": 164100 }, { "epoch": 0.5753338239932709, "grad_norm": 0.5356711745262146, "learning_rate": 2.124504959170084e-05, "loss": 2.21, "step": 164160 }, { "epoch": 0.5755441068236778, "grad_norm": 0.5729750394821167, "learning_rate": 2.1234535450180492e-05, "loss": 2.2165, "step": 164220 }, { "epoch": 0.5757543896540848, "grad_norm": 0.5203530788421631, "learning_rate": 2.122402130866015e-05, "loss": 2.2064, "step": 164280 }, { "epoch": 0.5759646724844917, "grad_norm": 0.5358246564865112, "learning_rate": 2.1213507167139802e-05, "loss": 2.1968, "step": 164340 }, { "epoch": 0.5761749553148985, "grad_norm": 0.5929320454597473, "learning_rate": 2.1203168261311466e-05, "loss": 2.2166, "step": 164400 }, { "epoch": 0.5763852381453054, "grad_norm": 0.5435928106307983, "learning_rate": 2.1192654119791118e-05, "loss": 2.1965, "step": 164460 }, { "epoch": 0.5765955209757123, "grad_norm": 0.6071345210075378, "learning_rate": 2.1182139978270777e-05, "loss": 2.1928, "step": 164520 }, { "epoch": 0.5768058038061192, "grad_norm": 0.5283774137496948, "learning_rate": 2.1171625836750432e-05, "loss": 2.1927, "step": 164580 }, { "epoch": 0.5770160866365261, "grad_norm": 0.5249363780021667, "learning_rate": 2.1161111695230084e-05, "loss": 2.1854, "step": 164640 }, { "epoch": 0.5772263694669331, "grad_norm": 0.5568847060203552, "learning_rate": 2.1150597553709743e-05, "loss": 2.1961, "step": 164700 }, { "epoch": 0.5774366522973399, "grad_norm": 0.545061469078064, "learning_rate": 2.1140083412189395e-05, "loss": 2.2026, "step": 164760 }, { "epoch": 0.5776469351277468, "grad_norm": 0.5466181635856628, "learning_rate": 2.112956927066905e-05, "loss": 2.201, "step": 164820 }, { "epoch": 0.5778572179581537, "grad_norm": 0.5336632132530212, "learning_rate": 
2.111905512914871e-05, "loss": 2.2067, "step": 164880 }, { "epoch": 0.5780675007885606, "grad_norm": 0.5472707152366638, "learning_rate": 2.110854098762836e-05, "loss": 2.196, "step": 164940 }, { "epoch": 0.5782777836189675, "grad_norm": 0.5615684390068054, "learning_rate": 2.1098026846108016e-05, "loss": 2.1958, "step": 165000 }, { "epoch": 0.5784880664493744, "grad_norm": 0.5983463525772095, "learning_rate": 2.108751270458767e-05, "loss": 2.204, "step": 165060 }, { "epoch": 0.5786983492797813, "grad_norm": 0.5347958207130432, "learning_rate": 2.1076998563067326e-05, "loss": 2.1982, "step": 165120 }, { "epoch": 0.5789086321101882, "grad_norm": 0.5867051482200623, "learning_rate": 2.106648442154698e-05, "loss": 2.1985, "step": 165180 }, { "epoch": 0.5791189149405951, "grad_norm": 0.5597366094589233, "learning_rate": 2.1055970280026637e-05, "loss": 2.2171, "step": 165240 }, { "epoch": 0.579329197771002, "grad_norm": 0.5548207759857178, "learning_rate": 2.1045456138506292e-05, "loss": 2.2144, "step": 165300 }, { "epoch": 0.5795394806014089, "grad_norm": 0.5555901527404785, "learning_rate": 2.1034941996985947e-05, "loss": 2.2007, "step": 165360 }, { "epoch": 0.5797497634318158, "grad_norm": 0.5185126662254333, "learning_rate": 2.1024427855465603e-05, "loss": 2.2192, "step": 165420 }, { "epoch": 0.5799600462622226, "grad_norm": 0.5712375044822693, "learning_rate": 2.1013913713945258e-05, "loss": 2.2049, "step": 165480 }, { "epoch": 0.5801703290926296, "grad_norm": 0.5157219171524048, "learning_rate": 2.1003399572424913e-05, "loss": 2.2082, "step": 165540 }, { "epoch": 0.5803806119230365, "grad_norm": 0.5245518684387207, "learning_rate": 2.099288543090457e-05, "loss": 2.2327, "step": 165600 }, { "epoch": 0.5805908947534434, "grad_norm": 0.5682709813117981, "learning_rate": 2.0982371289384224e-05, "loss": 2.2124, "step": 165660 }, { "epoch": 0.5808011775838503, "grad_norm": 0.5247425436973572, "learning_rate": 2.0971857147863876e-05, "loss": 2.1882, "step": 165720 }, { "epoch": 0.5810114604142572, "grad_norm": 0.5044289231300354, "learning_rate": 2.0961343006343534e-05, "loss": 2.1964, "step": 165780 }, { "epoch": 0.581221743244664, "grad_norm": 0.5318524837493896, "learning_rate": 2.095082886482319e-05, "loss": 2.2039, "step": 165840 }, { "epoch": 0.5814320260750709, "grad_norm": 0.5390702486038208, "learning_rate": 2.094031472330284e-05, "loss": 2.2166, "step": 165900 }, { "epoch": 0.5816423089054779, "grad_norm": 0.5584826469421387, "learning_rate": 2.09298005817825e-05, "loss": 2.204, "step": 165960 }, { "epoch": 0.5818525917358848, "grad_norm": 0.6024618148803711, "learning_rate": 2.0919286440262152e-05, "loss": 2.1936, "step": 166020 }, { "epoch": 0.5820628745662917, "grad_norm": 0.5568735003471375, "learning_rate": 2.0908772298741807e-05, "loss": 2.1944, "step": 166080 }, { "epoch": 0.5822731573966986, "grad_norm": 0.4855721890926361, "learning_rate": 2.0898258157221466e-05, "loss": 2.1942, "step": 166140 }, { "epoch": 0.5824834402271054, "grad_norm": 0.5432448387145996, "learning_rate": 2.0887744015701118e-05, "loss": 2.217, "step": 166200 }, { "epoch": 0.5826937230575123, "grad_norm": 0.5233843922615051, "learning_rate": 2.0877229874180773e-05, "loss": 2.178, "step": 166260 }, { "epoch": 0.5829040058879192, "grad_norm": 0.5257920026779175, "learning_rate": 2.086671573266043e-05, "loss": 2.2078, "step": 166320 }, { "epoch": 0.5831142887183262, "grad_norm": 0.4958641529083252, "learning_rate": 2.0856201591140084e-05, "loss": 2.2105, "step": 166380 }, { "epoch": 0.5833245715487331, 
"grad_norm": 0.5342472195625305, "learning_rate": 2.084568744961974e-05, "loss": 2.2074, "step": 166440 }, { "epoch": 0.58353485437914, "grad_norm": 0.5222705006599426, "learning_rate": 2.08353485437914e-05, "loss": 2.2021, "step": 166500 }, { "epoch": 0.5837451372095468, "grad_norm": 0.588127076625824, "learning_rate": 2.082483440227106e-05, "loss": 2.2006, "step": 166560 }, { "epoch": 0.5839554200399537, "grad_norm": 0.5478534698486328, "learning_rate": 2.081432026075071e-05, "loss": 2.1941, "step": 166620 }, { "epoch": 0.5841657028703606, "grad_norm": 0.6026087403297424, "learning_rate": 2.0803806119230365e-05, "loss": 2.1984, "step": 166680 }, { "epoch": 0.5843759857007675, "grad_norm": 0.5317620038986206, "learning_rate": 2.079329197771002e-05, "loss": 2.2086, "step": 166740 }, { "epoch": 0.5845862685311745, "grad_norm": 0.5124843120574951, "learning_rate": 2.0782777836189676e-05, "loss": 2.2168, "step": 166800 }, { "epoch": 0.5847965513615814, "grad_norm": 0.5259480476379395, "learning_rate": 2.077226369466933e-05, "loss": 2.2097, "step": 166860 }, { "epoch": 0.5850068341919882, "grad_norm": 0.5472617149353027, "learning_rate": 2.0761749553148987e-05, "loss": 2.204, "step": 166920 }, { "epoch": 0.5852171170223951, "grad_norm": 0.540069043636322, "learning_rate": 2.0751235411628642e-05, "loss": 2.2079, "step": 166980 }, { "epoch": 0.585427399852802, "grad_norm": 0.5131522417068481, "learning_rate": 2.0740721270108297e-05, "loss": 2.2178, "step": 167040 }, { "epoch": 0.5856376826832089, "grad_norm": 0.5550267100334167, "learning_rate": 2.0730207128587952e-05, "loss": 2.2018, "step": 167100 }, { "epoch": 0.5858479655136158, "grad_norm": 0.5923956036567688, "learning_rate": 2.0719692987067608e-05, "loss": 2.1929, "step": 167160 }, { "epoch": 0.5860582483440228, "grad_norm": 0.541883111000061, "learning_rate": 2.070917884554726e-05, "loss": 2.1886, "step": 167220 }, { "epoch": 0.5862685311744296, "grad_norm": 0.5703868865966797, "learning_rate": 2.0698664704026918e-05, "loss": 2.2095, "step": 167280 }, { "epoch": 0.5864788140048365, "grad_norm": 0.5646779537200928, "learning_rate": 2.0688150562506574e-05, "loss": 2.2195, "step": 167340 }, { "epoch": 0.5866890968352434, "grad_norm": 0.5657762289047241, "learning_rate": 2.0677636420986225e-05, "loss": 2.2057, "step": 167400 }, { "epoch": 0.5868993796656503, "grad_norm": 0.5300728678703308, "learning_rate": 2.0667122279465884e-05, "loss": 2.2111, "step": 167460 }, { "epoch": 0.5871096624960572, "grad_norm": 0.5432790517807007, "learning_rate": 2.065660813794554e-05, "loss": 2.199, "step": 167520 }, { "epoch": 0.5873199453264641, "grad_norm": 0.5578957796096802, "learning_rate": 2.064609399642519e-05, "loss": 2.1944, "step": 167580 }, { "epoch": 0.587530228156871, "grad_norm": 0.6584115624427795, "learning_rate": 2.063557985490485e-05, "loss": 2.2217, "step": 167640 }, { "epoch": 0.5877405109872779, "grad_norm": 0.5259327292442322, "learning_rate": 2.0625065713384502e-05, "loss": 2.2109, "step": 167700 }, { "epoch": 0.5879507938176848, "grad_norm": 0.5304680466651917, "learning_rate": 2.0614551571864157e-05, "loss": 2.1984, "step": 167760 }, { "epoch": 0.5881610766480917, "grad_norm": 0.515880823135376, "learning_rate": 2.0604037430343816e-05, "loss": 2.2042, "step": 167820 }, { "epoch": 0.5883713594784986, "grad_norm": 0.6544479131698608, "learning_rate": 2.0593523288823468e-05, "loss": 2.2056, "step": 167880 }, { "epoch": 0.5885816423089055, "grad_norm": 0.5079035758972168, "learning_rate": 2.0583009147303123e-05, "loss": 2.2061, "step": 
167940 }, { "epoch": 0.5887919251393123, "grad_norm": 0.5269632935523987, "learning_rate": 2.0572495005782778e-05, "loss": 2.2078, "step": 168000 }, { "epoch": 0.5890022079697192, "grad_norm": 0.6275792121887207, "learning_rate": 2.0561980864262434e-05, "loss": 2.2012, "step": 168060 }, { "epoch": 0.5892124908001262, "grad_norm": 0.5413856506347656, "learning_rate": 2.055146672274209e-05, "loss": 2.1943, "step": 168120 }, { "epoch": 0.5894227736305331, "grad_norm": 0.5271443724632263, "learning_rate": 2.0540952581221744e-05, "loss": 2.1954, "step": 168180 }, { "epoch": 0.58963305646094, "grad_norm": 0.5837568044662476, "learning_rate": 2.0530613675393408e-05, "loss": 2.2011, "step": 168240 }, { "epoch": 0.5898433392913469, "grad_norm": 0.5604709386825562, "learning_rate": 2.052009953387306e-05, "loss": 2.1948, "step": 168300 }, { "epoch": 0.5900536221217537, "grad_norm": 0.5001662969589233, "learning_rate": 2.0509585392352715e-05, "loss": 2.2032, "step": 168360 }, { "epoch": 0.5902639049521606, "grad_norm": 0.5290040969848633, "learning_rate": 2.049907125083237e-05, "loss": 2.2068, "step": 168420 }, { "epoch": 0.5904741877825676, "grad_norm": 0.5256708264350891, "learning_rate": 2.0488557109312026e-05, "loss": 2.2006, "step": 168480 }, { "epoch": 0.5906844706129745, "grad_norm": 0.5151348114013672, "learning_rate": 2.047804296779168e-05, "loss": 2.18, "step": 168540 }, { "epoch": 0.5908947534433814, "grad_norm": 0.555904746055603, "learning_rate": 2.0467528826271336e-05, "loss": 2.2069, "step": 168600 }, { "epoch": 0.5911050362737883, "grad_norm": 0.5817997455596924, "learning_rate": 2.045701468475099e-05, "loss": 2.1841, "step": 168660 }, { "epoch": 0.5913153191041951, "grad_norm": 0.6211709976196289, "learning_rate": 2.0446500543230647e-05, "loss": 2.1905, "step": 168720 }, { "epoch": 0.591525601934602, "grad_norm": 0.5097838640213013, "learning_rate": 2.0435986401710302e-05, "loss": 2.2004, "step": 168780 }, { "epoch": 0.5917358847650089, "grad_norm": 0.5386205315589905, "learning_rate": 2.0425472260189957e-05, "loss": 2.1964, "step": 168840 }, { "epoch": 0.5919461675954159, "grad_norm": 0.5313547849655151, "learning_rate": 2.041495811866961e-05, "loss": 2.1926, "step": 168900 }, { "epoch": 0.5921564504258228, "grad_norm": 0.5706102848052979, "learning_rate": 2.0404443977149268e-05, "loss": 2.2198, "step": 168960 }, { "epoch": 0.5923667332562297, "grad_norm": 0.5210338234901428, "learning_rate": 2.0393929835628923e-05, "loss": 2.2126, "step": 169020 }, { "epoch": 0.5925770160866365, "grad_norm": 0.6056246161460876, "learning_rate": 2.0383415694108575e-05, "loss": 2.185, "step": 169080 }, { "epoch": 0.5927872989170434, "grad_norm": 0.5859523415565491, "learning_rate": 2.0372901552588234e-05, "loss": 2.2102, "step": 169140 }, { "epoch": 0.5929975817474503, "grad_norm": 0.5275964736938477, "learning_rate": 2.036238741106789e-05, "loss": 2.209, "step": 169200 }, { "epoch": 0.5932078645778572, "grad_norm": 0.5471135973930359, "learning_rate": 2.035187326954754e-05, "loss": 2.1826, "step": 169260 }, { "epoch": 0.5934181474082642, "grad_norm": 0.5576337575912476, "learning_rate": 2.03413591280272e-05, "loss": 2.1961, "step": 169320 }, { "epoch": 0.5936284302386711, "grad_norm": 0.5606521368026733, "learning_rate": 2.033084498650685e-05, "loss": 2.2022, "step": 169380 }, { "epoch": 0.5938387130690779, "grad_norm": 0.5469135642051697, "learning_rate": 2.0320330844986507e-05, "loss": 2.2126, "step": 169440 }, { "epoch": 0.5940489958994848, "grad_norm": 0.5704513788223267, "learning_rate": 
2.0309816703466165e-05, "loss": 2.1926, "step": 169500 }, { "epoch": 0.5942592787298917, "grad_norm": 0.5261359214782715, "learning_rate": 2.0299302561945817e-05, "loss": 2.2098, "step": 169560 }, { "epoch": 0.5944695615602986, "grad_norm": 0.5451574921607971, "learning_rate": 2.0288788420425473e-05, "loss": 2.1908, "step": 169620 }, { "epoch": 0.5946798443907055, "grad_norm": 0.599793553352356, "learning_rate": 2.0278274278905128e-05, "loss": 2.2151, "step": 169680 }, { "epoch": 0.5948901272211123, "grad_norm": 0.5122420191764832, "learning_rate": 2.0267760137384783e-05, "loss": 2.1889, "step": 169740 }, { "epoch": 0.5951004100515193, "grad_norm": 0.49233028292655945, "learning_rate": 2.025724599586444e-05, "loss": 2.1872, "step": 169800 }, { "epoch": 0.5953106928819262, "grad_norm": 0.553433358669281, "learning_rate": 2.0246731854344094e-05, "loss": 2.2133, "step": 169860 }, { "epoch": 0.5955209757123331, "grad_norm": 0.6067487001419067, "learning_rate": 2.023621771282375e-05, "loss": 2.2073, "step": 169920 }, { "epoch": 0.59573125854274, "grad_norm": 0.5803714394569397, "learning_rate": 2.0225703571303404e-05, "loss": 2.2029, "step": 169980 }, { "epoch": 0.5959415413731469, "grad_norm": 0.5488730669021606, "learning_rate": 2.021518942978306e-05, "loss": 2.2062, "step": 170040 }, { "epoch": 0.5961518242035537, "grad_norm": 0.5398715734481812, "learning_rate": 2.0204675288262715e-05, "loss": 2.2067, "step": 170100 }, { "epoch": 0.5963621070339606, "grad_norm": 0.5281250476837158, "learning_rate": 2.019416114674237e-05, "loss": 2.192, "step": 170160 }, { "epoch": 0.5965723898643676, "grad_norm": 0.53722083568573, "learning_rate": 2.0183647005222025e-05, "loss": 2.2, "step": 170220 }, { "epoch": 0.5967826726947745, "grad_norm": 0.5472487807273865, "learning_rate": 2.017313286370168e-05, "loss": 2.1978, "step": 170280 }, { "epoch": 0.5969929555251814, "grad_norm": 0.6086287498474121, "learning_rate": 2.0162618722181333e-05, "loss": 2.1849, "step": 170340 }, { "epoch": 0.5972032383555883, "grad_norm": 0.655676543712616, "learning_rate": 2.015210458066099e-05, "loss": 2.2063, "step": 170400 }, { "epoch": 0.5974135211859951, "grad_norm": 0.5415231585502625, "learning_rate": 2.0141590439140647e-05, "loss": 2.185, "step": 170460 }, { "epoch": 0.597623804016402, "grad_norm": 0.5675277709960938, "learning_rate": 2.01310762976203e-05, "loss": 2.2077, "step": 170520 }, { "epoch": 0.5978340868468089, "grad_norm": 0.5531576871871948, "learning_rate": 2.0120562156099957e-05, "loss": 2.1948, "step": 170580 }, { "epoch": 0.5980443696772159, "grad_norm": 0.5377048850059509, "learning_rate": 2.011004801457961e-05, "loss": 2.1867, "step": 170640 }, { "epoch": 0.5982546525076228, "grad_norm": 0.47934389114379883, "learning_rate": 2.0099533873059264e-05, "loss": 2.2068, "step": 170700 }, { "epoch": 0.5984649353380297, "grad_norm": 0.576343297958374, "learning_rate": 2.0089019731538923e-05, "loss": 2.1905, "step": 170760 }, { "epoch": 0.5986752181684365, "grad_norm": 0.5875000953674316, "learning_rate": 2.0078505590018575e-05, "loss": 2.2133, "step": 170820 }, { "epoch": 0.5988855009988434, "grad_norm": 0.5581299066543579, "learning_rate": 2.006799144849823e-05, "loss": 2.2149, "step": 170880 }, { "epoch": 0.5990957838292503, "grad_norm": 0.5211154818534851, "learning_rate": 2.0057477306977885e-05, "loss": 2.2056, "step": 170940 }, { "epoch": 0.5993060666596572, "grad_norm": 0.6828522682189941, "learning_rate": 2.004696316545754e-05, "loss": 2.2017, "step": 171000 }, { "epoch": 0.5995163494900642, "grad_norm": 
0.6186325550079346, "learning_rate": 2.0036449023937196e-05, "loss": 2.2047, "step": 171060 }, { "epoch": 0.5997266323204711, "grad_norm": 0.5010774731636047, "learning_rate": 2.002593488241685e-05, "loss": 2.2054, "step": 171120 }, { "epoch": 0.5999369151508779, "grad_norm": 0.5373931527137756, "learning_rate": 2.0015420740896507e-05, "loss": 2.2007, "step": 171180 }, { "epoch": 0.6001471979812848, "grad_norm": 0.6149768829345703, "learning_rate": 2.0004906599376162e-05, "loss": 2.1917, "step": 171240 }, { "epoch": 0.6003574808116917, "grad_norm": 0.5230481028556824, "learning_rate": 1.9994392457855817e-05, "loss": 2.1969, "step": 171300 }, { "epoch": 0.6005677636420986, "grad_norm": 0.6458109021186829, "learning_rate": 1.9983878316335472e-05, "loss": 2.1982, "step": 171360 }, { "epoch": 0.6007780464725055, "grad_norm": 0.5456601977348328, "learning_rate": 1.9973364174815128e-05, "loss": 2.1954, "step": 171420 }, { "epoch": 0.6009883293029125, "grad_norm": 0.534690260887146, "learning_rate": 1.9962850033294783e-05, "loss": 2.2124, "step": 171480 }, { "epoch": 0.6011986121333193, "grad_norm": 0.5408187508583069, "learning_rate": 1.9952335891774438e-05, "loss": 2.2105, "step": 171540 }, { "epoch": 0.6014088949637262, "grad_norm": 0.5347569584846497, "learning_rate": 1.994182175025409e-05, "loss": 2.2046, "step": 171600 }, { "epoch": 0.6016191777941331, "grad_norm": 0.5162372589111328, "learning_rate": 1.993130760873375e-05, "loss": 2.1956, "step": 171660 }, { "epoch": 0.60182946062454, "grad_norm": 0.570464551448822, "learning_rate": 1.9920793467213404e-05, "loss": 2.1895, "step": 171720 }, { "epoch": 0.6020397434549469, "grad_norm": 0.5477625727653503, "learning_rate": 1.9910279325693056e-05, "loss": 2.1948, "step": 171780 }, { "epoch": 0.6022500262853538, "grad_norm": 0.5018092393875122, "learning_rate": 1.9899765184172715e-05, "loss": 2.2074, "step": 171840 }, { "epoch": 0.6024603091157607, "grad_norm": 0.4974982738494873, "learning_rate": 1.9889251042652367e-05, "loss": 2.2148, "step": 171900 }, { "epoch": 0.6026705919461676, "grad_norm": 0.5513793230056763, "learning_rate": 1.9878736901132022e-05, "loss": 2.2062, "step": 171960 }, { "epoch": 0.6028808747765745, "grad_norm": 0.5236059427261353, "learning_rate": 1.986822275961168e-05, "loss": 2.1883, "step": 172020 }, { "epoch": 0.6030911576069814, "grad_norm": 0.5051701664924622, "learning_rate": 1.9857708618091332e-05, "loss": 2.2044, "step": 172080 }, { "epoch": 0.6033014404373883, "grad_norm": 0.5532500147819519, "learning_rate": 1.9847194476570988e-05, "loss": 2.1961, "step": 172140 }, { "epoch": 0.6035117232677952, "grad_norm": 0.5269227623939514, "learning_rate": 1.9836855570742648e-05, "loss": 2.1847, "step": 172200 }, { "epoch": 0.603722006098202, "grad_norm": 0.5466260313987732, "learning_rate": 1.9826341429222307e-05, "loss": 2.2018, "step": 172260 }, { "epoch": 0.603932288928609, "grad_norm": 0.5222917795181274, "learning_rate": 1.981582728770196e-05, "loss": 2.2044, "step": 172320 }, { "epoch": 0.6041425717590159, "grad_norm": 0.5396358370780945, "learning_rate": 1.9805313146181614e-05, "loss": 2.1967, "step": 172380 }, { "epoch": 0.6043528545894228, "grad_norm": 0.5147253274917603, "learning_rate": 1.9794799004661273e-05, "loss": 2.198, "step": 172440 }, { "epoch": 0.6045631374198297, "grad_norm": 0.5578526258468628, "learning_rate": 1.9784284863140925e-05, "loss": 2.2013, "step": 172500 }, { "epoch": 0.6047734202502366, "grad_norm": 0.5797805786132812, "learning_rate": 1.977377072162058e-05, "loss": 2.1967, "step": 172560 
}, { "epoch": 0.6049837030806434, "grad_norm": 0.6064589023590088, "learning_rate": 1.9763256580100235e-05, "loss": 2.2014, "step": 172620 }, { "epoch": 0.6051939859110503, "grad_norm": 0.5353837609291077, "learning_rate": 1.975274243857989e-05, "loss": 2.2, "step": 172680 }, { "epoch": 0.6054042687414573, "grad_norm": 0.5772452354431152, "learning_rate": 1.9742228297059546e-05, "loss": 2.1983, "step": 172740 }, { "epoch": 0.6056145515718642, "grad_norm": 0.5389087200164795, "learning_rate": 1.97317141555392e-05, "loss": 2.1982, "step": 172800 }, { "epoch": 0.6058248344022711, "grad_norm": 0.5934854745864868, "learning_rate": 1.9721200014018856e-05, "loss": 2.206, "step": 172860 }, { "epoch": 0.606035117232678, "grad_norm": 0.5362783670425415, "learning_rate": 1.971068587249851e-05, "loss": 2.1964, "step": 172920 }, { "epoch": 0.6062454000630848, "grad_norm": 0.5497432947158813, "learning_rate": 1.9700171730978167e-05, "loss": 2.2167, "step": 172980 }, { "epoch": 0.6064556828934917, "grad_norm": 0.5316357016563416, "learning_rate": 1.9689657589457822e-05, "loss": 2.1987, "step": 173040 }, { "epoch": 0.6066659657238986, "grad_norm": 0.5472717881202698, "learning_rate": 1.9679143447937477e-05, "loss": 2.2006, "step": 173100 }, { "epoch": 0.6068762485543056, "grad_norm": 0.564258337020874, "learning_rate": 1.9668629306417133e-05, "loss": 2.1874, "step": 173160 }, { "epoch": 0.6070865313847125, "grad_norm": 0.5468765497207642, "learning_rate": 1.9658115164896788e-05, "loss": 2.2048, "step": 173220 }, { "epoch": 0.6072968142151194, "grad_norm": 0.5391786694526672, "learning_rate": 1.964760102337644e-05, "loss": 2.2042, "step": 173280 }, { "epoch": 0.6075070970455262, "grad_norm": 0.5502793788909912, "learning_rate": 1.96370868818561e-05, "loss": 2.2096, "step": 173340 }, { "epoch": 0.6077173798759331, "grad_norm": 0.49307966232299805, "learning_rate": 1.9626572740335754e-05, "loss": 2.2055, "step": 173400 }, { "epoch": 0.60792766270634, "grad_norm": 0.5799115896224976, "learning_rate": 1.9616058598815406e-05, "loss": 2.1992, "step": 173460 }, { "epoch": 0.6081379455367469, "grad_norm": 0.554755449295044, "learning_rate": 1.9605544457295064e-05, "loss": 2.1998, "step": 173520 }, { "epoch": 0.6083482283671539, "grad_norm": 0.5916181802749634, "learning_rate": 1.9595030315774716e-05, "loss": 2.2041, "step": 173580 }, { "epoch": 0.6085585111975608, "grad_norm": 0.5109410285949707, "learning_rate": 1.958451617425437e-05, "loss": 2.1912, "step": 173640 }, { "epoch": 0.6087687940279676, "grad_norm": 0.6058209538459778, "learning_rate": 1.957400203273403e-05, "loss": 2.2032, "step": 173700 }, { "epoch": 0.6089790768583745, "grad_norm": 0.5700769424438477, "learning_rate": 1.9563487891213682e-05, "loss": 2.2047, "step": 173760 }, { "epoch": 0.6091893596887814, "grad_norm": 0.5366501212120056, "learning_rate": 1.9552973749693337e-05, "loss": 2.2086, "step": 173820 }, { "epoch": 0.6093996425191883, "grad_norm": 0.530959963798523, "learning_rate": 1.9542459608172996e-05, "loss": 2.194, "step": 173880 }, { "epoch": 0.6096099253495952, "grad_norm": 0.5777100324630737, "learning_rate": 1.9531945466652648e-05, "loss": 2.187, "step": 173940 }, { "epoch": 0.6098202081800022, "grad_norm": 0.5903493762016296, "learning_rate": 1.9521431325132303e-05, "loss": 2.1917, "step": 174000 }, { "epoch": 0.610030491010409, "grad_norm": 0.5649183988571167, "learning_rate": 1.951091718361196e-05, "loss": 2.2008, "step": 174060 }, { "epoch": 0.6102407738408159, "grad_norm": 0.5999853610992432, "learning_rate": 
1.9500578277783622e-05, "loss": 2.1982, "step": 174120 }, { "epoch": 0.6104510566712228, "grad_norm": 0.5888971090316772, "learning_rate": 1.9490064136263274e-05, "loss": 2.1858, "step": 174180 }, { "epoch": 0.6106613395016297, "grad_norm": 0.5282642841339111, "learning_rate": 1.947954999474293e-05, "loss": 2.2078, "step": 174240 }, { "epoch": 0.6108716223320366, "grad_norm": 0.5580457448959351, "learning_rate": 1.9469035853222585e-05, "loss": 2.2056, "step": 174300 }, { "epoch": 0.6110819051624435, "grad_norm": 0.5449835658073425, "learning_rate": 1.945852171170224e-05, "loss": 2.1955, "step": 174360 }, { "epoch": 0.6112921879928503, "grad_norm": 0.575751543045044, "learning_rate": 1.9448007570181895e-05, "loss": 2.2093, "step": 174420 }, { "epoch": 0.6115024708232573, "grad_norm": 0.5661749243736267, "learning_rate": 1.943749342866155e-05, "loss": 2.1966, "step": 174480 }, { "epoch": 0.6117127536536642, "grad_norm": 0.5168628692626953, "learning_rate": 1.9426979287141206e-05, "loss": 2.1901, "step": 174540 }, { "epoch": 0.6119230364840711, "grad_norm": 0.5125284194946289, "learning_rate": 1.941646514562086e-05, "loss": 2.1967, "step": 174600 }, { "epoch": 0.612133319314478, "grad_norm": 0.5725422501564026, "learning_rate": 1.9405951004100516e-05, "loss": 2.1878, "step": 174660 }, { "epoch": 0.6123436021448849, "grad_norm": 0.5970425009727478, "learning_rate": 1.9395436862580172e-05, "loss": 2.2048, "step": 174720 }, { "epoch": 0.6125538849752917, "grad_norm": 0.6201969385147095, "learning_rate": 1.9384922721059827e-05, "loss": 2.2, "step": 174780 }, { "epoch": 0.6127641678056986, "grad_norm": 0.5165169835090637, "learning_rate": 1.9374408579539482e-05, "loss": 2.2012, "step": 174840 }, { "epoch": 0.6129744506361056, "grad_norm": 0.5577533841133118, "learning_rate": 1.9363894438019138e-05, "loss": 2.1943, "step": 174900 }, { "epoch": 0.6131847334665125, "grad_norm": 0.5277051329612732, "learning_rate": 1.935338029649879e-05, "loss": 2.2078, "step": 174960 }, { "epoch": 0.6133950162969194, "grad_norm": 0.5218917727470398, "learning_rate": 1.9342866154978448e-05, "loss": 2.2117, "step": 175020 }, { "epoch": 0.6136052991273263, "grad_norm": 0.5269528031349182, "learning_rate": 1.9332352013458103e-05, "loss": 2.1972, "step": 175080 }, { "epoch": 0.6138155819577331, "grad_norm": 0.6118609309196472, "learning_rate": 1.9321837871937755e-05, "loss": 2.2075, "step": 175140 }, { "epoch": 0.61402586478814, "grad_norm": 0.545382559299469, "learning_rate": 1.9311323730417414e-05, "loss": 2.1901, "step": 175200 }, { "epoch": 0.614236147618547, "grad_norm": 0.6513542532920837, "learning_rate": 1.9300809588897066e-05, "loss": 2.1867, "step": 175260 }, { "epoch": 0.6144464304489539, "grad_norm": 0.5605107545852661, "learning_rate": 1.929029544737672e-05, "loss": 2.1884, "step": 175320 }, { "epoch": 0.6146567132793608, "grad_norm": 0.6436792612075806, "learning_rate": 1.927978130585638e-05, "loss": 2.1898, "step": 175380 }, { "epoch": 0.6148669961097676, "grad_norm": 0.5533410310745239, "learning_rate": 1.9269267164336032e-05, "loss": 2.2032, "step": 175440 }, { "epoch": 0.6150772789401745, "grad_norm": 0.501750111579895, "learning_rate": 1.9258753022815687e-05, "loss": 2.2081, "step": 175500 }, { "epoch": 0.6152875617705814, "grad_norm": 0.498298317193985, "learning_rate": 1.9248238881295346e-05, "loss": 2.2056, "step": 175560 }, { "epoch": 0.6154978446009883, "grad_norm": 0.5598739981651306, "learning_rate": 1.9237724739774998e-05, "loss": 2.179, "step": 175620 }, { "epoch": 0.6157081274313952, 
"grad_norm": 0.5532486438751221, "learning_rate": 1.9227210598254653e-05, "loss": 2.1799, "step": 175680 }, { "epoch": 0.6159184102618022, "grad_norm": 0.5259189009666443, "learning_rate": 1.9216696456734308e-05, "loss": 2.189, "step": 175740 }, { "epoch": 0.616128693092209, "grad_norm": 0.5915912389755249, "learning_rate": 1.9206182315213963e-05, "loss": 2.1899, "step": 175800 }, { "epoch": 0.6163389759226159, "grad_norm": 0.511455774307251, "learning_rate": 1.919566817369362e-05, "loss": 2.2011, "step": 175860 }, { "epoch": 0.6165492587530228, "grad_norm": 0.5347529649734497, "learning_rate": 1.9185154032173274e-05, "loss": 2.2029, "step": 175920 }, { "epoch": 0.6167595415834297, "grad_norm": 0.5264248251914978, "learning_rate": 1.917463989065293e-05, "loss": 2.1933, "step": 175980 }, { "epoch": 0.6169698244138366, "grad_norm": 0.5930898189544678, "learning_rate": 1.9164125749132585e-05, "loss": 2.1939, "step": 176040 }, { "epoch": 0.6171801072442435, "grad_norm": 0.6320661902427673, "learning_rate": 1.915361160761224e-05, "loss": 2.1923, "step": 176100 }, { "epoch": 0.6173903900746504, "grad_norm": 0.5503607988357544, "learning_rate": 1.9143097466091895e-05, "loss": 2.1956, "step": 176160 }, { "epoch": 0.6176006729050573, "grad_norm": 0.5331404209136963, "learning_rate": 1.9132583324571547e-05, "loss": 2.2246, "step": 176220 }, { "epoch": 0.6178109557354642, "grad_norm": 0.5129519701004028, "learning_rate": 1.9122069183051206e-05, "loss": 2.1986, "step": 176280 }, { "epoch": 0.6180212385658711, "grad_norm": 0.5717986226081848, "learning_rate": 1.911155504153086e-05, "loss": 2.1992, "step": 176340 }, { "epoch": 0.618231521396278, "grad_norm": 0.5437119603157043, "learning_rate": 1.9101040900010513e-05, "loss": 2.1926, "step": 176400 }, { "epoch": 0.6184418042266849, "grad_norm": 0.5349199175834656, "learning_rate": 1.909052675849017e-05, "loss": 2.2062, "step": 176460 }, { "epoch": 0.6186520870570917, "grad_norm": 0.5290020704269409, "learning_rate": 1.9080012616969827e-05, "loss": 2.2002, "step": 176520 }, { "epoch": 0.6188623698874987, "grad_norm": 0.5506412386894226, "learning_rate": 1.906949847544948e-05, "loss": 2.1958, "step": 176580 }, { "epoch": 0.6190726527179056, "grad_norm": 0.5792592167854309, "learning_rate": 1.9058984333929137e-05, "loss": 2.2002, "step": 176640 }, { "epoch": 0.6192829355483125, "grad_norm": 0.5670069456100464, "learning_rate": 1.904847019240879e-05, "loss": 2.1975, "step": 176700 }, { "epoch": 0.6194932183787194, "grad_norm": 0.6118037700653076, "learning_rate": 1.9037956050888445e-05, "loss": 2.186, "step": 176760 }, { "epoch": 0.6197035012091263, "grad_norm": 0.5524449348449707, "learning_rate": 1.9027441909368103e-05, "loss": 2.197, "step": 176820 }, { "epoch": 0.6199137840395331, "grad_norm": 0.5148321986198425, "learning_rate": 1.9016927767847755e-05, "loss": 2.1969, "step": 176880 }, { "epoch": 0.62012406686994, "grad_norm": 0.5635314583778381, "learning_rate": 1.900641362632741e-05, "loss": 2.2055, "step": 176940 }, { "epoch": 0.620334349700347, "grad_norm": 0.5319798588752747, "learning_rate": 1.8995899484807066e-05, "loss": 2.1948, "step": 177000 }, { "epoch": 0.6205446325307539, "grad_norm": 0.6794459819793701, "learning_rate": 1.898538534328672e-05, "loss": 2.2103, "step": 177060 }, { "epoch": 0.6207549153611608, "grad_norm": 0.6157546639442444, "learning_rate": 1.8974871201766376e-05, "loss": 2.2019, "step": 177120 }, { "epoch": 0.6209651981915677, "grad_norm": 0.5367477536201477, "learning_rate": 1.896435706024603e-05, "loss": 2.1928, "step": 
177180 }, { "epoch": 0.6211754810219745, "grad_norm": 0.5531861782073975, "learning_rate": 1.8953842918725687e-05, "loss": 2.2016, "step": 177240 }, { "epoch": 0.6213857638523814, "grad_norm": 0.5841910243034363, "learning_rate": 1.8943328777205342e-05, "loss": 2.1906, "step": 177300 }, { "epoch": 0.6215960466827883, "grad_norm": 0.5331421494483948, "learning_rate": 1.8932814635684997e-05, "loss": 2.2002, "step": 177360 }, { "epoch": 0.6218063295131953, "grad_norm": 0.5444777011871338, "learning_rate": 1.8922300494164653e-05, "loss": 2.1937, "step": 177420 }, { "epoch": 0.6220166123436022, "grad_norm": 0.5303032994270325, "learning_rate": 1.8911786352644308e-05, "loss": 2.2036, "step": 177480 }, { "epoch": 0.6222268951740091, "grad_norm": 0.5160943865776062, "learning_rate": 1.890144744681597e-05, "loss": 2.1968, "step": 177540 }, { "epoch": 0.6224371780044159, "grad_norm": 0.6058926582336426, "learning_rate": 1.8890933305295624e-05, "loss": 2.1976, "step": 177600 }, { "epoch": 0.6226474608348228, "grad_norm": 0.6021593809127808, "learning_rate": 1.888041916377528e-05, "loss": 2.1935, "step": 177660 }, { "epoch": 0.6228577436652297, "grad_norm": 0.5436554551124573, "learning_rate": 1.8869905022254934e-05, "loss": 2.2202, "step": 177720 }, { "epoch": 0.6230680264956366, "grad_norm": 0.527804970741272, "learning_rate": 1.885939088073459e-05, "loss": 2.1995, "step": 177780 }, { "epoch": 0.6232783093260436, "grad_norm": 0.6060287952423096, "learning_rate": 1.8848876739214245e-05, "loss": 2.2054, "step": 177840 }, { "epoch": 0.6234885921564505, "grad_norm": 0.5040077567100525, "learning_rate": 1.8838362597693897e-05, "loss": 2.1946, "step": 177900 }, { "epoch": 0.6236988749868573, "grad_norm": 0.5401580929756165, "learning_rate": 1.8827848456173555e-05, "loss": 2.1775, "step": 177960 }, { "epoch": 0.6239091578172642, "grad_norm": 0.6010558009147644, "learning_rate": 1.881733431465321e-05, "loss": 2.1757, "step": 178020 }, { "epoch": 0.6241194406476711, "grad_norm": 0.6008610725402832, "learning_rate": 1.8806820173132863e-05, "loss": 2.1986, "step": 178080 }, { "epoch": 0.624329723478078, "grad_norm": 0.6357063055038452, "learning_rate": 1.879630603161252e-05, "loss": 2.1998, "step": 178140 }, { "epoch": 0.6245400063084849, "grad_norm": 0.5567299127578735, "learning_rate": 1.8785791890092176e-05, "loss": 2.1938, "step": 178200 }, { "epoch": 0.6247502891388919, "grad_norm": 0.5733098983764648, "learning_rate": 1.877527774857183e-05, "loss": 2.2024, "step": 178260 }, { "epoch": 0.6249605719692987, "grad_norm": 0.6415252685546875, "learning_rate": 1.8764763607051487e-05, "loss": 2.2091, "step": 178320 }, { "epoch": 0.6251708547997056, "grad_norm": 0.5701323747634888, "learning_rate": 1.875424946553114e-05, "loss": 2.2194, "step": 178380 }, { "epoch": 0.6253811376301125, "grad_norm": 0.5979422330856323, "learning_rate": 1.8743735324010794e-05, "loss": 2.2182, "step": 178440 }, { "epoch": 0.6255914204605194, "grad_norm": 0.5337743759155273, "learning_rate": 1.8733221182490453e-05, "loss": 2.2097, "step": 178500 }, { "epoch": 0.6258017032909263, "grad_norm": 0.5219767689704895, "learning_rate": 1.8722707040970105e-05, "loss": 2.2057, "step": 178560 }, { "epoch": 0.6260119861213332, "grad_norm": 0.5585417747497559, "learning_rate": 1.871219289944976e-05, "loss": 2.1904, "step": 178620 }, { "epoch": 0.62622226895174, "grad_norm": 0.5472621321678162, "learning_rate": 1.8701678757929415e-05, "loss": 2.1978, "step": 178680 }, { "epoch": 0.626432551782147, "grad_norm": 0.5493637919425964, "learning_rate": 
1.869116461640907e-05, "loss": 2.2048, "step": 178740 }, { "epoch": 0.6266428346125539, "grad_norm": 0.5349371433258057, "learning_rate": 1.8680650474888726e-05, "loss": 2.1892, "step": 178800 }, { "epoch": 0.6268531174429608, "grad_norm": 0.5108512043952942, "learning_rate": 1.867013633336838e-05, "loss": 2.2086, "step": 178860 }, { "epoch": 0.6270634002733677, "grad_norm": 0.5267765522003174, "learning_rate": 1.8659622191848036e-05, "loss": 2.1787, "step": 178920 }, { "epoch": 0.6272736831037746, "grad_norm": 0.5868079662322998, "learning_rate": 1.8649108050327692e-05, "loss": 2.2027, "step": 178980 }, { "epoch": 0.6274839659341814, "grad_norm": 0.5836610794067383, "learning_rate": 1.8638593908807347e-05, "loss": 2.1919, "step": 179040 }, { "epoch": 0.6276942487645883, "grad_norm": 0.5579856038093567, "learning_rate": 1.8628079767287002e-05, "loss": 2.1972, "step": 179100 }, { "epoch": 0.6279045315949953, "grad_norm": 0.5386043190956116, "learning_rate": 1.8617565625766658e-05, "loss": 2.1811, "step": 179160 }, { "epoch": 0.6281148144254022, "grad_norm": 0.5331193208694458, "learning_rate": 1.8607051484246313e-05, "loss": 2.189, "step": 179220 }, { "epoch": 0.6283250972558091, "grad_norm": 0.546983540058136, "learning_rate": 1.8596537342725968e-05, "loss": 2.1948, "step": 179280 }, { "epoch": 0.628535380086216, "grad_norm": 0.531303882598877, "learning_rate": 1.858602320120562e-05, "loss": 2.1898, "step": 179340 }, { "epoch": 0.6287456629166228, "grad_norm": 0.5148618221282959, "learning_rate": 1.857550905968528e-05, "loss": 2.187, "step": 179400 }, { "epoch": 0.6289559457470297, "grad_norm": 0.5330983996391296, "learning_rate": 1.8564994918164934e-05, "loss": 2.1847, "step": 179460 }, { "epoch": 0.6291662285774366, "grad_norm": 0.5286121368408203, "learning_rate": 1.8554480776644586e-05, "loss": 2.2227, "step": 179520 }, { "epoch": 0.6293765114078436, "grad_norm": 0.5923421382904053, "learning_rate": 1.8543966635124245e-05, "loss": 2.1934, "step": 179580 }, { "epoch": 0.6295867942382505, "grad_norm": 0.5364999175071716, "learning_rate": 1.8533452493603896e-05, "loss": 2.176, "step": 179640 }, { "epoch": 0.6297970770686574, "grad_norm": 0.5409441590309143, "learning_rate": 1.852293835208355e-05, "loss": 2.2059, "step": 179700 }, { "epoch": 0.6300073598990642, "grad_norm": 0.5342493653297424, "learning_rate": 1.851242421056321e-05, "loss": 2.1979, "step": 179760 }, { "epoch": 0.6302176427294711, "grad_norm": 0.5905653834342957, "learning_rate": 1.8501910069042862e-05, "loss": 2.1909, "step": 179820 }, { "epoch": 0.630427925559878, "grad_norm": 0.5535312294960022, "learning_rate": 1.8491395927522518e-05, "loss": 2.1869, "step": 179880 }, { "epoch": 0.630638208390285, "grad_norm": 0.5361486077308655, "learning_rate": 1.8480881786002173e-05, "loss": 2.1917, "step": 179940 }, { "epoch": 0.6308484912206919, "grad_norm": 0.5434253215789795, "learning_rate": 1.8470367644481828e-05, "loss": 2.1744, "step": 180000 }, { "epoch": 0.6310587740510988, "grad_norm": 0.6362529993057251, "learning_rate": 1.8459853502961483e-05, "loss": 2.1978, "step": 180060 }, { "epoch": 0.6312690568815056, "grad_norm": 0.5024562478065491, "learning_rate": 1.844933936144114e-05, "loss": 2.1903, "step": 180120 }, { "epoch": 0.6314793397119125, "grad_norm": 0.5112056732177734, "learning_rate": 1.8438825219920794e-05, "loss": 2.2001, "step": 180180 }, { "epoch": 0.6316896225423194, "grad_norm": 0.5255155563354492, "learning_rate": 1.842831107840045e-05, "loss": 2.1856, "step": 180240 }, { "epoch": 0.6318999053727263, 
"grad_norm": 0.5178373456001282, "learning_rate": 1.8417796936880104e-05, "loss": 2.1976, "step": 180300 }, { "epoch": 0.6321101882031332, "grad_norm": 0.5733485221862793, "learning_rate": 1.840728279535976e-05, "loss": 2.1893, "step": 180360 }, { "epoch": 0.6323204710335402, "grad_norm": 0.6175151467323303, "learning_rate": 1.8396768653839415e-05, "loss": 2.1902, "step": 180420 }, { "epoch": 0.632530753863947, "grad_norm": 0.5645368099212646, "learning_rate": 1.838625451231907e-05, "loss": 2.1836, "step": 180480 }, { "epoch": 0.6327410366943539, "grad_norm": 0.5591124296188354, "learning_rate": 1.8375740370798726e-05, "loss": 2.1984, "step": 180540 }, { "epoch": 0.6329513195247608, "grad_norm": 0.5352583527565002, "learning_rate": 1.836522622927838e-05, "loss": 2.1962, "step": 180600 }, { "epoch": 0.6331616023551677, "grad_norm": 0.6056286096572876, "learning_rate": 1.8354712087758036e-05, "loss": 2.1884, "step": 180660 }, { "epoch": 0.6333718851855746, "grad_norm": 0.7630537748336792, "learning_rate": 1.834419794623769e-05, "loss": 2.1988, "step": 180720 }, { "epoch": 0.6335821680159816, "grad_norm": 0.5268065333366394, "learning_rate": 1.8333683804717347e-05, "loss": 2.1808, "step": 180780 }, { "epoch": 0.6337924508463884, "grad_norm": 0.5918155312538147, "learning_rate": 1.8323169663197002e-05, "loss": 2.1979, "step": 180840 }, { "epoch": 0.6340027336767953, "grad_norm": 0.5612367987632751, "learning_rate": 1.8312655521676654e-05, "loss": 2.2091, "step": 180900 }, { "epoch": 0.6342130165072022, "grad_norm": 0.5907817482948303, "learning_rate": 1.8302141380156313e-05, "loss": 2.2071, "step": 180960 }, { "epoch": 0.6344232993376091, "grad_norm": 0.5619683265686035, "learning_rate": 1.8291627238635968e-05, "loss": 2.1921, "step": 181020 }, { "epoch": 0.634633582168016, "grad_norm": 0.6120419502258301, "learning_rate": 1.828111309711562e-05, "loss": 2.1918, "step": 181080 }, { "epoch": 0.6348438649984228, "grad_norm": 0.5742617249488831, "learning_rate": 1.827059895559528e-05, "loss": 2.2084, "step": 181140 }, { "epoch": 0.6350541478288297, "grad_norm": 0.4941420257091522, "learning_rate": 1.8260084814074934e-05, "loss": 2.1804, "step": 181200 }, { "epoch": 0.6352644306592367, "grad_norm": 0.5339905619621277, "learning_rate": 1.8249570672554586e-05, "loss": 2.194, "step": 181260 }, { "epoch": 0.6354747134896436, "grad_norm": 0.5762035846710205, "learning_rate": 1.8239056531034244e-05, "loss": 2.1876, "step": 181320 }, { "epoch": 0.6356849963200505, "grad_norm": 0.55418860912323, "learning_rate": 1.8228542389513896e-05, "loss": 2.2229, "step": 181380 }, { "epoch": 0.6358952791504574, "grad_norm": 0.6099485158920288, "learning_rate": 1.821802824799355e-05, "loss": 2.1944, "step": 181440 }, { "epoch": 0.6361055619808642, "grad_norm": 0.5443301200866699, "learning_rate": 1.820751410647321e-05, "loss": 2.1773, "step": 181500 }, { "epoch": 0.6363158448112711, "grad_norm": 0.5454010367393494, "learning_rate": 1.8196999964952862e-05, "loss": 2.2003, "step": 181560 }, { "epoch": 0.636526127641678, "grad_norm": 0.5376167297363281, "learning_rate": 1.8186661059124522e-05, "loss": 2.183, "step": 181620 }, { "epoch": 0.636736410472085, "grad_norm": 0.6612685918807983, "learning_rate": 1.8176146917604178e-05, "loss": 2.2047, "step": 181680 }, { "epoch": 0.6369466933024919, "grad_norm": 0.5321860313415527, "learning_rate": 1.8165632776083833e-05, "loss": 2.2129, "step": 181740 }, { "epoch": 0.6371569761328988, "grad_norm": 0.5190796852111816, "learning_rate": 1.815511863456349e-05, "loss": 2.1888, 
"step": 181800 }, { "epoch": 0.6373672589633056, "grad_norm": 0.5587844252586365, "learning_rate": 1.8144604493043144e-05, "loss": 2.206, "step": 181860 }, { "epoch": 0.6375775417937125, "grad_norm": 0.5742557048797607, "learning_rate": 1.81340903515228e-05, "loss": 2.1899, "step": 181920 }, { "epoch": 0.6377878246241194, "grad_norm": 0.5507059097290039, "learning_rate": 1.8123576210002454e-05, "loss": 2.2189, "step": 181980 }, { "epoch": 0.6379981074545263, "grad_norm": 0.5660874247550964, "learning_rate": 1.811306206848211e-05, "loss": 2.1985, "step": 182040 }, { "epoch": 0.6382083902849333, "grad_norm": 0.5507509112358093, "learning_rate": 1.8102547926961765e-05, "loss": 2.191, "step": 182100 }, { "epoch": 0.6384186731153402, "grad_norm": 0.6052454710006714, "learning_rate": 1.809203378544142e-05, "loss": 2.1788, "step": 182160 }, { "epoch": 0.638628955945747, "grad_norm": 0.5226848125457764, "learning_rate": 1.8081519643921075e-05, "loss": 2.1897, "step": 182220 }, { "epoch": 0.6388392387761539, "grad_norm": 0.5561570525169373, "learning_rate": 1.8071005502400727e-05, "loss": 2.1932, "step": 182280 }, { "epoch": 0.6390495216065608, "grad_norm": 0.5580832362174988, "learning_rate": 1.8060491360880386e-05, "loss": 2.1983, "step": 182340 }, { "epoch": 0.6392598044369677, "grad_norm": 0.5689629316329956, "learning_rate": 1.804997721936004e-05, "loss": 2.1837, "step": 182400 }, { "epoch": 0.6394700872673746, "grad_norm": 0.5467937588691711, "learning_rate": 1.8039463077839693e-05, "loss": 2.2017, "step": 182460 }, { "epoch": 0.6396803700977816, "grad_norm": 0.5341092348098755, "learning_rate": 1.802894893631935e-05, "loss": 2.1936, "step": 182520 }, { "epoch": 0.6398906529281884, "grad_norm": 0.5286155939102173, "learning_rate": 1.8018434794799004e-05, "loss": 2.2146, "step": 182580 }, { "epoch": 0.6401009357585953, "grad_norm": 0.5773108601570129, "learning_rate": 1.800792065327866e-05, "loss": 2.2021, "step": 182640 }, { "epoch": 0.6403112185890022, "grad_norm": 0.5345368385314941, "learning_rate": 1.7997406511758318e-05, "loss": 2.1836, "step": 182700 }, { "epoch": 0.6405215014194091, "grad_norm": 0.5411863923072815, "learning_rate": 1.798689237023797e-05, "loss": 2.1921, "step": 182760 }, { "epoch": 0.640731784249816, "grad_norm": 0.5300861597061157, "learning_rate": 1.7976378228717628e-05, "loss": 2.206, "step": 182820 }, { "epoch": 0.6409420670802229, "grad_norm": 0.5324180722236633, "learning_rate": 1.796586408719728e-05, "loss": 2.1792, "step": 182880 }, { "epoch": 0.6411523499106297, "grad_norm": 0.6221733689308167, "learning_rate": 1.7955349945676935e-05, "loss": 2.2037, "step": 182940 }, { "epoch": 0.6413626327410367, "grad_norm": 0.5904287695884705, "learning_rate": 1.7944835804156594e-05, "loss": 2.1877, "step": 183000 }, { "epoch": 0.6415729155714436, "grad_norm": 0.610864520072937, "learning_rate": 1.7934321662636246e-05, "loss": 2.1882, "step": 183060 }, { "epoch": 0.6417831984018505, "grad_norm": 0.6096898913383484, "learning_rate": 1.79238075211159e-05, "loss": 2.1964, "step": 183120 }, { "epoch": 0.6419934812322574, "grad_norm": 0.5498456358909607, "learning_rate": 1.791329337959556e-05, "loss": 2.1888, "step": 183180 }, { "epoch": 0.6422037640626643, "grad_norm": 0.5559455156326294, "learning_rate": 1.790277923807521e-05, "loss": 2.192, "step": 183240 }, { "epoch": 0.6424140468930711, "grad_norm": 0.5066351294517517, "learning_rate": 1.7892265096554867e-05, "loss": 2.1838, "step": 183300 }, { "epoch": 0.642624329723478, "grad_norm": 0.5600133538246155, "learning_rate": 
1.7881750955034522e-05, "loss": 2.1902, "step": 183360 }, { "epoch": 0.642834612553885, "grad_norm": 0.6039444804191589, "learning_rate": 1.7871236813514177e-05, "loss": 2.1976, "step": 183420 }, { "epoch": 0.6430448953842919, "grad_norm": 0.524412214756012, "learning_rate": 1.7860722671993833e-05, "loss": 2.1931, "step": 183480 }, { "epoch": 0.6432551782146988, "grad_norm": 0.5259726047515869, "learning_rate": 1.7850208530473488e-05, "loss": 2.2079, "step": 183540 }, { "epoch": 0.6434654610451057, "grad_norm": 0.5919510126113892, "learning_rate": 1.7839694388953143e-05, "loss": 2.1983, "step": 183600 }, { "epoch": 0.6436757438755125, "grad_norm": 0.5589147806167603, "learning_rate": 1.78291802474328e-05, "loss": 2.2145, "step": 183660 }, { "epoch": 0.6438860267059194, "grad_norm": 0.581453800201416, "learning_rate": 1.781884134160446e-05, "loss": 2.1932, "step": 183720 }, { "epoch": 0.6440963095363264, "grad_norm": 0.5136606693267822, "learning_rate": 1.7808327200084114e-05, "loss": 2.1914, "step": 183780 }, { "epoch": 0.6443065923667333, "grad_norm": 0.5781887173652649, "learning_rate": 1.779781305856377e-05, "loss": 2.2011, "step": 183840 }, { "epoch": 0.6445168751971402, "grad_norm": 0.5550873875617981, "learning_rate": 1.7787298917043425e-05, "loss": 2.1915, "step": 183900 }, { "epoch": 0.6447271580275471, "grad_norm": 0.5426739454269409, "learning_rate": 1.7776784775523077e-05, "loss": 2.2125, "step": 183960 }, { "epoch": 0.6449374408579539, "grad_norm": 0.5990216732025146, "learning_rate": 1.7766270634002736e-05, "loss": 2.1797, "step": 184020 }, { "epoch": 0.6451477236883608, "grad_norm": 0.5692834258079529, "learning_rate": 1.775575649248239e-05, "loss": 2.1976, "step": 184080 }, { "epoch": 0.6453580065187677, "grad_norm": 0.5651482343673706, "learning_rate": 1.7745242350962043e-05, "loss": 2.1983, "step": 184140 }, { "epoch": 0.6455682893491747, "grad_norm": 0.5360584259033203, "learning_rate": 1.77347282094417e-05, "loss": 2.2047, "step": 184200 }, { "epoch": 0.6457785721795816, "grad_norm": 0.5235934257507324, "learning_rate": 1.7724214067921353e-05, "loss": 2.1807, "step": 184260 }, { "epoch": 0.6459888550099885, "grad_norm": 0.5651029348373413, "learning_rate": 1.771369992640101e-05, "loss": 2.2096, "step": 184320 }, { "epoch": 0.6461991378403953, "grad_norm": 0.5518803000450134, "learning_rate": 1.7703185784880667e-05, "loss": 2.2053, "step": 184380 }, { "epoch": 0.6464094206708022, "grad_norm": 0.5300690531730652, "learning_rate": 1.769267164336032e-05, "loss": 2.1847, "step": 184440 }, { "epoch": 0.6466197035012091, "grad_norm": 0.5407575368881226, "learning_rate": 1.7682157501839974e-05, "loss": 2.1976, "step": 184500 }, { "epoch": 0.646829986331616, "grad_norm": 0.6042479276657104, "learning_rate": 1.767164336031963e-05, "loss": 2.1695, "step": 184560 }, { "epoch": 0.647040269162023, "grad_norm": 0.5453103184700012, "learning_rate": 1.7661129218799285e-05, "loss": 2.1789, "step": 184620 }, { "epoch": 0.6472505519924299, "grad_norm": 0.5787133574485779, "learning_rate": 1.765061507727894e-05, "loss": 2.1866, "step": 184680 }, { "epoch": 0.6474608348228367, "grad_norm": 0.5406185984611511, "learning_rate": 1.7640100935758596e-05, "loss": 2.1825, "step": 184740 }, { "epoch": 0.6476711176532436, "grad_norm": 0.5635714530944824, "learning_rate": 1.762958679423825e-05, "loss": 2.1914, "step": 184800 }, { "epoch": 0.6478814004836505, "grad_norm": 0.5155267715454102, "learning_rate": 1.7619072652717906e-05, "loss": 2.19, "step": 184860 }, { "epoch": 0.6480916833140574, 
"grad_norm": 0.5049726366996765, "learning_rate": 1.760855851119756e-05, "loss": 2.1987, "step": 184920 }, { "epoch": 0.6483019661444643, "grad_norm": 0.522000253200531, "learning_rate": 1.7598044369677217e-05, "loss": 2.1838, "step": 184980 }, { "epoch": 0.6485122489748713, "grad_norm": 0.566197395324707, "learning_rate": 1.7587530228156872e-05, "loss": 2.1912, "step": 185040 }, { "epoch": 0.6487225318052781, "grad_norm": 0.6304585337638855, "learning_rate": 1.7577016086636527e-05, "loss": 2.1952, "step": 185100 }, { "epoch": 0.648932814635685, "grad_norm": 0.5605672001838684, "learning_rate": 1.7566501945116182e-05, "loss": 2.2132, "step": 185160 }, { "epoch": 0.6491430974660919, "grad_norm": 0.5913508534431458, "learning_rate": 1.7555987803595838e-05, "loss": 2.1916, "step": 185220 }, { "epoch": 0.6493533802964988, "grad_norm": 0.5016937851905823, "learning_rate": 1.7545473662075493e-05, "loss": 2.1779, "step": 185280 }, { "epoch": 0.6495636631269057, "grad_norm": 0.5301057696342468, "learning_rate": 1.7534959520555148e-05, "loss": 2.1948, "step": 185340 }, { "epoch": 0.6497739459573126, "grad_norm": 0.5172202587127686, "learning_rate": 1.7524445379034804e-05, "loss": 2.1957, "step": 185400 }, { "epoch": 0.6499842287877194, "grad_norm": 0.5441404581069946, "learning_rate": 1.751393123751446e-05, "loss": 2.2083, "step": 185460 }, { "epoch": 0.6501945116181264, "grad_norm": 0.5819316506385803, "learning_rate": 1.750341709599411e-05, "loss": 2.1794, "step": 185520 }, { "epoch": 0.6504047944485333, "grad_norm": 0.5801051259040833, "learning_rate": 1.749290295447377e-05, "loss": 2.1856, "step": 185580 }, { "epoch": 0.6506150772789402, "grad_norm": 0.547092854976654, "learning_rate": 1.7482388812953425e-05, "loss": 2.1984, "step": 185640 }, { "epoch": 0.6508253601093471, "grad_norm": 0.5455633997917175, "learning_rate": 1.7472049907125085e-05, "loss": 2.1923, "step": 185700 }, { "epoch": 0.651035642939754, "grad_norm": 0.5476002097129822, "learning_rate": 1.746153576560474e-05, "loss": 2.1953, "step": 185760 }, { "epoch": 0.6512459257701608, "grad_norm": 0.5624036192893982, "learning_rate": 1.74511968597764e-05, "loss": 2.198, "step": 185820 }, { "epoch": 0.6514562086005677, "grad_norm": 0.5351853370666504, "learning_rate": 1.7440682718256053e-05, "loss": 2.1904, "step": 185880 }, { "epoch": 0.6516664914309747, "grad_norm": 0.5587154626846313, "learning_rate": 1.743016857673571e-05, "loss": 2.1892, "step": 185940 }, { "epoch": 0.6518767742613816, "grad_norm": 0.5613061189651489, "learning_rate": 1.7419654435215367e-05, "loss": 2.1974, "step": 186000 }, { "epoch": 0.6520870570917885, "grad_norm": 0.5189383029937744, "learning_rate": 1.740914029369502e-05, "loss": 2.1953, "step": 186060 }, { "epoch": 0.6522973399221954, "grad_norm": 0.5712246298789978, "learning_rate": 1.7398626152174677e-05, "loss": 2.2143, "step": 186120 }, { "epoch": 0.6525076227526022, "grad_norm": 0.5922529697418213, "learning_rate": 1.7388112010654333e-05, "loss": 2.2, "step": 186180 }, { "epoch": 0.6527179055830091, "grad_norm": 0.5634300708770752, "learning_rate": 1.7377597869133985e-05, "loss": 2.1873, "step": 186240 }, { "epoch": 0.652928188413416, "grad_norm": 0.5620796084403992, "learning_rate": 1.7367083727613643e-05, "loss": 2.1842, "step": 186300 }, { "epoch": 0.653138471243823, "grad_norm": 0.49786993861198425, "learning_rate": 1.7356569586093295e-05, "loss": 2.1809, "step": 186360 }, { "epoch": 0.6533487540742299, "grad_norm": 0.5227112174034119, "learning_rate": 1.734605544457295e-05, "loss": 2.1901, "step": 
186420 }, { "epoch": 0.6535590369046367, "grad_norm": 0.604454517364502, "learning_rate": 1.733554130305261e-05, "loss": 2.1968, "step": 186480 }, { "epoch": 0.6537693197350436, "grad_norm": 0.5914393663406372, "learning_rate": 1.732502716153226e-05, "loss": 2.1877, "step": 186540 }, { "epoch": 0.6539796025654505, "grad_norm": 0.5598288178443909, "learning_rate": 1.7314513020011916e-05, "loss": 2.1957, "step": 186600 }, { "epoch": 0.6541898853958574, "grad_norm": 0.517230749130249, "learning_rate": 1.730399887849157e-05, "loss": 2.1898, "step": 186660 }, { "epoch": 0.6544001682262643, "grad_norm": 0.6065497398376465, "learning_rate": 1.7293484736971227e-05, "loss": 2.2012, "step": 186720 }, { "epoch": 0.6546104510566713, "grad_norm": 0.5286918878555298, "learning_rate": 1.7282970595450882e-05, "loss": 2.1883, "step": 186780 }, { "epoch": 0.6548207338870781, "grad_norm": 0.5354787111282349, "learning_rate": 1.7272456453930537e-05, "loss": 2.1809, "step": 186840 }, { "epoch": 0.655031016717485, "grad_norm": 0.5585691928863525, "learning_rate": 1.7261942312410193e-05, "loss": 2.1927, "step": 186900 }, { "epoch": 0.6552412995478919, "grad_norm": 0.58662348985672, "learning_rate": 1.7251428170889848e-05, "loss": 2.2075, "step": 186960 }, { "epoch": 0.6554515823782988, "grad_norm": 0.5285871624946594, "learning_rate": 1.7240914029369503e-05, "loss": 2.1944, "step": 187020 }, { "epoch": 0.6556618652087057, "grad_norm": 0.5373408794403076, "learning_rate": 1.723039988784916e-05, "loss": 2.2032, "step": 187080 }, { "epoch": 0.6558721480391126, "grad_norm": 0.5267037153244019, "learning_rate": 1.7219885746328814e-05, "loss": 2.1964, "step": 187140 }, { "epoch": 0.6560824308695195, "grad_norm": 0.5143727660179138, "learning_rate": 1.720937160480847e-05, "loss": 2.2158, "step": 187200 }, { "epoch": 0.6562927136999264, "grad_norm": 0.6170992255210876, "learning_rate": 1.7198857463288124e-05, "loss": 2.1874, "step": 187260 }, { "epoch": 0.6565029965303333, "grad_norm": 0.5920122265815735, "learning_rate": 1.7188343321767776e-05, "loss": 2.1934, "step": 187320 }, { "epoch": 0.6567132793607402, "grad_norm": 0.5491796135902405, "learning_rate": 1.7177829180247435e-05, "loss": 2.1803, "step": 187380 }, { "epoch": 0.6569235621911471, "grad_norm": 0.5345472097396851, "learning_rate": 1.716731503872709e-05, "loss": 2.1783, "step": 187440 }, { "epoch": 0.657133845021554, "grad_norm": 0.5320336222648621, "learning_rate": 1.7156800897206742e-05, "loss": 2.18, "step": 187500 }, { "epoch": 0.6573441278519608, "grad_norm": 0.54201740026474, "learning_rate": 1.71462867556864e-05, "loss": 2.1915, "step": 187560 }, { "epoch": 0.6575544106823678, "grad_norm": 0.5722365975379944, "learning_rate": 1.7135772614166053e-05, "loss": 2.184, "step": 187620 }, { "epoch": 0.6577646935127747, "grad_norm": 0.5633969902992249, "learning_rate": 1.7125258472645708e-05, "loss": 2.1872, "step": 187680 }, { "epoch": 0.6579749763431816, "grad_norm": 0.606042742729187, "learning_rate": 1.7114744331125367e-05, "loss": 2.178, "step": 187740 }, { "epoch": 0.6581852591735885, "grad_norm": 0.520921528339386, "learning_rate": 1.710423018960502e-05, "loss": 2.1992, "step": 187800 }, { "epoch": 0.6583955420039954, "grad_norm": 0.5178558230400085, "learning_rate": 1.7093716048084674e-05, "loss": 2.1974, "step": 187860 }, { "epoch": 0.6586058248344022, "grad_norm": 0.5311471223831177, "learning_rate": 1.708320190656433e-05, "loss": 2.1847, "step": 187920 }, { "epoch": 0.6588161076648091, "grad_norm": 0.5017029643058777, "learning_rate": 
1.7072687765043984e-05, "loss": 2.1965, "step": 187980 }, { "epoch": 0.659026390495216, "grad_norm": 0.6602824330329895, "learning_rate": 1.706217362352364e-05, "loss": 2.1865, "step": 188040 }, { "epoch": 0.659236673325623, "grad_norm": 0.6342161297798157, "learning_rate": 1.7051659482003295e-05, "loss": 2.1896, "step": 188100 }, { "epoch": 0.6594469561560299, "grad_norm": 0.5871784090995789, "learning_rate": 1.704114534048295e-05, "loss": 2.1825, "step": 188160 }, { "epoch": 0.6596572389864368, "grad_norm": 0.5610417723655701, "learning_rate": 1.7030631198962605e-05, "loss": 2.1948, "step": 188220 }, { "epoch": 0.6598675218168436, "grad_norm": 0.54507976770401, "learning_rate": 1.702011705744226e-05, "loss": 2.187, "step": 188280 }, { "epoch": 0.6600778046472505, "grad_norm": 0.5555378198623657, "learning_rate": 1.7009602915921916e-05, "loss": 2.1982, "step": 188340 }, { "epoch": 0.6602880874776574, "grad_norm": 0.5375819802284241, "learning_rate": 1.699908877440157e-05, "loss": 2.1917, "step": 188400 }, { "epoch": 0.6604983703080644, "grad_norm": 0.48358219861984253, "learning_rate": 1.6988574632881227e-05, "loss": 2.1878, "step": 188460 }, { "epoch": 0.6607086531384713, "grad_norm": 0.59239262342453, "learning_rate": 1.6978060491360882e-05, "loss": 2.1837, "step": 188520 }, { "epoch": 0.6609189359688782, "grad_norm": 0.5474352836608887, "learning_rate": 1.6967546349840534e-05, "loss": 2.2024, "step": 188580 }, { "epoch": 0.661129218799285, "grad_norm": 0.5559254884719849, "learning_rate": 1.6957032208320192e-05, "loss": 2.1843, "step": 188640 }, { "epoch": 0.6613395016296919, "grad_norm": 0.6271349191665649, "learning_rate": 1.6946518066799848e-05, "loss": 2.1855, "step": 188700 }, { "epoch": 0.6615497844600988, "grad_norm": 0.5410918593406677, "learning_rate": 1.69360039252795e-05, "loss": 2.1845, "step": 188760 }, { "epoch": 0.6617600672905057, "grad_norm": 0.5364717245101929, "learning_rate": 1.6925489783759158e-05, "loss": 2.1826, "step": 188820 }, { "epoch": 0.6619703501209127, "grad_norm": 0.5298910737037659, "learning_rate": 1.691497564223881e-05, "loss": 2.1894, "step": 188880 }, { "epoch": 0.6621806329513196, "grad_norm": 0.5474604964256287, "learning_rate": 1.6904461500718465e-05, "loss": 2.1728, "step": 188940 }, { "epoch": 0.6623909157817264, "grad_norm": 0.5495792627334595, "learning_rate": 1.6893947359198124e-05, "loss": 2.1935, "step": 189000 }, { "epoch": 0.6626011986121333, "grad_norm": 0.5411957502365112, "learning_rate": 1.6883433217677776e-05, "loss": 2.1797, "step": 189060 }, { "epoch": 0.6628114814425402, "grad_norm": 0.5660207271575928, "learning_rate": 1.687291907615743e-05, "loss": 2.1799, "step": 189120 }, { "epoch": 0.6630217642729471, "grad_norm": 0.5661222338676453, "learning_rate": 1.686240493463709e-05, "loss": 2.1854, "step": 189180 }, { "epoch": 0.663232047103354, "grad_norm": 0.6380460262298584, "learning_rate": 1.6851890793116742e-05, "loss": 2.1977, "step": 189240 }, { "epoch": 0.663442329933761, "grad_norm": 0.5798426866531372, "learning_rate": 1.6841376651596397e-05, "loss": 2.1977, "step": 189300 }, { "epoch": 0.6636526127641678, "grad_norm": 0.588367760181427, "learning_rate": 1.6830862510076052e-05, "loss": 2.1875, "step": 189360 }, { "epoch": 0.6638628955945747, "grad_norm": 0.5828709602355957, "learning_rate": 1.6820348368555708e-05, "loss": 2.1876, "step": 189420 }, { "epoch": 0.6640731784249816, "grad_norm": 0.6115942001342773, "learning_rate": 1.6809834227035363e-05, "loss": 2.201, "step": 189480 }, { "epoch": 0.6642834612553885, 
"grad_norm": 0.6153404116630554, "learning_rate": 1.6799320085515018e-05, "loss": 2.1897, "step": 189540 }, { "epoch": 0.6644937440857954, "grad_norm": 0.6499913930892944, "learning_rate": 1.6788805943994673e-05, "loss": 2.1946, "step": 189600 }, { "epoch": 0.6647040269162023, "grad_norm": 0.5375574827194214, "learning_rate": 1.677829180247433e-05, "loss": 2.1858, "step": 189660 }, { "epoch": 0.6649143097466091, "grad_norm": 0.5913978815078735, "learning_rate": 1.6767777660953984e-05, "loss": 2.1969, "step": 189720 }, { "epoch": 0.6651245925770161, "grad_norm": 0.531616747379303, "learning_rate": 1.675726351943364e-05, "loss": 2.1766, "step": 189780 }, { "epoch": 0.665334875407423, "grad_norm": 0.5702480673789978, "learning_rate": 1.6746749377913295e-05, "loss": 2.1935, "step": 189840 }, { "epoch": 0.6655451582378299, "grad_norm": 0.5366515517234802, "learning_rate": 1.6736410472084955e-05, "loss": 2.1953, "step": 189900 }, { "epoch": 0.6657554410682368, "grad_norm": 0.571426272392273, "learning_rate": 1.672589633056461e-05, "loss": 2.2015, "step": 189960 } ], "logging_steps": 60, "max_steps": 285330, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.692720539303936e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }