{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9978900335112324, "eval_steps": 504, "global_step": 6042, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004964627032394191, "grad_norm": 0.48562569977411674, "learning_rate": 2.5000000000000004e-07, "loss": 0.8809, "step": 1 }, { "epoch": 0.0004964627032394191, "eval_loss": 0.9288526773452759, "eval_runtime": 135.0527, "eval_samples_per_second": 224.749, "eval_steps_per_second": 28.1, "step": 1 }, { "epoch": 0.0009929254064788382, "grad_norm": 0.49840719985370363, "learning_rate": 5.000000000000001e-07, "loss": 0.8877, "step": 2 }, { "epoch": 0.0014893881097182574, "grad_norm": 0.5073931939592379, "learning_rate": 7.5e-07, "loss": 0.912, "step": 3 }, { "epoch": 0.0019858508129576764, "grad_norm": 0.4677170410046681, "learning_rate": 1.0000000000000002e-06, "loss": 0.953, "step": 4 }, { "epoch": 0.002482313516197096, "grad_norm": 0.48958186047439367, "learning_rate": 1.25e-06, "loss": 0.9888, "step": 5 }, { "epoch": 0.002978776219436515, "grad_norm": 0.47591967932406565, "learning_rate": 1.5e-06, "loss": 0.9378, "step": 6 }, { "epoch": 0.0034752389226759338, "grad_norm": 0.4775015628908847, "learning_rate": 1.75e-06, "loss": 0.9636, "step": 7 }, { "epoch": 0.003971701625915353, "grad_norm": 0.4636218067902225, "learning_rate": 2.0000000000000003e-06, "loss": 0.9179, "step": 8 }, { "epoch": 0.004468164329154772, "grad_norm": 0.4627241033337522, "learning_rate": 2.25e-06, "loss": 0.9676, "step": 9 }, { "epoch": 0.004964627032394192, "grad_norm": 0.4866597760611659, "learning_rate": 2.5e-06, "loss": 0.9696, "step": 10 }, { "epoch": 0.00546108973563361, "grad_norm": 0.4791346551664399, "learning_rate": 2.7500000000000004e-06, "loss": 0.907, "step": 11 }, { "epoch": 0.00595755243887303, "grad_norm": 0.4769913096433881, "learning_rate": 3e-06, "loss": 0.8921, "step": 12 }, { "epoch": 0.006454015142112449, "grad_norm": 0.34294431274413834, "learning_rate": 3.2500000000000002e-06, "loss": 0.926, "step": 13 }, { "epoch": 0.0069504778453518675, "grad_norm": 0.34641042017011436, "learning_rate": 3.5e-06, "loss": 0.9275, "step": 14 }, { "epoch": 0.007446940548591287, "grad_norm": 0.34905473483457555, "learning_rate": 3.7500000000000005e-06, "loss": 0.8703, "step": 15 }, { "epoch": 0.007943403251830706, "grad_norm": 0.3172635186340447, "learning_rate": 4.000000000000001e-06, "loss": 0.9291, "step": 16 }, { "epoch": 0.008439865955070125, "grad_norm": 0.2879283799693857, "learning_rate": 4.25e-06, "loss": 0.8477, "step": 17 }, { "epoch": 0.008936328658309544, "grad_norm": 0.27723518517408907, "learning_rate": 4.5e-06, "loss": 0.8719, "step": 18 }, { "epoch": 0.009432791361548964, "grad_norm": 0.3064644489149753, "learning_rate": 4.75e-06, "loss": 0.8433, "step": 19 }, { "epoch": 0.009929254064788383, "grad_norm": 0.29412214176584833, "learning_rate": 5e-06, "loss": 0.859, "step": 20 }, { "epoch": 0.010425716768027803, "grad_norm": 0.2822069566103716, "learning_rate": 4.999999808957543e-06, "loss": 0.8315, "step": 21 }, { "epoch": 0.01092217947126722, "grad_norm": 0.2959170002125076, "learning_rate": 4.9999992358301984e-06, "loss": 0.8518, "step": 22 }, { "epoch": 0.01141864217450664, "grad_norm": 0.2847305128418995, "learning_rate": 4.9999982806180555e-06, "loss": 0.9263, "step": 23 }, { "epoch": 0.01191510487774606, "grad_norm": 0.2552021781953879, "learning_rate": 4.99999694332126e-06, "loss": 0.8462, "step": 24 }, { "epoch": 0.012411567580985479, "grad_norm": 0.24750889112085914, "learning_rate": 4.9999952239400165e-06, "loss": 0.8324, "step": 25 }, { "epoch": 0.012908030284224898, "grad_norm": 0.23795567702694292, "learning_rate": 4.9999931224745864e-06, "loss": 0.8167, "step": 26 }, { "epoch": 0.013404492987464317, "grad_norm": 0.25300589875266577, "learning_rate": 4.9999906389252926e-06, "loss": 0.8979, "step": 27 }, { "epoch": 0.013900955690703735, "grad_norm": 0.25172921500056106, "learning_rate": 4.9999877732925135e-06, "loss": 0.8962, "step": 28 }, { "epoch": 0.014397418393943155, "grad_norm": 0.24565475616345825, "learning_rate": 4.999984525576688e-06, "loss": 0.8919, "step": 29 }, { "epoch": 0.014893881097182574, "grad_norm": 0.23769439461231937, "learning_rate": 4.999980895778312e-06, "loss": 0.8506, "step": 30 }, { "epoch": 0.015390343800421993, "grad_norm": 0.23162836109371954, "learning_rate": 4.999976883897939e-06, "loss": 0.9284, "step": 31 }, { "epoch": 0.01588680650366141, "grad_norm": 0.2291685811405672, "learning_rate": 4.999972489936185e-06, "loss": 0.8594, "step": 32 }, { "epoch": 0.016383269206900832, "grad_norm": 0.22080073693898697, "learning_rate": 4.9999677138937185e-06, "loss": 0.8606, "step": 33 }, { "epoch": 0.01687973191014025, "grad_norm": 0.2168698203296081, "learning_rate": 4.999962555771272e-06, "loss": 0.8781, "step": 34 }, { "epoch": 0.01737619461337967, "grad_norm": 0.22362106702009418, "learning_rate": 4.999957015569632e-06, "loss": 0.8172, "step": 35 }, { "epoch": 0.01787265731661909, "grad_norm": 0.21848314476434746, "learning_rate": 4.999951093289645e-06, "loss": 0.823, "step": 36 }, { "epoch": 0.018369120019858506, "grad_norm": 0.21837246279483064, "learning_rate": 4.9999447889322195e-06, "loss": 0.8576, "step": 37 }, { "epoch": 0.018865582723097928, "grad_norm": 0.21667015902394673, "learning_rate": 4.999938102498315e-06, "loss": 0.8648, "step": 38 }, { "epoch": 0.019362045426337345, "grad_norm": 0.2202153889422801, "learning_rate": 4.9999310339889554e-06, "loss": 0.9336, "step": 39 }, { "epoch": 0.019858508129576766, "grad_norm": 0.22077174681701484, "learning_rate": 4.9999235834052204e-06, "loss": 0.8857, "step": 40 }, { "epoch": 0.020354970832816184, "grad_norm": 0.20143789350780875, "learning_rate": 4.9999157507482485e-06, "loss": 0.8106, "step": 41 }, { "epoch": 0.020851433536055605, "grad_norm": 0.21195838433230027, "learning_rate": 4.999907536019238e-06, "loss": 0.8221, "step": 42 }, { "epoch": 0.021347896239295023, "grad_norm": 0.20602305261000692, "learning_rate": 4.999898939219443e-06, "loss": 0.8578, "step": 43 }, { "epoch": 0.02184435894253444, "grad_norm": 0.21153710576925158, "learning_rate": 4.999889960350179e-06, "loss": 0.8217, "step": 44 }, { "epoch": 0.022340821645773862, "grad_norm": 0.21344887165712584, "learning_rate": 4.9998805994128166e-06, "loss": 0.8838, "step": 45 }, { "epoch": 0.02283728434901328, "grad_norm": 0.21002096278796756, "learning_rate": 4.999870856408787e-06, "loss": 0.8782, "step": 46 }, { "epoch": 0.0233337470522527, "grad_norm": 0.20886971438867197, "learning_rate": 4.999860731339579e-06, "loss": 0.8295, "step": 47 }, { "epoch": 0.02383020975549212, "grad_norm": 0.22095165664633118, "learning_rate": 4.999850224206741e-06, "loss": 0.8687, "step": 48 }, { "epoch": 0.02432667245873154, "grad_norm": 0.21253844707230096, "learning_rate": 4.999839335011878e-06, "loss": 0.8782, "step": 49 }, { "epoch": 0.024823135161970957, "grad_norm": 0.21604040244789194, "learning_rate": 4.999828063756655e-06, "loss": 0.8385, "step": 50 }, { "epoch": 0.025319597865210375, "grad_norm": 0.21102395354985817, "learning_rate": 4.999816410442794e-06, "loss": 0.8903, "step": 51 }, { "epoch": 0.025816060568449796, "grad_norm": 0.2062210328133959, "learning_rate": 4.999804375072076e-06, "loss": 0.8717, "step": 52 }, { "epoch": 0.026312523271689214, "grad_norm": 0.22296430893025995, "learning_rate": 4.999791957646341e-06, "loss": 0.8369, "step": 53 }, { "epoch": 0.026808985974928635, "grad_norm": 0.2172722843835763, "learning_rate": 4.9997791581674855e-06, "loss": 0.9088, "step": 54 }, { "epoch": 0.027305448678168052, "grad_norm": 0.2097980373922686, "learning_rate": 4.999765976637467e-06, "loss": 0.8559, "step": 55 }, { "epoch": 0.02780191138140747, "grad_norm": 0.20153765827962083, "learning_rate": 4.9997524130583e-06, "loss": 0.8233, "step": 56 }, { "epoch": 0.02829837408464689, "grad_norm": 0.20769665963331085, "learning_rate": 4.999738467432057e-06, "loss": 0.8321, "step": 57 }, { "epoch": 0.02879483678788631, "grad_norm": 0.21216448449114564, "learning_rate": 4.999724139760869e-06, "loss": 0.847, "step": 58 }, { "epoch": 0.02929129949112573, "grad_norm": 0.2063639919391254, "learning_rate": 4.999709430046926e-06, "loss": 0.8064, "step": 59 }, { "epoch": 0.029787762194365148, "grad_norm": 0.21452334497936051, "learning_rate": 4.999694338292478e-06, "loss": 0.8838, "step": 60 }, { "epoch": 0.03028422489760457, "grad_norm": 0.20061672335186068, "learning_rate": 4.999678864499828e-06, "loss": 0.7818, "step": 61 }, { "epoch": 0.030780687600843987, "grad_norm": 0.2046523532499392, "learning_rate": 4.999663008671344e-06, "loss": 0.8581, "step": 62 }, { "epoch": 0.03127715030408341, "grad_norm": 0.20186628257277983, "learning_rate": 4.999646770809449e-06, "loss": 0.8445, "step": 63 }, { "epoch": 0.03177361300732282, "grad_norm": 0.2012482614306437, "learning_rate": 4.9996301509166225e-06, "loss": 0.8138, "step": 64 }, { "epoch": 0.03227007571056224, "grad_norm": 0.20616591148685806, "learning_rate": 4.999613148995406e-06, "loss": 0.854, "step": 65 }, { "epoch": 0.032766538413801664, "grad_norm": 0.20274972415160764, "learning_rate": 4.999595765048399e-06, "loss": 0.8223, "step": 66 }, { "epoch": 0.033263001117041086, "grad_norm": 0.20826308052556233, "learning_rate": 4.9995779990782556e-06, "loss": 0.8669, "step": 67 }, { "epoch": 0.0337594638202805, "grad_norm": 0.20529737714316182, "learning_rate": 4.999559851087694e-06, "loss": 0.856, "step": 68 }, { "epoch": 0.03425592652351992, "grad_norm": 0.2098299357116322, "learning_rate": 4.999541321079486e-06, "loss": 0.895, "step": 69 }, { "epoch": 0.03475238922675934, "grad_norm": 0.21085272972883423, "learning_rate": 4.9995224090564645e-06, "loss": 0.8135, "step": 70 }, { "epoch": 0.035248851929998756, "grad_norm": 0.20815593231474888, "learning_rate": 4.9995031150215194e-06, "loss": 0.8325, "step": 71 }, { "epoch": 0.03574531463323818, "grad_norm": 0.2030397777365442, "learning_rate": 4.9994834389776e-06, "loss": 0.8028, "step": 72 }, { "epoch": 0.0362417773364776, "grad_norm": 0.2093993861940969, "learning_rate": 4.999463380927713e-06, "loss": 0.8222, "step": 73 }, { "epoch": 0.03673824003971701, "grad_norm": 0.20310206636138728, "learning_rate": 4.9994429408749235e-06, "loss": 0.8987, "step": 74 }, { "epoch": 0.037234702742956434, "grad_norm": 0.19635518255581516, "learning_rate": 4.999422118822357e-06, "loss": 0.8257, "step": 75 }, { "epoch": 0.037731165446195855, "grad_norm": 0.20613686945426812, "learning_rate": 4.999400914773193e-06, "loss": 0.8519, "step": 76 }, { "epoch": 0.038227628149435276, "grad_norm": 0.2148909458983486, "learning_rate": 4.999379328730676e-06, "loss": 0.8431, "step": 77 }, { "epoch": 0.03872409085267469, "grad_norm": 0.20131996731845622, "learning_rate": 4.999357360698103e-06, "loss": 0.8298, "step": 78 }, { "epoch": 0.03922055355591411, "grad_norm": 0.19799106731473673, "learning_rate": 4.999335010678831e-06, "loss": 0.8438, "step": 79 }, { "epoch": 0.03971701625915353, "grad_norm": 0.20461179643215718, "learning_rate": 4.999312278676276e-06, "loss": 0.8501, "step": 80 }, { "epoch": 0.04021347896239295, "grad_norm": 0.19950036085187078, "learning_rate": 4.999289164693913e-06, "loss": 0.802, "step": 81 }, { "epoch": 0.04070994166563237, "grad_norm": 0.20647834483552271, "learning_rate": 4.999265668735274e-06, "loss": 0.8004, "step": 82 }, { "epoch": 0.04120640436887179, "grad_norm": 0.20535506020956135, "learning_rate": 4.99924179080395e-06, "loss": 0.8398, "step": 83 }, { "epoch": 0.04170286707211121, "grad_norm": 0.20379519814931177, "learning_rate": 4.999217530903592e-06, "loss": 0.8493, "step": 84 }, { "epoch": 0.042199329775350625, "grad_norm": 0.19569632518773314, "learning_rate": 4.999192889037905e-06, "loss": 0.7781, "step": 85 }, { "epoch": 0.042695792478590046, "grad_norm": 0.2022603888395999, "learning_rate": 4.999167865210656e-06, "loss": 0.8202, "step": 86 }, { "epoch": 0.04319225518182947, "grad_norm": 0.2061084529311429, "learning_rate": 4.999142459425671e-06, "loss": 0.8112, "step": 87 }, { "epoch": 0.04368871788506888, "grad_norm": 0.20386460325471262, "learning_rate": 4.999116671686832e-06, "loss": 0.8314, "step": 88 }, { "epoch": 0.0441851805883083, "grad_norm": 0.2001689787137877, "learning_rate": 4.9990905019980795e-06, "loss": 0.8493, "step": 89 }, { "epoch": 0.044681643291547724, "grad_norm": 0.19704962097184103, "learning_rate": 4.999063950363413e-06, "loss": 0.8125, "step": 90 }, { "epoch": 0.045178105994787145, "grad_norm": 0.20288945888095392, "learning_rate": 4.999037016786891e-06, "loss": 0.8529, "step": 91 }, { "epoch": 0.04567456869802656, "grad_norm": 0.20036286496607184, "learning_rate": 4.999009701272632e-06, "loss": 0.818, "step": 92 }, { "epoch": 0.04617103140126598, "grad_norm": 0.19540166087377867, "learning_rate": 4.998982003824807e-06, "loss": 0.8044, "step": 93 }, { "epoch": 0.0466674941045054, "grad_norm": 0.19654628372196933, "learning_rate": 4.998953924447652e-06, "loss": 0.8276, "step": 94 }, { "epoch": 0.047163956807744815, "grad_norm": 0.20737336312858767, "learning_rate": 4.998925463145456e-06, "loss": 0.792, "step": 95 }, { "epoch": 0.04766041951098424, "grad_norm": 0.2059319669770031, "learning_rate": 4.998896619922571e-06, "loss": 0.8635, "step": 96 }, { "epoch": 0.04815688221422366, "grad_norm": 0.20528654692184178, "learning_rate": 4.9988673947834045e-06, "loss": 0.8605, "step": 97 }, { "epoch": 0.04865334491746308, "grad_norm": 0.20472541684877216, "learning_rate": 4.998837787732422e-06, "loss": 0.87, "step": 98 }, { "epoch": 0.04914980762070249, "grad_norm": 0.20212979201560113, "learning_rate": 4.998807798774151e-06, "loss": 0.8236, "step": 99 }, { "epoch": 0.049646270323941914, "grad_norm": 0.19931273766196522, "learning_rate": 4.998777427913172e-06, "loss": 0.8541, "step": 100 }, { "epoch": 0.050142733027181335, "grad_norm": 0.2060934643730322, "learning_rate": 4.998746675154129e-06, "loss": 0.8765, "step": 101 }, { "epoch": 0.05063919573042075, "grad_norm": 0.19905720321276965, "learning_rate": 4.99871554050172e-06, "loss": 0.8193, "step": 102 }, { "epoch": 0.05113565843366017, "grad_norm": 0.19654055207721635, "learning_rate": 4.998684023960705e-06, "loss": 0.804, "step": 103 }, { "epoch": 0.05163212113689959, "grad_norm": 0.1982198351814607, "learning_rate": 4.998652125535901e-06, "loss": 0.7891, "step": 104 }, { "epoch": 0.052128583840139006, "grad_norm": 0.39942501787277024, "learning_rate": 4.998619845232181e-06, "loss": 0.8563, "step": 105 }, { "epoch": 0.05262504654337843, "grad_norm": 0.1955611794394933, "learning_rate": 4.998587183054481e-06, "loss": 0.8287, "step": 106 }, { "epoch": 0.05312150924661785, "grad_norm": 0.20389389297242477, "learning_rate": 4.9985541390077915e-06, "loss": 0.8284, "step": 107 }, { "epoch": 0.05361797194985727, "grad_norm": 0.21045434439040575, "learning_rate": 4.998520713097164e-06, "loss": 0.8608, "step": 108 }, { "epoch": 0.054114434653096684, "grad_norm": 0.20116892551504365, "learning_rate": 4.998486905327704e-06, "loss": 0.8359, "step": 109 }, { "epoch": 0.054610897356336105, "grad_norm": 0.21230325768339284, "learning_rate": 4.9984527157045825e-06, "loss": 0.8218, "step": 110 }, { "epoch": 0.055107360059575526, "grad_norm": 0.19875350435158548, "learning_rate": 4.998418144233023e-06, "loss": 0.8115, "step": 111 }, { "epoch": 0.05560382276281494, "grad_norm": 0.19722218299068556, "learning_rate": 4.998383190918309e-06, "loss": 0.8409, "step": 112 }, { "epoch": 0.05610028546605436, "grad_norm": 0.19442312486929395, "learning_rate": 4.998347855765783e-06, "loss": 0.7861, "step": 113 }, { "epoch": 0.05659674816929378, "grad_norm": 0.19986199846633768, "learning_rate": 4.998312138780845e-06, "loss": 0.8412, "step": 114 }, { "epoch": 0.057093210872533204, "grad_norm": 0.19565952095909392, "learning_rate": 4.998276039968953e-06, "loss": 0.8415, "step": 115 }, { "epoch": 0.05758967357577262, "grad_norm": 0.19154584275348677, "learning_rate": 4.998239559335627e-06, "loss": 0.8631, "step": 116 }, { "epoch": 0.05808613627901204, "grad_norm": 0.19665878881747706, "learning_rate": 4.99820269688644e-06, "loss": 0.8598, "step": 117 }, { "epoch": 0.05858259898225146, "grad_norm": 0.2028047172295385, "learning_rate": 4.998165452627025e-06, "loss": 0.8683, "step": 118 }, { "epoch": 0.059079061685490875, "grad_norm": 0.2037639166726157, "learning_rate": 4.998127826563077e-06, "loss": 0.8112, "step": 119 }, { "epoch": 0.059575524388730296, "grad_norm": 0.1959085397655898, "learning_rate": 4.998089818700344e-06, "loss": 0.8463, "step": 120 }, { "epoch": 0.06007198709196972, "grad_norm": 0.19554313432021866, "learning_rate": 4.998051429044638e-06, "loss": 0.814, "step": 121 }, { "epoch": 0.06056844979520914, "grad_norm": 0.21327067999617655, "learning_rate": 4.998012657601823e-06, "loss": 0.8369, "step": 122 }, { "epoch": 0.06106491249844855, "grad_norm": 0.19482066937524098, "learning_rate": 4.997973504377826e-06, "loss": 0.8503, "step": 123 }, { "epoch": 0.06156137520168797, "grad_norm": 0.1921263735287261, "learning_rate": 4.99793396937863e-06, "loss": 0.8235, "step": 124 }, { "epoch": 0.062057837904927395, "grad_norm": 0.1912317820741509, "learning_rate": 4.99789405261028e-06, "loss": 0.7959, "step": 125 }, { "epoch": 0.06255430060816682, "grad_norm": 0.1930012329719747, "learning_rate": 4.997853754078873e-06, "loss": 0.8312, "step": 126 }, { "epoch": 0.06305076331140623, "grad_norm": 0.1854436986074747, "learning_rate": 4.997813073790571e-06, "loss": 0.7915, "step": 127 }, { "epoch": 0.06354722601464564, "grad_norm": 0.19266710662549197, "learning_rate": 4.997772011751589e-06, "loss": 0.8953, "step": 128 }, { "epoch": 0.06404368871788507, "grad_norm": 0.18944442325778477, "learning_rate": 4.9977305679682044e-06, "loss": 0.7999, "step": 129 }, { "epoch": 0.06454015142112449, "grad_norm": 0.19586350088433074, "learning_rate": 4.99768874244675e-06, "loss": 0.8091, "step": 130 }, { "epoch": 0.0650366141243639, "grad_norm": 0.19084476465979933, "learning_rate": 4.997646535193618e-06, "loss": 0.8733, "step": 131 }, { "epoch": 0.06553307682760333, "grad_norm": 0.1884614630341233, "learning_rate": 4.997603946215262e-06, "loss": 0.8106, "step": 132 }, { "epoch": 0.06602953953084274, "grad_norm": 0.1932654239334205, "learning_rate": 4.9975609755181875e-06, "loss": 0.9149, "step": 133 }, { "epoch": 0.06652600223408217, "grad_norm": 0.1872082345438042, "learning_rate": 4.997517623108964e-06, "loss": 0.7794, "step": 134 }, { "epoch": 0.06702246493732159, "grad_norm": 0.1873912754282765, "learning_rate": 4.997473888994215e-06, "loss": 0.8577, "step": 135 }, { "epoch": 0.067518927640561, "grad_norm": 0.18175316620522922, "learning_rate": 4.997429773180627e-06, "loss": 0.8226, "step": 136 }, { "epoch": 0.06801539034380043, "grad_norm": 0.17684485991325427, "learning_rate": 4.997385275674942e-06, "loss": 0.7802, "step": 137 }, { "epoch": 0.06851185304703984, "grad_norm": 0.1824706517270434, "learning_rate": 4.99734039648396e-06, "loss": 0.7985, "step": 138 }, { "epoch": 0.06900831575027926, "grad_norm": 0.1799259476989057, "learning_rate": 4.997295135614539e-06, "loss": 0.8036, "step": 139 }, { "epoch": 0.06950477845351868, "grad_norm": 0.17823278052011338, "learning_rate": 4.997249493073598e-06, "loss": 0.8167, "step": 140 }, { "epoch": 0.0700012411567581, "grad_norm": 0.18252365188173084, "learning_rate": 4.997203468868113e-06, "loss": 0.8295, "step": 141 }, { "epoch": 0.07049770385999751, "grad_norm": 0.19736868569754917, "learning_rate": 4.997157063005117e-06, "loss": 0.8908, "step": 142 }, { "epoch": 0.07099416656323694, "grad_norm": 0.17947955446070774, "learning_rate": 4.997110275491702e-06, "loss": 0.8115, "step": 143 }, { "epoch": 0.07149062926647635, "grad_norm": 0.19402407753710812, "learning_rate": 4.997063106335021e-06, "loss": 0.8454, "step": 144 }, { "epoch": 0.07198709196971577, "grad_norm": 0.1887305860550766, "learning_rate": 4.99701555554228e-06, "loss": 0.8945, "step": 145 }, { "epoch": 0.0724835546729552, "grad_norm": 0.17558377313650073, "learning_rate": 4.99696762312075e-06, "loss": 0.8009, "step": 146 }, { "epoch": 0.07298001737619461, "grad_norm": 0.1718469916803291, "learning_rate": 4.9969193090777526e-06, "loss": 0.8143, "step": 147 }, { "epoch": 0.07347648007943403, "grad_norm": 0.17617757928734015, "learning_rate": 4.996870613420675e-06, "loss": 0.8217, "step": 148 }, { "epoch": 0.07397294278267345, "grad_norm": 0.1706971696924589, "learning_rate": 4.996821536156958e-06, "loss": 0.7952, "step": 149 }, { "epoch": 0.07446940548591287, "grad_norm": 0.18103564748682863, "learning_rate": 4.996772077294103e-06, "loss": 0.8214, "step": 150 }, { "epoch": 0.0749658681891523, "grad_norm": 0.18622213669793447, "learning_rate": 4.9967222368396686e-06, "loss": 0.8843, "step": 151 }, { "epoch": 0.07546233089239171, "grad_norm": 0.1707461817930418, "learning_rate": 4.9966720148012714e-06, "loss": 0.7733, "step": 152 }, { "epoch": 0.07595879359563112, "grad_norm": 0.15640519366869854, "learning_rate": 4.996621411186589e-06, "loss": 0.7887, "step": 153 }, { "epoch": 0.07645525629887055, "grad_norm": 0.1624602295916605, "learning_rate": 4.996570426003354e-06, "loss": 0.8471, "step": 154 }, { "epoch": 0.07695171900210997, "grad_norm": 0.18290118651942194, "learning_rate": 4.996519059259358e-06, "loss": 0.8467, "step": 155 }, { "epoch": 0.07744818170534938, "grad_norm": 0.15912757799549243, "learning_rate": 4.996467310962453e-06, "loss": 0.8005, "step": 156 }, { "epoch": 0.07794464440858881, "grad_norm": 0.1613389987950997, "learning_rate": 4.996415181120547e-06, "loss": 0.7892, "step": 157 }, { "epoch": 0.07844110711182822, "grad_norm": 0.1570677524283997, "learning_rate": 4.996362669741609e-06, "loss": 0.8286, "step": 158 }, { "epoch": 0.07893756981506764, "grad_norm": 0.16537699681870446, "learning_rate": 4.996309776833661e-06, "loss": 0.8273, "step": 159 }, { "epoch": 0.07943403251830707, "grad_norm": 0.17043045232874193, "learning_rate": 4.99625650240479e-06, "loss": 0.7966, "step": 160 }, { "epoch": 0.07993049522154648, "grad_norm": 0.15656286097485822, "learning_rate": 4.9962028464631365e-06, "loss": 0.8332, "step": 161 }, { "epoch": 0.0804269579247859, "grad_norm": 0.1776462779512006, "learning_rate": 4.9961488090169015e-06, "loss": 0.819, "step": 162 }, { "epoch": 0.08092342062802532, "grad_norm": 0.15898779205316405, "learning_rate": 4.996094390074345e-06, "loss": 0.8326, "step": 163 }, { "epoch": 0.08141988333126474, "grad_norm": 0.16851878425262465, "learning_rate": 4.996039589643782e-06, "loss": 0.8636, "step": 164 }, { "epoch": 0.08191634603450416, "grad_norm": 0.14941991731340487, "learning_rate": 4.995984407733588e-06, "loss": 0.7804, "step": 165 }, { "epoch": 0.08241280873774358, "grad_norm": 0.16021285793408913, "learning_rate": 4.995928844352198e-06, "loss": 0.8614, "step": 166 }, { "epoch": 0.08290927144098299, "grad_norm": 0.1592609781313818, "learning_rate": 4.995872899508103e-06, "loss": 0.8355, "step": 167 }, { "epoch": 0.08340573414422242, "grad_norm": 0.14893046373707292, "learning_rate": 4.995816573209854e-06, "loss": 0.8146, "step": 168 }, { "epoch": 0.08390219684746184, "grad_norm": 0.14994377964296016, "learning_rate": 4.995759865466059e-06, "loss": 0.8073, "step": 169 }, { "epoch": 0.08439865955070125, "grad_norm": 0.15057311866648648, "learning_rate": 4.995702776285385e-06, "loss": 0.8195, "step": 170 }, { "epoch": 0.08489512225394068, "grad_norm": 0.1462021842716386, "learning_rate": 4.995645305676558e-06, "loss": 0.8054, "step": 171 }, { "epoch": 0.08539158495718009, "grad_norm": 0.15612484218678557, "learning_rate": 4.995587453648359e-06, "loss": 0.8538, "step": 172 }, { "epoch": 0.0858880476604195, "grad_norm": 0.15826299784330564, "learning_rate": 4.995529220209633e-06, "loss": 0.7967, "step": 173 }, { "epoch": 0.08638451036365893, "grad_norm": 0.14934453023970579, "learning_rate": 4.9954706053692766e-06, "loss": 0.8191, "step": 174 }, { "epoch": 0.08688097306689835, "grad_norm": 0.14487398595953221, "learning_rate": 4.995411609136252e-06, "loss": 0.7913, "step": 175 }, { "epoch": 0.08737743577013776, "grad_norm": 0.2361253677063094, "learning_rate": 4.995352231519572e-06, "loss": 0.8506, "step": 176 }, { "epoch": 0.08787389847337719, "grad_norm": 0.1542240974761206, "learning_rate": 4.995292472528315e-06, "loss": 0.8198, "step": 177 }, { "epoch": 0.0883703611766166, "grad_norm": 0.1533907925673139, "learning_rate": 4.9952323321716114e-06, "loss": 0.8095, "step": 178 }, { "epoch": 0.08886682387985602, "grad_norm": 0.14883299168355707, "learning_rate": 4.995171810458654e-06, "loss": 0.8492, "step": 179 }, { "epoch": 0.08936328658309545, "grad_norm": 0.14717378708658913, "learning_rate": 4.995110907398693e-06, "loss": 0.7989, "step": 180 }, { "epoch": 0.08985974928633486, "grad_norm": 0.1432697352230233, "learning_rate": 4.995049623001036e-06, "loss": 0.8112, "step": 181 }, { "epoch": 0.09035621198957429, "grad_norm": 0.14420342048963478, "learning_rate": 4.994987957275048e-06, "loss": 0.8113, "step": 182 }, { "epoch": 0.0908526746928137, "grad_norm": 0.1484614971553416, "learning_rate": 4.994925910230156e-06, "loss": 0.7928, "step": 183 }, { "epoch": 0.09134913739605312, "grad_norm": 0.14303299403206174, "learning_rate": 4.994863481875842e-06, "loss": 0.821, "step": 184 }, { "epoch": 0.09184560009929255, "grad_norm": 0.14990402029979874, "learning_rate": 4.9948006722216456e-06, "loss": 0.7919, "step": 185 }, { "epoch": 0.09234206280253196, "grad_norm": 0.15676689121485238, "learning_rate": 4.9947374812771675e-06, "loss": 0.8475, "step": 186 }, { "epoch": 0.09283852550577137, "grad_norm": 0.14836071218925942, "learning_rate": 4.994673909052067e-06, "loss": 0.81, "step": 187 }, { "epoch": 0.0933349882090108, "grad_norm": 0.14051092402883178, "learning_rate": 4.994609955556057e-06, "loss": 0.7896, "step": 188 }, { "epoch": 0.09383145091225022, "grad_norm": 0.13964829094096, "learning_rate": 4.994545620798914e-06, "loss": 0.8095, "step": 189 }, { "epoch": 0.09432791361548963, "grad_norm": 0.13916395003469767, "learning_rate": 4.994480904790469e-06, "loss": 0.7818, "step": 190 }, { "epoch": 0.09482437631872906, "grad_norm": 0.13992841292678654, "learning_rate": 4.994415807540616e-06, "loss": 0.8511, "step": 191 }, { "epoch": 0.09532083902196847, "grad_norm": 0.1382517091115567, "learning_rate": 4.9943503290593e-06, "loss": 0.804, "step": 192 }, { "epoch": 0.09581730172520789, "grad_norm": 0.13828160588381802, "learning_rate": 4.99428446935653e-06, "loss": 0.7945, "step": 193 }, { "epoch": 0.09631376442844732, "grad_norm": 0.14059132138612335, "learning_rate": 4.9942182284423715e-06, "loss": 0.8127, "step": 194 }, { "epoch": 0.09681022713168673, "grad_norm": 0.14342500046524775, "learning_rate": 4.994151606326949e-06, "loss": 0.7869, "step": 195 }, { "epoch": 0.09730668983492616, "grad_norm": 0.15328257822811497, "learning_rate": 4.994084603020444e-06, "loss": 0.7969, "step": 196 }, { "epoch": 0.09780315253816557, "grad_norm": 0.15036264302541547, "learning_rate": 4.9940172185330975e-06, "loss": 0.8021, "step": 197 }, { "epoch": 0.09829961524140499, "grad_norm": 0.13631880610103, "learning_rate": 4.993949452875208e-06, "loss": 0.8062, "step": 198 }, { "epoch": 0.09879607794464441, "grad_norm": 0.1370149045617051, "learning_rate": 4.993881306057131e-06, "loss": 0.7964, "step": 199 }, { "epoch": 0.09929254064788383, "grad_norm": 0.14003683139998516, "learning_rate": 4.993812778089283e-06, "loss": 0.7829, "step": 200 }, { "epoch": 0.09978900335112324, "grad_norm": 0.13364321729142267, "learning_rate": 4.993743868982137e-06, "loss": 0.7435, "step": 201 }, { "epoch": 0.10028546605436267, "grad_norm": 0.1432644408384139, "learning_rate": 4.993674578746225e-06, "loss": 0.8404, "step": 202 }, { "epoch": 0.10078192875760209, "grad_norm": 0.14022940831071956, "learning_rate": 4.9936049073921365e-06, "loss": 0.7916, "step": 203 }, { "epoch": 0.1012783914608415, "grad_norm": 0.1431480895121353, "learning_rate": 4.99353485493052e-06, "loss": 0.7825, "step": 204 }, { "epoch": 0.10177485416408093, "grad_norm": 0.1537315485269007, "learning_rate": 4.993464421372081e-06, "loss": 0.8018, "step": 205 }, { "epoch": 0.10227131686732034, "grad_norm": 0.13149782403488902, "learning_rate": 4.993393606727587e-06, "loss": 0.748, "step": 206 }, { "epoch": 0.10276777957055976, "grad_norm": 0.13795599982306228, "learning_rate": 4.993322411007857e-06, "loss": 0.7745, "step": 207 }, { "epoch": 0.10326424227379918, "grad_norm": 0.14263948789480718, "learning_rate": 4.993250834223774e-06, "loss": 0.8286, "step": 208 }, { "epoch": 0.1037607049770386, "grad_norm": 0.1322915151191568, "learning_rate": 4.9931788763862774e-06, "loss": 0.7765, "step": 209 }, { "epoch": 0.10425716768027801, "grad_norm": 0.1411982251684298, "learning_rate": 4.993106537506365e-06, "loss": 0.7978, "step": 210 }, { "epoch": 0.10475363038351744, "grad_norm": 0.14287120534895575, "learning_rate": 4.993033817595092e-06, "loss": 0.8119, "step": 211 }, { "epoch": 0.10525009308675685, "grad_norm": 0.1457313365849308, "learning_rate": 4.992960716663572e-06, "loss": 0.8391, "step": 212 }, { "epoch": 0.10574655578999628, "grad_norm": 0.1374862675337484, "learning_rate": 4.992887234722978e-06, "loss": 0.8284, "step": 213 }, { "epoch": 0.1062430184932357, "grad_norm": 0.13532768487001728, "learning_rate": 4.992813371784542e-06, "loss": 0.7933, "step": 214 }, { "epoch": 0.10673948119647511, "grad_norm": 0.14619055294203817, "learning_rate": 4.99273912785955e-06, "loss": 0.8232, "step": 215 }, { "epoch": 0.10723594389971454, "grad_norm": 0.13334770368704213, "learning_rate": 4.992664502959351e-06, "loss": 0.7987, "step": 216 }, { "epoch": 0.10773240660295395, "grad_norm": 0.1386527129272213, "learning_rate": 4.99258949709535e-06, "loss": 0.8385, "step": 217 }, { "epoch": 0.10822886930619337, "grad_norm": 0.13307297628479753, "learning_rate": 4.99251411027901e-06, "loss": 0.7945, "step": 218 }, { "epoch": 0.1087253320094328, "grad_norm": 0.1405179423764166, "learning_rate": 4.992438342521851e-06, "loss": 0.8591, "step": 219 }, { "epoch": 0.10922179471267221, "grad_norm": 0.14156609276835416, "learning_rate": 4.992362193835456e-06, "loss": 0.8333, "step": 220 }, { "epoch": 0.10971825741591162, "grad_norm": 0.135514778316713, "learning_rate": 4.992285664231462e-06, "loss": 0.7784, "step": 221 }, { "epoch": 0.11021472011915105, "grad_norm": 0.13722582342328407, "learning_rate": 4.992208753721564e-06, "loss": 0.8339, "step": 222 }, { "epoch": 0.11071118282239047, "grad_norm": 0.1354247771990746, "learning_rate": 4.992131462317518e-06, "loss": 0.828, "step": 223 }, { "epoch": 0.11120764552562988, "grad_norm": 0.13994897440630472, "learning_rate": 4.992053790031136e-06, "loss": 0.8453, "step": 224 }, { "epoch": 0.11170410822886931, "grad_norm": 0.13384734719071256, "learning_rate": 4.9919757368742895e-06, "loss": 0.8066, "step": 225 }, { "epoch": 0.11220057093210872, "grad_norm": 0.13127250934166673, "learning_rate": 4.991897302858908e-06, "loss": 0.7916, "step": 226 }, { "epoch": 0.11269703363534815, "grad_norm": 0.14039582163447972, "learning_rate": 4.9918184879969765e-06, "loss": 0.8656, "step": 227 }, { "epoch": 0.11319349633858757, "grad_norm": 0.1470635342507985, "learning_rate": 4.991739292300544e-06, "loss": 0.8278, "step": 228 }, { "epoch": 0.11368995904182698, "grad_norm": 0.14174626233120474, "learning_rate": 4.991659715781712e-06, "loss": 0.823, "step": 229 }, { "epoch": 0.11418642174506641, "grad_norm": 0.13862626411027507, "learning_rate": 4.991579758452644e-06, "loss": 0.8278, "step": 230 }, { "epoch": 0.11468288444830582, "grad_norm": 0.13959224442195398, "learning_rate": 4.991499420325558e-06, "loss": 0.8252, "step": 231 }, { "epoch": 0.11517934715154524, "grad_norm": 0.13884826767573266, "learning_rate": 4.991418701412735e-06, "loss": 0.8404, "step": 232 }, { "epoch": 0.11567580985478466, "grad_norm": 0.1389057416612581, "learning_rate": 4.991337601726509e-06, "loss": 0.8106, "step": 233 }, { "epoch": 0.11617227255802408, "grad_norm": 0.13638785848050972, "learning_rate": 4.991256121279277e-06, "loss": 0.8069, "step": 234 }, { "epoch": 0.11666873526126349, "grad_norm": 0.1338672028203034, "learning_rate": 4.991174260083491e-06, "loss": 0.8019, "step": 235 }, { "epoch": 0.11716519796450292, "grad_norm": 0.14097764226298978, "learning_rate": 4.991092018151663e-06, "loss": 0.846, "step": 236 }, { "epoch": 0.11766166066774233, "grad_norm": 0.13455102270597805, "learning_rate": 4.991009395496361e-06, "loss": 0.8056, "step": 237 }, { "epoch": 0.11815812337098175, "grad_norm": 0.14555871706504694, "learning_rate": 4.9909263921302135e-06, "loss": 0.8075, "step": 238 }, { "epoch": 0.11865458607422118, "grad_norm": 0.14461201544386468, "learning_rate": 4.990843008065905e-06, "loss": 0.8704, "step": 239 }, { "epoch": 0.11915104877746059, "grad_norm": 0.1419964925817203, "learning_rate": 4.9907592433161815e-06, "loss": 0.792, "step": 240 }, { "epoch": 0.1196475114807, "grad_norm": 0.13759060310549415, "learning_rate": 4.990675097893843e-06, "loss": 0.7836, "step": 241 }, { "epoch": 0.12014397418393943, "grad_norm": 0.14605868485481172, "learning_rate": 4.9905905718117505e-06, "loss": 0.836, "step": 242 }, { "epoch": 0.12064043688717885, "grad_norm": 0.1373934253609785, "learning_rate": 4.990505665082824e-06, "loss": 0.7723, "step": 243 }, { "epoch": 0.12113689959041828, "grad_norm": 0.13865838334876657, "learning_rate": 4.9904203777200375e-06, "loss": 0.7898, "step": 244 }, { "epoch": 0.12163336229365769, "grad_norm": 0.1390399518577959, "learning_rate": 4.990334709736428e-06, "loss": 0.7869, "step": 245 }, { "epoch": 0.1221298249968971, "grad_norm": 0.14077068851100413, "learning_rate": 4.990248661145087e-06, "loss": 0.8108, "step": 246 }, { "epoch": 0.12262628770013653, "grad_norm": 0.1337469353425711, "learning_rate": 4.9901622319591665e-06, "loss": 0.8248, "step": 247 }, { "epoch": 0.12312275040337595, "grad_norm": 0.13685347748788432, "learning_rate": 4.9900754221918766e-06, "loss": 0.8252, "step": 248 }, { "epoch": 0.12361921310661536, "grad_norm": 0.13611377917936257, "learning_rate": 4.989988231856483e-06, "loss": 0.7827, "step": 249 }, { "epoch": 0.12411567580985479, "grad_norm": 0.13974150757425385, "learning_rate": 4.989900660966312e-06, "loss": 0.8795, "step": 250 }, { "epoch": 0.1246121385130942, "grad_norm": 0.13278222798977063, "learning_rate": 4.9898127095347475e-06, "loss": 0.8178, "step": 251 }, { "epoch": 0.12510860121633363, "grad_norm": 0.13505628901465874, "learning_rate": 4.989724377575231e-06, "loss": 0.7909, "step": 252 }, { "epoch": 0.12560506391957305, "grad_norm": 0.1366672871248607, "learning_rate": 4.989635665101263e-06, "loss": 0.8262, "step": 253 }, { "epoch": 0.12610152662281246, "grad_norm": 0.14405211149440367, "learning_rate": 4.989546572126402e-06, "loss": 0.8606, "step": 254 }, { "epoch": 0.12659798932605187, "grad_norm": 0.14227617759563377, "learning_rate": 4.9894570986642655e-06, "loss": 0.8379, "step": 255 }, { "epoch": 0.1270944520292913, "grad_norm": 0.1469637596335691, "learning_rate": 4.989367244728526e-06, "loss": 0.8578, "step": 256 }, { "epoch": 0.12759091473253073, "grad_norm": 0.13275558591812953, "learning_rate": 4.989277010332917e-06, "loss": 0.7559, "step": 257 }, { "epoch": 0.12808737743577014, "grad_norm": 0.14791426685196862, "learning_rate": 4.989186395491229e-06, "loss": 0.844, "step": 258 }, { "epoch": 0.12858384013900956, "grad_norm": 0.13244945426039428, "learning_rate": 4.989095400217312e-06, "loss": 0.8062, "step": 259 }, { "epoch": 0.12908030284224897, "grad_norm": 0.13778000814954997, "learning_rate": 4.9890040245250725e-06, "loss": 0.8399, "step": 260 }, { "epoch": 0.1295767655454884, "grad_norm": 0.143797201748363, "learning_rate": 4.9889122684284765e-06, "loss": 0.7978, "step": 261 }, { "epoch": 0.1300732282487278, "grad_norm": 0.13685020353927613, "learning_rate": 4.988820131941547e-06, "loss": 0.7712, "step": 262 }, { "epoch": 0.13056969095196724, "grad_norm": 0.13664331543020586, "learning_rate": 4.988727615078365e-06, "loss": 0.7458, "step": 263 }, { "epoch": 0.13106615365520666, "grad_norm": 0.1499586905570859, "learning_rate": 4.988634717853071e-06, "loss": 0.8261, "step": 264 }, { "epoch": 0.13156261635844607, "grad_norm": 0.13982088441840343, "learning_rate": 4.988541440279862e-06, "loss": 0.8105, "step": 265 }, { "epoch": 0.13205907906168549, "grad_norm": 0.13605433414869017, "learning_rate": 4.988447782372996e-06, "loss": 0.7822, "step": 266 }, { "epoch": 0.1325555417649249, "grad_norm": 0.13252713875584649, "learning_rate": 4.988353744146784e-06, "loss": 0.7532, "step": 267 }, { "epoch": 0.13305200446816434, "grad_norm": 0.140502196927792, "learning_rate": 4.988259325615601e-06, "loss": 0.8733, "step": 268 }, { "epoch": 0.13354846717140376, "grad_norm": 0.13822527446456542, "learning_rate": 4.988164526793877e-06, "loss": 0.7752, "step": 269 }, { "epoch": 0.13404492987464317, "grad_norm": 0.13966558823867067, "learning_rate": 4.988069347696098e-06, "loss": 0.8037, "step": 270 }, { "epoch": 0.13454139257788258, "grad_norm": 0.14070793181761732, "learning_rate": 4.987973788336814e-06, "loss": 0.801, "step": 271 }, { "epoch": 0.135037855281122, "grad_norm": 0.13548015995353577, "learning_rate": 4.987877848730627e-06, "loss": 0.7917, "step": 272 }, { "epoch": 0.1355343179843614, "grad_norm": 0.13914099615548198, "learning_rate": 4.987781528892201e-06, "loss": 0.7604, "step": 273 }, { "epoch": 0.13603078068760086, "grad_norm": 0.1439655316233177, "learning_rate": 4.987684828836257e-06, "loss": 0.8128, "step": 274 }, { "epoch": 0.13652724339084027, "grad_norm": 0.13334244239454465, "learning_rate": 4.987587748577574e-06, "loss": 0.7762, "step": 275 }, { "epoch": 0.13702370609407968, "grad_norm": 0.13033286848134193, "learning_rate": 4.98749028813099e-06, "loss": 0.794, "step": 276 }, { "epoch": 0.1375201687973191, "grad_norm": 0.1263283049211498, "learning_rate": 4.987392447511398e-06, "loss": 0.7245, "step": 277 }, { "epoch": 0.1380166315005585, "grad_norm": 0.1346177373563915, "learning_rate": 4.987294226733753e-06, "loss": 0.825, "step": 278 }, { "epoch": 0.13851309420379793, "grad_norm": 0.13797046885885128, "learning_rate": 4.987195625813066e-06, "loss": 0.7799, "step": 279 }, { "epoch": 0.13900955690703737, "grad_norm": 0.13671450378929442, "learning_rate": 4.987096644764407e-06, "loss": 0.8186, "step": 280 }, { "epoch": 0.13950601961027678, "grad_norm": 0.13733243307369106, "learning_rate": 4.986997283602903e-06, "loss": 0.8646, "step": 281 }, { "epoch": 0.1400024823135162, "grad_norm": 0.13818658844648463, "learning_rate": 4.986897542343741e-06, "loss": 0.8053, "step": 282 }, { "epoch": 0.1404989450167556, "grad_norm": 0.1376323705656438, "learning_rate": 4.9867974210021634e-06, "loss": 0.8298, "step": 283 }, { "epoch": 0.14099540771999503, "grad_norm": 0.13132512407016342, "learning_rate": 4.986696919593473e-06, "loss": 0.7656, "step": 284 }, { "epoch": 0.14149187042323447, "grad_norm": 0.135693807444876, "learning_rate": 4.986596038133029e-06, "loss": 0.8259, "step": 285 }, { "epoch": 0.14198833312647388, "grad_norm": 0.14566445753533755, "learning_rate": 4.986494776636251e-06, "loss": 0.8459, "step": 286 }, { "epoch": 0.1424847958297133, "grad_norm": 0.14082471675828692, "learning_rate": 4.986393135118614e-06, "loss": 0.8634, "step": 287 }, { "epoch": 0.1429812585329527, "grad_norm": 0.13675780982750446, "learning_rate": 4.9862911135956525e-06, "loss": 0.7724, "step": 288 }, { "epoch": 0.14347772123619212, "grad_norm": 0.1387994309453966, "learning_rate": 4.986188712082959e-06, "loss": 0.7802, "step": 289 }, { "epoch": 0.14397418393943154, "grad_norm": 0.13354684023511523, "learning_rate": 4.986085930596184e-06, "loss": 0.8134, "step": 290 }, { "epoch": 0.14447064664267098, "grad_norm": 0.14594678291314142, "learning_rate": 4.985982769151035e-06, "loss": 0.799, "step": 291 }, { "epoch": 0.1449671093459104, "grad_norm": 0.14334448087535562, "learning_rate": 4.985879227763281e-06, "loss": 0.8697, "step": 292 }, { "epoch": 0.1454635720491498, "grad_norm": 0.13442178316157122, "learning_rate": 4.985775306448743e-06, "loss": 0.794, "step": 293 }, { "epoch": 0.14596003475238922, "grad_norm": 0.14958434833387912, "learning_rate": 4.985671005223308e-06, "loss": 0.8104, "step": 294 }, { "epoch": 0.14645649745562864, "grad_norm": 0.14089328532302936, "learning_rate": 4.985566324102913e-06, "loss": 0.8334, "step": 295 }, { "epoch": 0.14695296015886805, "grad_norm": 0.14930089897209584, "learning_rate": 4.98546126310356e-06, "loss": 0.8472, "step": 296 }, { "epoch": 0.1474494228621075, "grad_norm": 0.13838634635947197, "learning_rate": 4.9853558222413025e-06, "loss": 0.8081, "step": 297 }, { "epoch": 0.1479458855653469, "grad_norm": 0.13657833547456122, "learning_rate": 4.985250001532258e-06, "loss": 0.8046, "step": 298 }, { "epoch": 0.14844234826858632, "grad_norm": 0.13416933719640858, "learning_rate": 4.9851438009925985e-06, "loss": 0.7718, "step": 299 }, { "epoch": 0.14893881097182574, "grad_norm": 0.13305701512405635, "learning_rate": 4.985037220638556e-06, "loss": 0.7332, "step": 300 }, { "epoch": 0.14943527367506515, "grad_norm": 0.12255084911798615, "learning_rate": 4.9849302604864176e-06, "loss": 0.7301, "step": 301 }, { "epoch": 0.1499317363783046, "grad_norm": 0.1349582502588026, "learning_rate": 4.9848229205525325e-06, "loss": 0.8268, "step": 302 }, { "epoch": 0.150428199081544, "grad_norm": 0.13941501409088294, "learning_rate": 4.984715200853305e-06, "loss": 0.8187, "step": 303 }, { "epoch": 0.15092466178478342, "grad_norm": 0.13492743679489108, "learning_rate": 4.9846071014051985e-06, "loss": 0.7948, "step": 304 }, { "epoch": 0.15142112448802283, "grad_norm": 0.1652920462764793, "learning_rate": 4.984498622224734e-06, "loss": 0.834, "step": 305 }, { "epoch": 0.15191758719126225, "grad_norm": 0.13639513644830636, "learning_rate": 4.984389763328491e-06, "loss": 0.8263, "step": 306 }, { "epoch": 0.15241404989450166, "grad_norm": 0.13894535392298077, "learning_rate": 4.984280524733107e-06, "loss": 0.8399, "step": 307 }, { "epoch": 0.1529105125977411, "grad_norm": 0.1427172968985807, "learning_rate": 4.984170906455277e-06, "loss": 0.8176, "step": 308 }, { "epoch": 0.15340697530098052, "grad_norm": 0.13916573482217912, "learning_rate": 4.984060908511755e-06, "loss": 0.84, "step": 309 }, { "epoch": 0.15390343800421993, "grad_norm": 0.13640696194757118, "learning_rate": 4.983950530919352e-06, "loss": 0.8378, "step": 310 }, { "epoch": 0.15439990070745935, "grad_norm": 0.1269776959706502, "learning_rate": 4.983839773694937e-06, "loss": 0.7752, "step": 311 }, { "epoch": 0.15489636341069876, "grad_norm": 0.13735768715592372, "learning_rate": 4.983728636855438e-06, "loss": 0.8528, "step": 312 }, { "epoch": 0.1553928261139382, "grad_norm": 0.13601796887750978, "learning_rate": 4.983617120417841e-06, "loss": 0.7813, "step": 313 }, { "epoch": 0.15588928881717762, "grad_norm": 0.1394868538656493, "learning_rate": 4.983505224399188e-06, "loss": 0.8121, "step": 314 }, { "epoch": 0.15638575152041703, "grad_norm": 0.1393241198294074, "learning_rate": 4.983392948816582e-06, "loss": 0.8167, "step": 315 }, { "epoch": 0.15688221422365645, "grad_norm": 0.13601273115897386, "learning_rate": 4.9832802936871815e-06, "loss": 0.815, "step": 316 }, { "epoch": 0.15737867692689586, "grad_norm": 0.13309082027141247, "learning_rate": 4.983167259028205e-06, "loss": 0.7845, "step": 317 }, { "epoch": 0.15787513963013527, "grad_norm": 0.1328710618339079, "learning_rate": 4.983053844856928e-06, "loss": 0.837, "step": 318 }, { "epoch": 0.15837160233337472, "grad_norm": 0.1382512821366624, "learning_rate": 4.982940051190682e-06, "loss": 0.7642, "step": 319 }, { "epoch": 0.15886806503661413, "grad_norm": 0.13347990081206224, "learning_rate": 4.982825878046862e-06, "loss": 0.7786, "step": 320 }, { "epoch": 0.15936452773985355, "grad_norm": 0.1350212390870091, "learning_rate": 4.9827113254429144e-06, "loss": 0.7822, "step": 321 }, { "epoch": 0.15986099044309296, "grad_norm": 0.13659963300872746, "learning_rate": 4.982596393396348e-06, "loss": 0.7519, "step": 322 }, { "epoch": 0.16035745314633237, "grad_norm": 0.14307889455512066, "learning_rate": 4.982481081924728e-06, "loss": 0.8533, "step": 323 }, { "epoch": 0.1608539158495718, "grad_norm": 0.1330601424580335, "learning_rate": 4.982365391045679e-06, "loss": 0.8239, "step": 324 }, { "epoch": 0.16135037855281123, "grad_norm": 0.15026890457234857, "learning_rate": 4.982249320776882e-06, "loss": 0.8806, "step": 325 }, { "epoch": 0.16184684125605064, "grad_norm": 0.12974893713522884, "learning_rate": 4.982132871136075e-06, "loss": 0.7378, "step": 326 }, { "epoch": 0.16234330395929006, "grad_norm": 0.1444247544686782, "learning_rate": 4.9820160421410575e-06, "loss": 0.8243, "step": 327 }, { "epoch": 0.16283976666252947, "grad_norm": 0.1323450801185765, "learning_rate": 4.981898833809684e-06, "loss": 0.7696, "step": 328 }, { "epoch": 0.1633362293657689, "grad_norm": 0.1353207749212687, "learning_rate": 4.981781246159867e-06, "loss": 0.7677, "step": 329 }, { "epoch": 0.16383269206900833, "grad_norm": 0.12787497196232483, "learning_rate": 4.98166327920958e-06, "loss": 0.7655, "step": 330 }, { "epoch": 0.16432915477224774, "grad_norm": 0.13359250170701767, "learning_rate": 4.9815449329768505e-06, "loss": 0.8392, "step": 331 }, { "epoch": 0.16482561747548716, "grad_norm": 0.13162070870865217, "learning_rate": 4.981426207479767e-06, "loss": 0.8023, "step": 332 }, { "epoch": 0.16532208017872657, "grad_norm": 0.1340847314964679, "learning_rate": 4.981307102736474e-06, "loss": 0.8049, "step": 333 }, { "epoch": 0.16581854288196599, "grad_norm": 0.13799078318837465, "learning_rate": 4.981187618765175e-06, "loss": 0.801, "step": 334 }, { "epoch": 0.1663150055852054, "grad_norm": 0.1352495083171962, "learning_rate": 4.981067755584131e-06, "loss": 0.8028, "step": 335 }, { "epoch": 0.16681146828844484, "grad_norm": 0.13424131175066562, "learning_rate": 4.9809475132116624e-06, "loss": 0.8148, "step": 336 }, { "epoch": 0.16730793099168426, "grad_norm": 0.13380623256179228, "learning_rate": 4.980826891666145e-06, "loss": 0.7906, "step": 337 }, { "epoch": 0.16780439369492367, "grad_norm": 0.13711848434016552, "learning_rate": 4.980705890966014e-06, "loss": 0.7818, "step": 338 }, { "epoch": 0.16830085639816308, "grad_norm": 0.1408936385231929, "learning_rate": 4.980584511129763e-06, "loss": 0.8319, "step": 339 }, { "epoch": 0.1687973191014025, "grad_norm": 0.1361241205412377, "learning_rate": 4.980462752175943e-06, "loss": 0.7816, "step": 340 }, { "epoch": 0.1692937818046419, "grad_norm": 0.13336550378881848, "learning_rate": 4.980340614123162e-06, "loss": 0.7656, "step": 341 }, { "epoch": 0.16979024450788135, "grad_norm": 0.1291163574188604, "learning_rate": 4.980218096990087e-06, "loss": 0.7573, "step": 342 }, { "epoch": 0.17028670721112077, "grad_norm": 0.1331261115959964, "learning_rate": 4.980095200795443e-06, "loss": 0.7838, "step": 343 }, { "epoch": 0.17078316991436018, "grad_norm": 0.13606378865652022, "learning_rate": 4.979971925558014e-06, "loss": 0.817, "step": 344 }, { "epoch": 0.1712796326175996, "grad_norm": 0.12897271331540627, "learning_rate": 4.979848271296639e-06, "loss": 0.7615, "step": 345 }, { "epoch": 0.171776095320839, "grad_norm": 0.14023828743325562, "learning_rate": 4.979724238030217e-06, "loss": 0.7834, "step": 346 }, { "epoch": 0.17227255802407845, "grad_norm": 0.13830905999777027, "learning_rate": 4.979599825777704e-06, "loss": 0.8302, "step": 347 }, { "epoch": 0.17276902072731787, "grad_norm": 0.1428666780776237, "learning_rate": 4.979475034558115e-06, "loss": 0.8588, "step": 348 }, { "epoch": 0.17326548343055728, "grad_norm": 0.128916884054928, "learning_rate": 4.979349864390523e-06, "loss": 0.7776, "step": 349 }, { "epoch": 0.1737619461337967, "grad_norm": 0.13925327403441065, "learning_rate": 4.9792243152940576e-06, "loss": 0.8802, "step": 350 }, { "epoch": 0.1742584088370361, "grad_norm": 0.1363578403401138, "learning_rate": 4.979098387287907e-06, "loss": 0.8118, "step": 351 }, { "epoch": 0.17475487154027552, "grad_norm": 0.14256811683930512, "learning_rate": 4.978972080391317e-06, "loss": 0.7879, "step": 352 }, { "epoch": 0.17525133424351497, "grad_norm": 0.13404514324949623, "learning_rate": 4.978845394623591e-06, "loss": 0.7738, "step": 353 }, { "epoch": 0.17574779694675438, "grad_norm": 0.14672131580692716, "learning_rate": 4.978718330004093e-06, "loss": 0.8091, "step": 354 }, { "epoch": 0.1762442596499938, "grad_norm": 0.13482860481948047, "learning_rate": 4.978590886552241e-06, "loss": 0.7855, "step": 355 }, { "epoch": 0.1767407223532332, "grad_norm": 0.1390863998270458, "learning_rate": 4.978463064287513e-06, "loss": 0.8136, "step": 356 }, { "epoch": 0.17723718505647262, "grad_norm": 0.12766825836109758, "learning_rate": 4.978334863229445e-06, "loss": 0.7519, "step": 357 }, { "epoch": 0.17773364775971204, "grad_norm": 0.13957492610638372, "learning_rate": 4.97820628339763e-06, "loss": 0.8374, "step": 358 }, { "epoch": 0.17823011046295148, "grad_norm": 0.13624109938717846, "learning_rate": 4.97807732481172e-06, "loss": 0.8468, "step": 359 }, { "epoch": 0.1787265731661909, "grad_norm": 0.1371796025283825, "learning_rate": 4.977947987491424e-06, "loss": 0.8134, "step": 360 }, { "epoch": 0.1792230358694303, "grad_norm": 0.13791288379959807, "learning_rate": 4.977818271456508e-06, "loss": 0.8523, "step": 361 }, { "epoch": 0.17971949857266972, "grad_norm": 0.13682556966382917, "learning_rate": 4.977688176726799e-06, "loss": 0.7998, "step": 362 }, { "epoch": 0.18021596127590914, "grad_norm": 0.12914631502678342, "learning_rate": 4.977557703322178e-06, "loss": 0.7715, "step": 363 }, { "epoch": 0.18071242397914858, "grad_norm": 0.13450502572602166, "learning_rate": 4.977426851262588e-06, "loss": 0.7999, "step": 364 }, { "epoch": 0.181208886682388, "grad_norm": 0.1324950964120947, "learning_rate": 4.977295620568025e-06, "loss": 0.773, "step": 365 }, { "epoch": 0.1817053493856274, "grad_norm": 0.13291470130623328, "learning_rate": 4.977164011258547e-06, "loss": 0.8047, "step": 366 }, { "epoch": 0.18220181208886682, "grad_norm": 0.14303063683318232, "learning_rate": 4.977032023354269e-06, "loss": 0.8361, "step": 367 }, { "epoch": 0.18269827479210624, "grad_norm": 0.14299058880067156, "learning_rate": 4.976899656875361e-06, "loss": 0.8172, "step": 368 }, { "epoch": 0.18319473749534565, "grad_norm": 0.13663382187886108, "learning_rate": 4.976766911842056e-06, "loss": 0.7529, "step": 369 }, { "epoch": 0.1836912001985851, "grad_norm": 0.13831178738187494, "learning_rate": 4.9766337882746395e-06, "loss": 0.7863, "step": 370 }, { "epoch": 0.1841876629018245, "grad_norm": 0.13456481368960774, "learning_rate": 4.976500286193458e-06, "loss": 0.8505, "step": 371 }, { "epoch": 0.18468412560506392, "grad_norm": 0.13582472070675122, "learning_rate": 4.976366405618916e-06, "loss": 0.8036, "step": 372 }, { "epoch": 0.18518058830830333, "grad_norm": 0.13588434233209187, "learning_rate": 4.976232146571476e-06, "loss": 0.7814, "step": 373 }, { "epoch": 0.18567705101154275, "grad_norm": 0.13408060178137982, "learning_rate": 4.976097509071654e-06, "loss": 0.824, "step": 374 }, { "epoch": 0.1861735137147822, "grad_norm": 0.13739925696360844, "learning_rate": 4.975962493140029e-06, "loss": 0.8233, "step": 375 }, { "epoch": 0.1866699764180216, "grad_norm": 0.12982209493306304, "learning_rate": 4.9758270987972356e-06, "loss": 0.7537, "step": 376 }, { "epoch": 0.18716643912126102, "grad_norm": 0.13310617104329844, "learning_rate": 4.975691326063968e-06, "loss": 0.7771, "step": 377 }, { "epoch": 0.18766290182450043, "grad_norm": 0.13823931193705333, "learning_rate": 4.9755551749609755e-06, "loss": 0.7723, "step": 378 }, { "epoch": 0.18815936452773985, "grad_norm": 0.1382970645097716, "learning_rate": 4.975418645509066e-06, "loss": 0.8002, "step": 379 }, { "epoch": 0.18865582723097926, "grad_norm": 0.13785374560341188, "learning_rate": 4.975281737729109e-06, "loss": 0.7799, "step": 380 }, { "epoch": 0.1891522899342187, "grad_norm": 0.13630037342754214, "learning_rate": 4.975144451642024e-06, "loss": 0.7825, "step": 381 }, { "epoch": 0.18964875263745812, "grad_norm": 0.13742010823689682, "learning_rate": 4.975006787268797e-06, "loss": 0.7832, "step": 382 }, { "epoch": 0.19014521534069753, "grad_norm": 0.1372266789447449, "learning_rate": 4.974868744630467e-06, "loss": 0.7805, "step": 383 }, { "epoch": 0.19064167804393695, "grad_norm": 0.13335756294028897, "learning_rate": 4.974730323748129e-06, "loss": 0.7747, "step": 384 }, { "epoch": 0.19113814074717636, "grad_norm": 0.1331325911806612, "learning_rate": 4.974591524642942e-06, "loss": 0.7626, "step": 385 }, { "epoch": 0.19163460345041577, "grad_norm": 0.13528286793265076, "learning_rate": 4.974452347336116e-06, "loss": 0.7906, "step": 386 }, { "epoch": 0.19213106615365522, "grad_norm": 0.13710197959682258, "learning_rate": 4.974312791848925e-06, "loss": 0.7775, "step": 387 }, { "epoch": 0.19262752885689463, "grad_norm": 0.13646907256302476, "learning_rate": 4.974172858202695e-06, "loss": 0.8266, "step": 388 }, { "epoch": 0.19312399156013405, "grad_norm": 0.14002294190339723, "learning_rate": 4.974032546418816e-06, "loss": 0.7807, "step": 389 }, { "epoch": 0.19362045426337346, "grad_norm": 0.14404269798290067, "learning_rate": 4.973891856518728e-06, "loss": 0.8195, "step": 390 }, { "epoch": 0.19411691696661287, "grad_norm": 0.13811976877791707, "learning_rate": 4.973750788523937e-06, "loss": 0.8008, "step": 391 }, { "epoch": 0.19461337966985232, "grad_norm": 0.14072300476936328, "learning_rate": 4.9736093424560005e-06, "loss": 0.7705, "step": 392 }, { "epoch": 0.19510984237309173, "grad_norm": 0.14423164849233763, "learning_rate": 4.973467518336538e-06, "loss": 0.7988, "step": 393 }, { "epoch": 0.19560630507633114, "grad_norm": 0.13418208376903598, "learning_rate": 4.973325316187225e-06, "loss": 0.7761, "step": 394 }, { "epoch": 0.19610276777957056, "grad_norm": 0.1316889322126393, "learning_rate": 4.973182736029793e-06, "loss": 0.847, "step": 395 }, { "epoch": 0.19659923048280997, "grad_norm": 0.14201288602040998, "learning_rate": 4.973039777886035e-06, "loss": 0.7743, "step": 396 }, { "epoch": 0.1970956931860494, "grad_norm": 0.14278977007281732, "learning_rate": 4.9728964417777986e-06, "loss": 0.7947, "step": 397 }, { "epoch": 0.19759215588928883, "grad_norm": 0.13398922706134572, "learning_rate": 4.972752727726992e-06, "loss": 0.75, "step": 398 }, { "epoch": 0.19808861859252824, "grad_norm": 0.1375355686898203, "learning_rate": 4.972608635755577e-06, "loss": 0.781, "step": 399 }, { "epoch": 0.19858508129576766, "grad_norm": 0.1371602165214636, "learning_rate": 4.972464165885579e-06, "loss": 0.823, "step": 400 }, { "epoch": 0.19908154399900707, "grad_norm": 0.14577906731021556, "learning_rate": 4.972319318139074e-06, "loss": 0.8288, "step": 401 }, { "epoch": 0.19957800670224649, "grad_norm": 0.15599884531010932, "learning_rate": 4.972174092538203e-06, "loss": 0.8007, "step": 402 }, { "epoch": 0.2000744694054859, "grad_norm": 0.13716250162699573, "learning_rate": 4.97202848910516e-06, "loss": 0.763, "step": 403 }, { "epoch": 0.20057093210872534, "grad_norm": 0.13013318815804806, "learning_rate": 4.9718825078622e-06, "loss": 0.7654, "step": 404 }, { "epoch": 0.20106739481196476, "grad_norm": 0.14489783733703743, "learning_rate": 4.971736148831631e-06, "loss": 0.8095, "step": 405 }, { "epoch": 0.20156385751520417, "grad_norm": 0.14178082134535308, "learning_rate": 4.971589412035823e-06, "loss": 0.7955, "step": 406 }, { "epoch": 0.20206032021844358, "grad_norm": 0.14527202694186261, "learning_rate": 4.971442297497202e-06, "loss": 0.8, "step": 407 }, { "epoch": 0.202556782921683, "grad_norm": 0.13886094340580113, "learning_rate": 4.971294805238252e-06, "loss": 0.8482, "step": 408 }, { "epoch": 0.20305324562492244, "grad_norm": 0.15143893194787292, "learning_rate": 4.971146935281517e-06, "loss": 0.7927, "step": 409 }, { "epoch": 0.20354970832816185, "grad_norm": 0.1636495293029831, "learning_rate": 4.970998687649593e-06, "loss": 0.8153, "step": 410 }, { "epoch": 0.20404617103140127, "grad_norm": 0.13453836074071723, "learning_rate": 4.97085006236514e-06, "loss": 0.7805, "step": 411 }, { "epoch": 0.20454263373464068, "grad_norm": 0.16194606706618664, "learning_rate": 4.970701059450872e-06, "loss": 0.821, "step": 412 }, { "epoch": 0.2050390964378801, "grad_norm": 0.13277679173675005, "learning_rate": 4.970551678929562e-06, "loss": 0.7262, "step": 413 }, { "epoch": 0.2055355591411195, "grad_norm": 0.13842208760542138, "learning_rate": 4.970401920824039e-06, "loss": 0.7754, "step": 414 }, { "epoch": 0.20603202184435895, "grad_norm": 0.13680895396236756, "learning_rate": 4.970251785157193e-06, "loss": 0.7984, "step": 415 }, { "epoch": 0.20652848454759837, "grad_norm": 0.1332819441058511, "learning_rate": 4.9701012719519694e-06, "loss": 0.7827, "step": 416 }, { "epoch": 0.20702494725083778, "grad_norm": 0.13155005386495136, "learning_rate": 4.969950381231371e-06, "loss": 0.7598, "step": 417 }, { "epoch": 0.2075214099540772, "grad_norm": 0.13812357614760978, "learning_rate": 4.969799113018459e-06, "loss": 0.7805, "step": 418 }, { "epoch": 0.2080178726573166, "grad_norm": 0.15001010565178097, "learning_rate": 4.9696474673363536e-06, "loss": 0.7916, "step": 419 }, { "epoch": 0.20851433536055602, "grad_norm": 0.14065673015444682, "learning_rate": 4.96949544420823e-06, "loss": 0.7902, "step": 420 }, { "epoch": 0.20901079806379547, "grad_norm": 0.13896425621312414, "learning_rate": 4.969343043657323e-06, "loss": 0.78, "step": 421 }, { "epoch": 0.20950726076703488, "grad_norm": 0.14937608116406495, "learning_rate": 4.969190265706926e-06, "loss": 0.8289, "step": 422 }, { "epoch": 0.2100037234702743, "grad_norm": 0.13459240584003007, "learning_rate": 4.969037110380387e-06, "loss": 0.7906, "step": 423 }, { "epoch": 0.2105001861735137, "grad_norm": 0.1484626362599989, "learning_rate": 4.968883577701112e-06, "loss": 0.8244, "step": 424 }, { "epoch": 0.21099664887675312, "grad_norm": 0.13015898606801415, "learning_rate": 4.9687296676925686e-06, "loss": 0.7431, "step": 425 }, { "epoch": 0.21149311157999257, "grad_norm": 0.1377331802046174, "learning_rate": 4.96857538037828e-06, "loss": 0.8043, "step": 426 }, { "epoch": 0.21198957428323198, "grad_norm": 0.1363174078519366, "learning_rate": 4.968420715781823e-06, "loss": 0.7584, "step": 427 }, { "epoch": 0.2124860369864714, "grad_norm": 0.14109517205489622, "learning_rate": 4.9682656739268385e-06, "loss": 0.7868, "step": 428 }, { "epoch": 0.2129824996897108, "grad_norm": 0.14476636902871076, "learning_rate": 4.968110254837022e-06, "loss": 0.8051, "step": 429 }, { "epoch": 0.21347896239295022, "grad_norm": 0.13488134316489944, "learning_rate": 4.967954458536126e-06, "loss": 0.7816, "step": 430 }, { "epoch": 0.21397542509618964, "grad_norm": 0.1311388707406287, "learning_rate": 4.967798285047961e-06, "loss": 0.7856, "step": 431 }, { "epoch": 0.21447188779942908, "grad_norm": 0.13733380279723623, "learning_rate": 4.967641734396397e-06, "loss": 0.8495, "step": 432 }, { "epoch": 0.2149683505026685, "grad_norm": 0.13619991947733362, "learning_rate": 4.967484806605359e-06, "loss": 0.7803, "step": 433 }, { "epoch": 0.2154648132059079, "grad_norm": 0.13862255789954744, "learning_rate": 4.967327501698831e-06, "loss": 0.7749, "step": 434 }, { "epoch": 0.21596127590914732, "grad_norm": 0.1451665257035334, "learning_rate": 4.967169819700856e-06, "loss": 0.7966, "step": 435 }, { "epoch": 0.21645773861238674, "grad_norm": 0.13309881671792553, "learning_rate": 4.967011760635532e-06, "loss": 0.749, "step": 436 }, { "epoch": 0.21695420131562618, "grad_norm": 0.14123567913488688, "learning_rate": 4.966853324527015e-06, "loss": 0.7611, "step": 437 }, { "epoch": 0.2174506640188656, "grad_norm": 0.1398898539392954, "learning_rate": 4.966694511399521e-06, "loss": 0.8463, "step": 438 }, { "epoch": 0.217947126722105, "grad_norm": 0.1372879573733997, "learning_rate": 4.9665353212773215e-06, "loss": 0.7858, "step": 439 }, { "epoch": 0.21844358942534442, "grad_norm": 0.13654858802550288, "learning_rate": 4.966375754184746e-06, "loss": 0.8215, "step": 440 }, { "epoch": 0.21894005212858383, "grad_norm": 0.1319581844818004, "learning_rate": 4.966215810146181e-06, "loss": 0.8257, "step": 441 }, { "epoch": 0.21943651483182325, "grad_norm": 0.13477793443314756, "learning_rate": 4.966055489186072e-06, "loss": 0.8037, "step": 442 }, { "epoch": 0.2199329775350627, "grad_norm": 0.13368048885447226, "learning_rate": 4.965894791328924e-06, "loss": 0.7658, "step": 443 }, { "epoch": 0.2204294402383021, "grad_norm": 0.13455924110626613, "learning_rate": 4.965733716599292e-06, "loss": 0.8163, "step": 444 }, { "epoch": 0.22092590294154152, "grad_norm": 0.1389198409505174, "learning_rate": 4.965572265021798e-06, "loss": 0.8417, "step": 445 }, { "epoch": 0.22142236564478093, "grad_norm": 0.13388284038750456, "learning_rate": 4.965410436621115e-06, "loss": 0.7774, "step": 446 }, { "epoch": 0.22191882834802035, "grad_norm": 0.132820811008653, "learning_rate": 4.965248231421977e-06, "loss": 0.8015, "step": 447 }, { "epoch": 0.22241529105125976, "grad_norm": 0.13543397173414973, "learning_rate": 4.965085649449175e-06, "loss": 0.8054, "step": 448 }, { "epoch": 0.2229117537544992, "grad_norm": 0.13391298366876886, "learning_rate": 4.964922690727555e-06, "loss": 0.8157, "step": 449 }, { "epoch": 0.22340821645773862, "grad_norm": 0.13119644805742203, "learning_rate": 4.964759355282024e-06, "loss": 0.7737, "step": 450 }, { "epoch": 0.22390467916097803, "grad_norm": 0.13789404014972487, "learning_rate": 4.964595643137544e-06, "loss": 0.8227, "step": 451 }, { "epoch": 0.22440114186421745, "grad_norm": 0.13702173759120256, "learning_rate": 4.964431554319138e-06, "loss": 0.7777, "step": 452 }, { "epoch": 0.22489760456745686, "grad_norm": 0.167958896728115, "learning_rate": 4.964267088851883e-06, "loss": 0.8295, "step": 453 }, { "epoch": 0.2253940672706963, "grad_norm": 0.1362052877173801, "learning_rate": 4.964102246760915e-06, "loss": 0.8139, "step": 454 }, { "epoch": 0.22589052997393572, "grad_norm": 0.13619122221235858, "learning_rate": 4.963937028071427e-06, "loss": 0.7996, "step": 455 }, { "epoch": 0.22638699267717513, "grad_norm": 0.13307543777024503, "learning_rate": 4.96377143280867e-06, "loss": 0.7634, "step": 456 }, { "epoch": 0.22688345538041454, "grad_norm": 0.13177673869505957, "learning_rate": 4.963605460997954e-06, "loss": 0.7707, "step": 457 }, { "epoch": 0.22737991808365396, "grad_norm": 0.13107526362132932, "learning_rate": 4.963439112664644e-06, "loss": 0.7883, "step": 458 }, { "epoch": 0.22787638078689337, "grad_norm": 0.13905062257304818, "learning_rate": 4.963272387834163e-06, "loss": 0.7869, "step": 459 }, { "epoch": 0.22837284349013282, "grad_norm": 0.14228112591091902, "learning_rate": 4.963105286531994e-06, "loss": 0.7684, "step": 460 }, { "epoch": 0.22886930619337223, "grad_norm": 0.1278341075030752, "learning_rate": 4.962937808783675e-06, "loss": 0.7381, "step": 461 }, { "epoch": 0.22936576889661164, "grad_norm": 0.13836582166759392, "learning_rate": 4.962769954614802e-06, "loss": 0.8072, "step": 462 }, { "epoch": 0.22986223159985106, "grad_norm": 0.13221136159222038, "learning_rate": 4.962601724051029e-06, "loss": 0.7667, "step": 463 }, { "epoch": 0.23035869430309047, "grad_norm": 0.1359691390529077, "learning_rate": 4.962433117118067e-06, "loss": 0.8016, "step": 464 }, { "epoch": 0.2308551570063299, "grad_norm": 0.13838185689016483, "learning_rate": 4.962264133841686e-06, "loss": 0.7823, "step": 465 }, { "epoch": 0.23135161970956933, "grad_norm": 0.13217829549783422, "learning_rate": 4.96209477424771e-06, "loss": 0.7233, "step": 466 }, { "epoch": 0.23184808241280874, "grad_norm": 0.12783947356664202, "learning_rate": 4.9619250383620256e-06, "loss": 0.7493, "step": 467 }, { "epoch": 0.23234454511604816, "grad_norm": 0.13083482079204406, "learning_rate": 4.961754926210572e-06, "loss": 0.7389, "step": 468 }, { "epoch": 0.23284100781928757, "grad_norm": 0.13464481187124352, "learning_rate": 4.9615844378193505e-06, "loss": 0.78, "step": 469 }, { "epoch": 0.23333747052252699, "grad_norm": 0.13684478446270137, "learning_rate": 4.961413573214415e-06, "loss": 0.8143, "step": 470 }, { "epoch": 0.23383393322576643, "grad_norm": 0.14377264892348118, "learning_rate": 4.9612423324218816e-06, "loss": 0.8265, "step": 471 }, { "epoch": 0.23433039592900584, "grad_norm": 0.13245307939757578, "learning_rate": 4.961070715467921e-06, "loss": 0.8093, "step": 472 }, { "epoch": 0.23482685863224526, "grad_norm": 0.1373381453121188, "learning_rate": 4.9608987223787606e-06, "loss": 0.7547, "step": 473 }, { "epoch": 0.23532332133548467, "grad_norm": 0.1370145480054436, "learning_rate": 4.960726353180688e-06, "loss": 0.8459, "step": 474 }, { "epoch": 0.23581978403872408, "grad_norm": 0.1375438658427002, "learning_rate": 4.960553607900047e-06, "loss": 0.7813, "step": 475 }, { "epoch": 0.2363162467419635, "grad_norm": 0.13778611231342322, "learning_rate": 4.96038048656324e-06, "loss": 0.7739, "step": 476 }, { "epoch": 0.23681270944520294, "grad_norm": 0.13166121819124513, "learning_rate": 4.9602069891967245e-06, "loss": 0.7708, "step": 477 }, { "epoch": 0.23730917214844235, "grad_norm": 0.1396399426393052, "learning_rate": 4.9600331158270175e-06, "loss": 0.7725, "step": 478 }, { "epoch": 0.23780563485168177, "grad_norm": 0.1392271735139755, "learning_rate": 4.959858866480691e-06, "loss": 0.7779, "step": 479 }, { "epoch": 0.23830209755492118, "grad_norm": 0.13001276373757875, "learning_rate": 4.959684241184379e-06, "loss": 0.7653, "step": 480 }, { "epoch": 0.2387985602581606, "grad_norm": 0.13464254447792998, "learning_rate": 4.959509239964768e-06, "loss": 0.7932, "step": 481 }, { "epoch": 0.2392950229614, "grad_norm": 0.13597319212243217, "learning_rate": 4.959333862848605e-06, "loss": 0.7934, "step": 482 }, { "epoch": 0.23979148566463945, "grad_norm": 0.14655344478202936, "learning_rate": 4.959158109862694e-06, "loss": 0.798, "step": 483 }, { "epoch": 0.24028794836787887, "grad_norm": 0.1305172241900655, "learning_rate": 4.958981981033895e-06, "loss": 0.7791, "step": 484 }, { "epoch": 0.24078441107111828, "grad_norm": 0.13605164916015544, "learning_rate": 4.958805476389127e-06, "loss": 0.8128, "step": 485 }, { "epoch": 0.2412808737743577, "grad_norm": 0.13833174537228857, "learning_rate": 4.958628595955366e-06, "loss": 0.7694, "step": 486 }, { "epoch": 0.2417773364775971, "grad_norm": 0.1358842375740616, "learning_rate": 4.958451339759645e-06, "loss": 0.7786, "step": 487 }, { "epoch": 0.24227379918083655, "grad_norm": 0.13943485857687118, "learning_rate": 4.9582737078290556e-06, "loss": 0.8026, "step": 488 }, { "epoch": 0.24277026188407597, "grad_norm": 0.1341222818235527, "learning_rate": 4.958095700190745e-06, "loss": 0.821, "step": 489 }, { "epoch": 0.24326672458731538, "grad_norm": 0.13304092780884888, "learning_rate": 4.957917316871919e-06, "loss": 0.8058, "step": 490 }, { "epoch": 0.2437631872905548, "grad_norm": 0.13446637676436832, "learning_rate": 4.957738557899841e-06, "loss": 0.8165, "step": 491 }, { "epoch": 0.2442596499937942, "grad_norm": 0.1355754492629554, "learning_rate": 4.9575594233018305e-06, "loss": 0.84, "step": 492 }, { "epoch": 0.24475611269703362, "grad_norm": 0.1326880486992906, "learning_rate": 4.957379913105267e-06, "loss": 0.7595, "step": 493 }, { "epoch": 0.24525257540027307, "grad_norm": 0.13614000382079813, "learning_rate": 4.957200027337585e-06, "loss": 0.7988, "step": 494 }, { "epoch": 0.24574903810351248, "grad_norm": 0.12773189423591907, "learning_rate": 4.957019766026277e-06, "loss": 0.7577, "step": 495 }, { "epoch": 0.2462455008067519, "grad_norm": 0.14086258045001926, "learning_rate": 4.956839129198892e-06, "loss": 0.8078, "step": 496 }, { "epoch": 0.2467419635099913, "grad_norm": 0.13555212844444037, "learning_rate": 4.95665811688304e-06, "loss": 0.8094, "step": 497 }, { "epoch": 0.24723842621323072, "grad_norm": 0.13674333466288918, "learning_rate": 4.9564767291063844e-06, "loss": 0.7807, "step": 498 }, { "epoch": 0.24773488891647016, "grad_norm": 0.1304786699944242, "learning_rate": 4.956294965896647e-06, "loss": 0.7614, "step": 499 }, { "epoch": 0.24823135161970958, "grad_norm": 0.13848355027573125, "learning_rate": 4.956112827281607e-06, "loss": 0.8078, "step": 500 }, { "epoch": 0.248727814322949, "grad_norm": 0.13212981378633934, "learning_rate": 4.955930313289102e-06, "loss": 0.7895, "step": 501 }, { "epoch": 0.2492242770261884, "grad_norm": 0.134424879179094, "learning_rate": 4.955747423947027e-06, "loss": 0.8235, "step": 502 }, { "epoch": 0.24972073972942782, "grad_norm": 0.12855995535715545, "learning_rate": 4.955564159283334e-06, "loss": 0.7454, "step": 503 }, { "epoch": 0.25021720243266726, "grad_norm": 0.13938742116853411, "learning_rate": 4.95538051932603e-06, "loss": 0.7975, "step": 504 }, { "epoch": 0.25021720243266726, "eval_loss": 0.7929754853248596, "eval_runtime": 135.44, "eval_samples_per_second": 224.107, "eval_steps_per_second": 28.02, "step": 504 }, { "epoch": 0.25071366513590665, "grad_norm": 0.13609034177916032, "learning_rate": 4.9551965041031835e-06, "loss": 0.7783, "step": 505 }, { "epoch": 0.2512101278391461, "grad_norm": 0.13139635870746627, "learning_rate": 4.955012113642916e-06, "loss": 0.7706, "step": 506 }, { "epoch": 0.2517065905423855, "grad_norm": 0.13959142837209643, "learning_rate": 4.954827347973412e-06, "loss": 0.8491, "step": 507 }, { "epoch": 0.2522030532456249, "grad_norm": 0.15436767440806537, "learning_rate": 4.954642207122907e-06, "loss": 0.8424, "step": 508 }, { "epoch": 0.25269951594886436, "grad_norm": 0.14047650581011453, "learning_rate": 4.954456691119698e-06, "loss": 0.7782, "step": 509 }, { "epoch": 0.25319597865210375, "grad_norm": 0.13512624112535368, "learning_rate": 4.954270799992138e-06, "loss": 0.7552, "step": 510 }, { "epoch": 0.2536924413553432, "grad_norm": 0.13165016338907254, "learning_rate": 4.954084533768637e-06, "loss": 0.7652, "step": 511 }, { "epoch": 0.2541889040585826, "grad_norm": 0.13432352709536072, "learning_rate": 4.953897892477664e-06, "loss": 0.7549, "step": 512 }, { "epoch": 0.254685366761822, "grad_norm": 0.13825583852964196, "learning_rate": 4.953710876147743e-06, "loss": 0.7461, "step": 513 }, { "epoch": 0.25518182946506146, "grad_norm": 0.13800340620252008, "learning_rate": 4.953523484807456e-06, "loss": 0.8118, "step": 514 }, { "epoch": 0.25567829216830085, "grad_norm": 0.1357923592978122, "learning_rate": 4.9533357184854454e-06, "loss": 0.8104, "step": 515 }, { "epoch": 0.2561747548715403, "grad_norm": 0.13697526329337478, "learning_rate": 4.953147577210406e-06, "loss": 0.853, "step": 516 }, { "epoch": 0.2566712175747797, "grad_norm": 0.13197113130506147, "learning_rate": 4.952959061011091e-06, "loss": 0.7424, "step": 517 }, { "epoch": 0.2571676802780191, "grad_norm": 0.13803258269495886, "learning_rate": 4.952770169916316e-06, "loss": 0.8018, "step": 518 }, { "epoch": 0.25766414298125856, "grad_norm": 0.13462239799861447, "learning_rate": 4.952580903954946e-06, "loss": 0.7604, "step": 519 }, { "epoch": 0.25816060568449795, "grad_norm": 0.13185857943402207, "learning_rate": 4.95239126315591e-06, "loss": 0.7809, "step": 520 }, { "epoch": 0.2586570683877374, "grad_norm": 0.13281620469964295, "learning_rate": 4.95220124754819e-06, "loss": 0.7726, "step": 521 }, { "epoch": 0.2591535310909768, "grad_norm": 0.14105017206726814, "learning_rate": 4.952010857160828e-06, "loss": 0.785, "step": 522 }, { "epoch": 0.2596499937942162, "grad_norm": 0.1439691078823198, "learning_rate": 4.951820092022921e-06, "loss": 0.7934, "step": 523 }, { "epoch": 0.2601464564974556, "grad_norm": 0.1382892013792061, "learning_rate": 4.951628952163625e-06, "loss": 0.7892, "step": 524 }, { "epoch": 0.26064291920069504, "grad_norm": 0.13861843984858735, "learning_rate": 4.951437437612152e-06, "loss": 0.8235, "step": 525 }, { "epoch": 0.2611393819039345, "grad_norm": 0.14191335563414534, "learning_rate": 4.951245548397773e-06, "loss": 0.8068, "step": 526 }, { "epoch": 0.2616358446071739, "grad_norm": 0.13893025334242587, "learning_rate": 4.951053284549815e-06, "loss": 0.7718, "step": 527 }, { "epoch": 0.2621323073104133, "grad_norm": 0.128500724810349, "learning_rate": 4.950860646097661e-06, "loss": 0.7775, "step": 528 }, { "epoch": 0.2626287700136527, "grad_norm": 0.14778687944277374, "learning_rate": 4.950667633070755e-06, "loss": 0.8259, "step": 529 }, { "epoch": 0.26312523271689214, "grad_norm": 0.1482567507544276, "learning_rate": 4.950474245498594e-06, "loss": 0.751, "step": 530 }, { "epoch": 0.2636216954201316, "grad_norm": 0.14168154296888727, "learning_rate": 4.950280483410735e-06, "loss": 0.7766, "step": 531 }, { "epoch": 0.26411815812337097, "grad_norm": 0.13937007291923947, "learning_rate": 4.950086346836792e-06, "loss": 0.8215, "step": 532 }, { "epoch": 0.2646146208266104, "grad_norm": 0.13948603039348756, "learning_rate": 4.949891835806434e-06, "loss": 0.764, "step": 533 }, { "epoch": 0.2651110835298498, "grad_norm": 0.1398432077501712, "learning_rate": 4.9496969503493905e-06, "loss": 0.7917, "step": 534 }, { "epoch": 0.26560754623308924, "grad_norm": 0.13864095868027326, "learning_rate": 4.949501690495446e-06, "loss": 0.7852, "step": 535 }, { "epoch": 0.2661040089363287, "grad_norm": 0.137626101201187, "learning_rate": 4.949306056274443e-06, "loss": 0.7971, "step": 536 }, { "epoch": 0.26660047163956807, "grad_norm": 0.13991644106960857, "learning_rate": 4.949110047716281e-06, "loss": 0.8049, "step": 537 }, { "epoch": 0.2670969343428075, "grad_norm": 0.13755972862852978, "learning_rate": 4.948913664850917e-06, "loss": 0.7761, "step": 538 }, { "epoch": 0.2675933970460469, "grad_norm": 0.14095099190685437, "learning_rate": 4.9487169077083645e-06, "loss": 0.7995, "step": 539 }, { "epoch": 0.26808985974928634, "grad_norm": 0.13772198796480103, "learning_rate": 4.948519776318694e-06, "loss": 0.775, "step": 540 }, { "epoch": 0.2685863224525257, "grad_norm": 0.1474365567813288, "learning_rate": 4.948322270712036e-06, "loss": 0.775, "step": 541 }, { "epoch": 0.26908278515576517, "grad_norm": 0.14740248795105346, "learning_rate": 4.948124390918574e-06, "loss": 0.8823, "step": 542 }, { "epoch": 0.2695792478590046, "grad_norm": 0.13090799079120452, "learning_rate": 4.947926136968551e-06, "loss": 0.7722, "step": 543 }, { "epoch": 0.270075710562244, "grad_norm": 0.1398513276367715, "learning_rate": 4.947727508892268e-06, "loss": 0.7622, "step": 544 }, { "epoch": 0.27057217326548344, "grad_norm": 0.1402504056708596, "learning_rate": 4.947528506720082e-06, "loss": 0.7855, "step": 545 }, { "epoch": 0.2710686359687228, "grad_norm": 0.13124650948584116, "learning_rate": 4.947329130482407e-06, "loss": 0.738, "step": 546 }, { "epoch": 0.27156509867196227, "grad_norm": 0.13017696798038783, "learning_rate": 4.947129380209713e-06, "loss": 0.7761, "step": 547 }, { "epoch": 0.2720615613752017, "grad_norm": 0.14381395142272502, "learning_rate": 4.9469292559325316e-06, "loss": 0.7887, "step": 548 }, { "epoch": 0.2725580240784411, "grad_norm": 0.13338140216734587, "learning_rate": 4.946728757681446e-06, "loss": 0.8003, "step": 549 }, { "epoch": 0.27305448678168054, "grad_norm": 0.13560694970854092, "learning_rate": 4.946527885487101e-06, "loss": 0.7377, "step": 550 }, { "epoch": 0.2735509494849199, "grad_norm": 0.13758839128075348, "learning_rate": 4.946326639380194e-06, "loss": 0.8017, "step": 551 }, { "epoch": 0.27404741218815937, "grad_norm": 0.13753618486867644, "learning_rate": 4.946125019391486e-06, "loss": 0.7786, "step": 552 }, { "epoch": 0.2745438748913988, "grad_norm": 0.13936503272255338, "learning_rate": 4.945923025551789e-06, "loss": 0.7771, "step": 553 }, { "epoch": 0.2750403375946382, "grad_norm": 0.13630518143436557, "learning_rate": 4.945720657891975e-06, "loss": 0.7896, "step": 554 }, { "epoch": 0.27553680029787764, "grad_norm": 0.13452520454118158, "learning_rate": 4.945517916442971e-06, "loss": 0.82, "step": 555 }, { "epoch": 0.276033263001117, "grad_norm": 0.15078185753975815, "learning_rate": 4.945314801235766e-06, "loss": 0.7977, "step": 556 }, { "epoch": 0.27652972570435647, "grad_norm": 0.13571286379008968, "learning_rate": 4.9451113123014e-06, "loss": 0.8183, "step": 557 }, { "epoch": 0.27702618840759585, "grad_norm": 0.14134176469144435, "learning_rate": 4.9449074496709756e-06, "loss": 0.8391, "step": 558 }, { "epoch": 0.2775226511108353, "grad_norm": 0.13575461049209747, "learning_rate": 4.944703213375648e-06, "loss": 0.7913, "step": 559 }, { "epoch": 0.27801911381407474, "grad_norm": 0.13627699697738124, "learning_rate": 4.944498603446633e-06, "loss": 0.7737, "step": 560 }, { "epoch": 0.2785155765173141, "grad_norm": 0.1436068512980607, "learning_rate": 4.9442936199152e-06, "loss": 0.7665, "step": 561 }, { "epoch": 0.27901203922055356, "grad_norm": 0.1419116978490336, "learning_rate": 4.944088262812679e-06, "loss": 0.8012, "step": 562 }, { "epoch": 0.27950850192379295, "grad_norm": 0.14345533439447763, "learning_rate": 4.943882532170454e-06, "loss": 0.7487, "step": 563 }, { "epoch": 0.2800049646270324, "grad_norm": 0.1482619360446857, "learning_rate": 4.94367642801997e-06, "loss": 0.7929, "step": 564 }, { "epoch": 0.28050142733027184, "grad_norm": 0.1379616939658588, "learning_rate": 4.943469950392724e-06, "loss": 0.7792, "step": 565 }, { "epoch": 0.2809978900335112, "grad_norm": 0.14304974445218507, "learning_rate": 4.943263099320275e-06, "loss": 0.7879, "step": 566 }, { "epoch": 0.28149435273675066, "grad_norm": 0.1428969404097643, "learning_rate": 4.943055874834236e-06, "loss": 0.7217, "step": 567 }, { "epoch": 0.28199081543999005, "grad_norm": 0.13838491143778672, "learning_rate": 4.942848276966278e-06, "loss": 0.7751, "step": 568 }, { "epoch": 0.2824872781432295, "grad_norm": 0.13637716635230512, "learning_rate": 4.942640305748128e-06, "loss": 0.7978, "step": 569 }, { "epoch": 0.28298374084646893, "grad_norm": 0.14002084365223352, "learning_rate": 4.942431961211573e-06, "loss": 0.8041, "step": 570 }, { "epoch": 0.2834802035497083, "grad_norm": 0.13976480972292757, "learning_rate": 4.942223243388454e-06, "loss": 0.801, "step": 571 }, { "epoch": 0.28397666625294776, "grad_norm": 0.14031361844161488, "learning_rate": 4.9420141523106705e-06, "loss": 0.7837, "step": 572 }, { "epoch": 0.28447312895618715, "grad_norm": 0.14188180474957363, "learning_rate": 4.941804688010178e-06, "loss": 0.8129, "step": 573 }, { "epoch": 0.2849695916594266, "grad_norm": 0.13739560633335732, "learning_rate": 4.941594850518991e-06, "loss": 0.8599, "step": 574 }, { "epoch": 0.285466054362666, "grad_norm": 0.13161274663378267, "learning_rate": 4.9413846398691775e-06, "loss": 0.7992, "step": 575 }, { "epoch": 0.2859625170659054, "grad_norm": 0.13571724186099784, "learning_rate": 4.941174056092868e-06, "loss": 0.7426, "step": 576 }, { "epoch": 0.28645897976914486, "grad_norm": 0.14657992209581697, "learning_rate": 4.940963099222244e-06, "loss": 0.7474, "step": 577 }, { "epoch": 0.28695544247238425, "grad_norm": 0.14054315894814753, "learning_rate": 4.94075176928955e-06, "loss": 0.8116, "step": 578 }, { "epoch": 0.2874519051756237, "grad_norm": 0.14667038794719667, "learning_rate": 4.940540066327082e-06, "loss": 0.839, "step": 579 }, { "epoch": 0.2879483678788631, "grad_norm": 0.13180298446137564, "learning_rate": 4.940327990367196e-06, "loss": 0.7865, "step": 580 }, { "epoch": 0.2884448305821025, "grad_norm": 0.14864936755923036, "learning_rate": 4.940115541442303e-06, "loss": 0.7855, "step": 581 }, { "epoch": 0.28894129328534196, "grad_norm": 0.13838905353793282, "learning_rate": 4.939902719584875e-06, "loss": 0.7736, "step": 582 }, { "epoch": 0.28943775598858135, "grad_norm": 0.13826980328271227, "learning_rate": 4.939689524827436e-06, "loss": 0.7738, "step": 583 }, { "epoch": 0.2899342186918208, "grad_norm": 0.13562760080033687, "learning_rate": 4.939475957202572e-06, "loss": 0.7803, "step": 584 }, { "epoch": 0.2904306813950602, "grad_norm": 0.1477129197436419, "learning_rate": 4.939262016742921e-06, "loss": 0.8892, "step": 585 }, { "epoch": 0.2909271440982996, "grad_norm": 0.13608868262673587, "learning_rate": 4.939047703481182e-06, "loss": 0.7787, "step": 586 }, { "epoch": 0.29142360680153906, "grad_norm": 0.13532164943155106, "learning_rate": 4.938833017450108e-06, "loss": 0.7919, "step": 587 }, { "epoch": 0.29192006950477845, "grad_norm": 0.1354528176328873, "learning_rate": 4.938617958682511e-06, "loss": 0.7187, "step": 588 }, { "epoch": 0.2924165322080179, "grad_norm": 0.14287654528543972, "learning_rate": 4.93840252721126e-06, "loss": 0.7962, "step": 589 }, { "epoch": 0.2929129949112573, "grad_norm": 0.13500520944212716, "learning_rate": 4.9381867230692795e-06, "loss": 0.7953, "step": 590 }, { "epoch": 0.2934094576144967, "grad_norm": 0.13701507538430724, "learning_rate": 4.937970546289551e-06, "loss": 0.8274, "step": 591 }, { "epoch": 0.2939059203177361, "grad_norm": 0.13546590818001467, "learning_rate": 4.937753996905115e-06, "loss": 0.7731, "step": 592 }, { "epoch": 0.29440238302097554, "grad_norm": 0.13811700535991556, "learning_rate": 4.937537074949067e-06, "loss": 0.7647, "step": 593 }, { "epoch": 0.294898845724215, "grad_norm": 0.12945879834966295, "learning_rate": 4.937319780454559e-06, "loss": 0.744, "step": 594 }, { "epoch": 0.2953953084274544, "grad_norm": 0.13498731326365385, "learning_rate": 4.937102113454803e-06, "loss": 0.7759, "step": 595 }, { "epoch": 0.2958917711306938, "grad_norm": 0.1292963266491359, "learning_rate": 4.936884073983065e-06, "loss": 0.7647, "step": 596 }, { "epoch": 0.2963882338339332, "grad_norm": 0.1364683596899809, "learning_rate": 4.9366656620726685e-06, "loss": 0.782, "step": 597 }, { "epoch": 0.29688469653717264, "grad_norm": 0.14354575937871145, "learning_rate": 4.936446877756994e-06, "loss": 0.7965, "step": 598 }, { "epoch": 0.2973811592404121, "grad_norm": 0.14297046663924878, "learning_rate": 4.936227721069481e-06, "loss": 0.8565, "step": 599 }, { "epoch": 0.29787762194365147, "grad_norm": 0.13515162331475536, "learning_rate": 4.936008192043621e-06, "loss": 0.7897, "step": 600 }, { "epoch": 0.2983740846468909, "grad_norm": 0.14366874023945117, "learning_rate": 4.935788290712969e-06, "loss": 0.8526, "step": 601 }, { "epoch": 0.2988705473501303, "grad_norm": 0.13501986218993212, "learning_rate": 4.935568017111131e-06, "loss": 0.7843, "step": 602 }, { "epoch": 0.29936701005336974, "grad_norm": 0.13501818290266443, "learning_rate": 4.935347371271772e-06, "loss": 0.785, "step": 603 }, { "epoch": 0.2998634727566092, "grad_norm": 0.1357480400677297, "learning_rate": 4.9351263532286165e-06, "loss": 0.809, "step": 604 }, { "epoch": 0.30035993545984857, "grad_norm": 0.13963197307606626, "learning_rate": 4.934904963015442e-06, "loss": 0.7573, "step": 605 }, { "epoch": 0.300856398163088, "grad_norm": 0.13904388544231225, "learning_rate": 4.934683200666084e-06, "loss": 0.76, "step": 606 }, { "epoch": 0.3013528608663274, "grad_norm": 0.13273586015038175, "learning_rate": 4.934461066214436e-06, "loss": 0.777, "step": 607 }, { "epoch": 0.30184932356956684, "grad_norm": 0.1410913256356068, "learning_rate": 4.934238559694448e-06, "loss": 0.8199, "step": 608 }, { "epoch": 0.3023457862728063, "grad_norm": 0.13415630992181152, "learning_rate": 4.9340156811401265e-06, "loss": 0.8143, "step": 609 }, { "epoch": 0.30284224897604567, "grad_norm": 0.1350060544192735, "learning_rate": 4.9337924305855335e-06, "loss": 0.7607, "step": 610 }, { "epoch": 0.3033387116792851, "grad_norm": 0.13671649355566554, "learning_rate": 4.933568808064791e-06, "loss": 0.8062, "step": 611 }, { "epoch": 0.3038351743825245, "grad_norm": 0.13241711189350516, "learning_rate": 4.933344813612076e-06, "loss": 0.7463, "step": 612 }, { "epoch": 0.30433163708576394, "grad_norm": 0.145086762190584, "learning_rate": 4.933120447261621e-06, "loss": 0.8496, "step": 613 }, { "epoch": 0.3048280997890033, "grad_norm": 0.13576833005384956, "learning_rate": 4.932895709047719e-06, "loss": 0.7619, "step": 614 }, { "epoch": 0.30532456249224277, "grad_norm": 0.13080739610003392, "learning_rate": 4.932670599004715e-06, "loss": 0.7436, "step": 615 }, { "epoch": 0.3058210251954822, "grad_norm": 0.13931345500666475, "learning_rate": 4.932445117167016e-06, "loss": 0.7806, "step": 616 }, { "epoch": 0.3063174878987216, "grad_norm": 0.1403456162971794, "learning_rate": 4.932219263569082e-06, "loss": 0.8203, "step": 617 }, { "epoch": 0.30681395060196104, "grad_norm": 0.14545314884277688, "learning_rate": 4.93199303824543e-06, "loss": 0.7991, "step": 618 }, { "epoch": 0.3073104133052004, "grad_norm": 0.13361188513192082, "learning_rate": 4.931766441230637e-06, "loss": 0.7669, "step": 619 }, { "epoch": 0.30780687600843987, "grad_norm": 0.14211532439526364, "learning_rate": 4.931539472559335e-06, "loss": 0.8197, "step": 620 }, { "epoch": 0.3083033387116793, "grad_norm": 0.1384426265495238, "learning_rate": 4.93131213226621e-06, "loss": 0.7982, "step": 621 }, { "epoch": 0.3087998014149187, "grad_norm": 0.13479886625937776, "learning_rate": 4.931084420386009e-06, "loss": 0.7696, "step": 622 }, { "epoch": 0.30929626411815814, "grad_norm": 0.13591593582520675, "learning_rate": 4.9308563369535335e-06, "loss": 0.8021, "step": 623 }, { "epoch": 0.3097927268213975, "grad_norm": 0.13924380730728358, "learning_rate": 4.930627882003644e-06, "loss": 0.8394, "step": 624 }, { "epoch": 0.31028918952463697, "grad_norm": 0.13692960315786307, "learning_rate": 4.930399055571253e-06, "loss": 0.7905, "step": 625 }, { "epoch": 0.3107856522278764, "grad_norm": 0.1339979894901406, "learning_rate": 4.930169857691336e-06, "loss": 0.7881, "step": 626 }, { "epoch": 0.3112821149311158, "grad_norm": 0.1367431893133873, "learning_rate": 4.929940288398921e-06, "loss": 0.7887, "step": 627 }, { "epoch": 0.31177857763435524, "grad_norm": 0.13841200775795232, "learning_rate": 4.929710347729094e-06, "loss": 0.7653, "step": 628 }, { "epoch": 0.3122750403375946, "grad_norm": 0.13600960600331674, "learning_rate": 4.929480035716997e-06, "loss": 0.7811, "step": 629 }, { "epoch": 0.31277150304083406, "grad_norm": 0.13653488024445395, "learning_rate": 4.9292493523978315e-06, "loss": 0.7919, "step": 630 }, { "epoch": 0.31326796574407345, "grad_norm": 0.13176835024293457, "learning_rate": 4.929018297806852e-06, "loss": 0.7634, "step": 631 }, { "epoch": 0.3137644284473129, "grad_norm": 0.1469495293000222, "learning_rate": 4.928786871979372e-06, "loss": 0.8497, "step": 632 }, { "epoch": 0.31426089115055234, "grad_norm": 0.1431964358331162, "learning_rate": 4.928555074950761e-06, "loss": 0.8058, "step": 633 }, { "epoch": 0.3147573538537917, "grad_norm": 0.13850195286656924, "learning_rate": 4.928322906756446e-06, "loss": 0.7735, "step": 634 }, { "epoch": 0.31525381655703116, "grad_norm": 0.13463773472502238, "learning_rate": 4.92809036743191e-06, "loss": 0.7184, "step": 635 }, { "epoch": 0.31575027926027055, "grad_norm": 0.1364605714313478, "learning_rate": 4.927857457012693e-06, "loss": 0.7585, "step": 636 }, { "epoch": 0.31624674196351, "grad_norm": 0.1366334427972066, "learning_rate": 4.927624175534391e-06, "loss": 0.7739, "step": 637 }, { "epoch": 0.31674320466674943, "grad_norm": 0.13226666489304473, "learning_rate": 4.927390523032658e-06, "loss": 0.7801, "step": 638 }, { "epoch": 0.3172396673699888, "grad_norm": 0.1321397495808711, "learning_rate": 4.927156499543203e-06, "loss": 0.7691, "step": 639 }, { "epoch": 0.31773613007322826, "grad_norm": 0.14417418984304473, "learning_rate": 4.926922105101795e-06, "loss": 0.8335, "step": 640 }, { "epoch": 0.31823259277646765, "grad_norm": 0.1373833193656603, "learning_rate": 4.926687339744255e-06, "loss": 0.8057, "step": 641 }, { "epoch": 0.3187290554797071, "grad_norm": 0.13780220647272026, "learning_rate": 4.926452203506464e-06, "loss": 0.8188, "step": 642 }, { "epoch": 0.31922551818294653, "grad_norm": 0.1304815912554088, "learning_rate": 4.926216696424359e-06, "loss": 0.7549, "step": 643 }, { "epoch": 0.3197219808861859, "grad_norm": 0.14230394590112047, "learning_rate": 4.9259808185339344e-06, "loss": 0.8688, "step": 644 }, { "epoch": 0.32021844358942536, "grad_norm": 0.13197144711432499, "learning_rate": 4.925744569871238e-06, "loss": 0.7638, "step": 645 }, { "epoch": 0.32071490629266475, "grad_norm": 0.1377381360289958, "learning_rate": 4.925507950472378e-06, "loss": 0.7865, "step": 646 }, { "epoch": 0.3212113689959042, "grad_norm": 0.14064672696348562, "learning_rate": 4.9252709603735184e-06, "loss": 0.8655, "step": 647 }, { "epoch": 0.3217078316991436, "grad_norm": 0.17099346992203562, "learning_rate": 4.925033599610879e-06, "loss": 0.7557, "step": 648 }, { "epoch": 0.322204294402383, "grad_norm": 0.13649366857460205, "learning_rate": 4.9247958682207365e-06, "loss": 0.8075, "step": 649 }, { "epoch": 0.32270075710562246, "grad_norm": 0.13376375859315848, "learning_rate": 4.924557766239424e-06, "loss": 0.8125, "step": 650 }, { "epoch": 0.32319721980886185, "grad_norm": 0.1395328981536072, "learning_rate": 4.9243192937033304e-06, "loss": 0.7883, "step": 651 }, { "epoch": 0.3236936825121013, "grad_norm": 0.13819391735551326, "learning_rate": 4.924080450648905e-06, "loss": 0.8368, "step": 652 }, { "epoch": 0.3241901452153407, "grad_norm": 0.13868224090210812, "learning_rate": 4.92384123711265e-06, "loss": 0.7756, "step": 653 }, { "epoch": 0.3246866079185801, "grad_norm": 0.13995057823515514, "learning_rate": 4.923601653131125e-06, "loss": 0.7953, "step": 654 }, { "epoch": 0.32518307062181956, "grad_norm": 0.14384219577990262, "learning_rate": 4.923361698740946e-06, "loss": 0.7864, "step": 655 }, { "epoch": 0.32567953332505895, "grad_norm": 0.1359832308842288, "learning_rate": 4.923121373978789e-06, "loss": 0.7315, "step": 656 }, { "epoch": 0.3261759960282984, "grad_norm": 0.1332816844154388, "learning_rate": 4.92288067888138e-06, "loss": 0.771, "step": 657 }, { "epoch": 0.3266724587315378, "grad_norm": 0.13971086373297162, "learning_rate": 4.922639613485508e-06, "loss": 0.7904, "step": 658 }, { "epoch": 0.3271689214347772, "grad_norm": 0.13132115501434735, "learning_rate": 4.922398177828015e-06, "loss": 0.7416, "step": 659 }, { "epoch": 0.32766538413801666, "grad_norm": 0.13992347458321633, "learning_rate": 4.9221563719458e-06, "loss": 0.814, "step": 660 }, { "epoch": 0.32816184684125604, "grad_norm": 0.13633398687640888, "learning_rate": 4.921914195875821e-06, "loss": 0.7611, "step": 661 }, { "epoch": 0.3286583095444955, "grad_norm": 0.14882463890187234, "learning_rate": 4.921671649655088e-06, "loss": 0.7434, "step": 662 }, { "epoch": 0.3291547722477349, "grad_norm": 0.13772949607044616, "learning_rate": 4.921428733320674e-06, "loss": 0.7717, "step": 663 }, { "epoch": 0.3296512349509743, "grad_norm": 0.1414530607047641, "learning_rate": 4.921185446909702e-06, "loss": 0.782, "step": 664 }, { "epoch": 0.3301476976542137, "grad_norm": 0.14471648591939731, "learning_rate": 4.920941790459355e-06, "loss": 0.7662, "step": 665 }, { "epoch": 0.33064416035745314, "grad_norm": 0.14485133167971112, "learning_rate": 4.920697764006872e-06, "loss": 0.8213, "step": 666 }, { "epoch": 0.3311406230606926, "grad_norm": 0.14192892706466312, "learning_rate": 4.920453367589548e-06, "loss": 0.8613, "step": 667 }, { "epoch": 0.33163708576393197, "grad_norm": 0.14207950507096373, "learning_rate": 4.920208601244737e-06, "loss": 0.826, "step": 668 }, { "epoch": 0.3321335484671714, "grad_norm": 0.14408452937358693, "learning_rate": 4.919963465009846e-06, "loss": 0.7818, "step": 669 }, { "epoch": 0.3326300111704108, "grad_norm": 0.13736006931932224, "learning_rate": 4.919717958922341e-06, "loss": 0.7589, "step": 670 }, { "epoch": 0.33312647387365024, "grad_norm": 0.14298168063992314, "learning_rate": 4.919472083019743e-06, "loss": 0.8034, "step": 671 }, { "epoch": 0.3336229365768897, "grad_norm": 0.1376542857034229, "learning_rate": 4.91922583733963e-06, "loss": 0.7863, "step": 672 }, { "epoch": 0.33411939928012907, "grad_norm": 0.1390010637369658, "learning_rate": 4.918979221919637e-06, "loss": 0.842, "step": 673 }, { "epoch": 0.3346158619833685, "grad_norm": 0.14484040584209332, "learning_rate": 4.918732236797456e-06, "loss": 0.7629, "step": 674 }, { "epoch": 0.3351123246866079, "grad_norm": 0.13890387799381465, "learning_rate": 4.918484882010833e-06, "loss": 0.778, "step": 675 }, { "epoch": 0.33560878738984734, "grad_norm": 0.13863969701935344, "learning_rate": 4.918237157597574e-06, "loss": 0.8279, "step": 676 }, { "epoch": 0.3361052500930868, "grad_norm": 0.1426640315283534, "learning_rate": 4.917989063595539e-06, "loss": 0.8762, "step": 677 }, { "epoch": 0.33660171279632617, "grad_norm": 0.13500863262637763, "learning_rate": 4.917740600042645e-06, "loss": 0.7536, "step": 678 }, { "epoch": 0.3370981754995656, "grad_norm": 0.12795948317244496, "learning_rate": 4.917491766976865e-06, "loss": 0.7169, "step": 679 }, { "epoch": 0.337594638202805, "grad_norm": 0.13710750398142116, "learning_rate": 4.917242564436231e-06, "loss": 0.738, "step": 680 }, { "epoch": 0.33809110090604444, "grad_norm": 0.13190657220996582, "learning_rate": 4.916992992458828e-06, "loss": 0.7693, "step": 681 }, { "epoch": 0.3385875636092838, "grad_norm": 0.13830630412972159, "learning_rate": 4.9167430510828e-06, "loss": 0.8153, "step": 682 }, { "epoch": 0.33908402631252327, "grad_norm": 0.13304884196362807, "learning_rate": 4.916492740346346e-06, "loss": 0.7945, "step": 683 }, { "epoch": 0.3395804890157627, "grad_norm": 0.12774428215124484, "learning_rate": 4.916242060287723e-06, "loss": 0.7052, "step": 684 }, { "epoch": 0.3400769517190021, "grad_norm": 0.1265371082556476, "learning_rate": 4.9159910109452416e-06, "loss": 0.7483, "step": 685 }, { "epoch": 0.34057341442224154, "grad_norm": 0.13343444419954362, "learning_rate": 4.9157395923572716e-06, "loss": 0.7653, "step": 686 }, { "epoch": 0.3410698771254809, "grad_norm": 0.13166700692340655, "learning_rate": 4.9154878045622385e-06, "loss": 0.7587, "step": 687 }, { "epoch": 0.34156633982872037, "grad_norm": 0.14076974443956647, "learning_rate": 4.915235647598624e-06, "loss": 0.7912, "step": 688 }, { "epoch": 0.3420628025319598, "grad_norm": 0.12977776060064555, "learning_rate": 4.914983121504966e-06, "loss": 0.7723, "step": 689 }, { "epoch": 0.3425592652351992, "grad_norm": 0.13812993400540244, "learning_rate": 4.914730226319859e-06, "loss": 0.7968, "step": 690 }, { "epoch": 0.34305572793843864, "grad_norm": 0.14598669976661624, "learning_rate": 4.914476962081954e-06, "loss": 0.8009, "step": 691 }, { "epoch": 0.343552190641678, "grad_norm": 0.13314085383077542, "learning_rate": 4.9142233288299595e-06, "loss": 0.8293, "step": 692 }, { "epoch": 0.34404865334491747, "grad_norm": 0.14232577807503202, "learning_rate": 4.9139693266026375e-06, "loss": 0.7755, "step": 693 }, { "epoch": 0.3445451160481569, "grad_norm": 0.13552034485159273, "learning_rate": 4.91371495543881e-06, "loss": 0.8046, "step": 694 }, { "epoch": 0.3450415787513963, "grad_norm": 0.13920010636352773, "learning_rate": 4.913460215377351e-06, "loss": 0.7898, "step": 695 }, { "epoch": 0.34553804145463574, "grad_norm": 0.1365082779225381, "learning_rate": 4.9132051064571965e-06, "loss": 0.7949, "step": 696 }, { "epoch": 0.3460345041578751, "grad_norm": 0.13329039576470306, "learning_rate": 4.912949628717334e-06, "loss": 0.7691, "step": 697 }, { "epoch": 0.34653096686111456, "grad_norm": 0.13821152204427162, "learning_rate": 4.912693782196808e-06, "loss": 0.782, "step": 698 }, { "epoch": 0.34702742956435395, "grad_norm": 0.1351784114090911, "learning_rate": 4.912437566934724e-06, "loss": 0.7467, "step": 699 }, { "epoch": 0.3475238922675934, "grad_norm": 0.13348674748078376, "learning_rate": 4.912180982970237e-06, "loss": 0.7762, "step": 700 }, { "epoch": 0.34802035497083283, "grad_norm": 0.12985538825056495, "learning_rate": 4.911924030342563e-06, "loss": 0.7902, "step": 701 }, { "epoch": 0.3485168176740722, "grad_norm": 0.1368260578507066, "learning_rate": 4.911666709090974e-06, "loss": 0.7878, "step": 702 }, { "epoch": 0.34901328037731166, "grad_norm": 0.1360969813881036, "learning_rate": 4.911409019254797e-06, "loss": 0.7653, "step": 703 }, { "epoch": 0.34950974308055105, "grad_norm": 0.13601859820292503, "learning_rate": 4.911150960873414e-06, "loss": 0.7416, "step": 704 }, { "epoch": 0.3500062057837905, "grad_norm": 0.13523409685451968, "learning_rate": 4.910892533986268e-06, "loss": 0.7683, "step": 705 }, { "epoch": 0.35050266848702993, "grad_norm": 0.1307299504782071, "learning_rate": 4.9106337386328524e-06, "loss": 0.7706, "step": 706 }, { "epoch": 0.3509991311902693, "grad_norm": 0.1324922818188745, "learning_rate": 4.910374574852722e-06, "loss": 0.7753, "step": 707 }, { "epoch": 0.35149559389350876, "grad_norm": 0.13283375926300728, "learning_rate": 4.910115042685486e-06, "loss": 0.7988, "step": 708 }, { "epoch": 0.35199205659674815, "grad_norm": 0.1364833655942737, "learning_rate": 4.909855142170809e-06, "loss": 0.7662, "step": 709 }, { "epoch": 0.3524885192999876, "grad_norm": 0.13240082659711724, "learning_rate": 4.909594873348412e-06, "loss": 0.7692, "step": 710 }, { "epoch": 0.35298498200322703, "grad_norm": 0.13859318485445407, "learning_rate": 4.909334236258073e-06, "loss": 0.8092, "step": 711 }, { "epoch": 0.3534814447064664, "grad_norm": 0.14669171230394829, "learning_rate": 4.909073230939628e-06, "loss": 0.849, "step": 712 }, { "epoch": 0.35397790740970586, "grad_norm": 0.1329208698752514, "learning_rate": 4.908811857432966e-06, "loss": 0.8112, "step": 713 }, { "epoch": 0.35447437011294525, "grad_norm": 0.13420771737338105, "learning_rate": 4.908550115778032e-06, "loss": 0.8081, "step": 714 }, { "epoch": 0.3549708328161847, "grad_norm": 0.13869844814379267, "learning_rate": 4.908288006014833e-06, "loss": 0.7924, "step": 715 }, { "epoch": 0.3554672955194241, "grad_norm": 0.13159030247433556, "learning_rate": 4.9080255281834255e-06, "loss": 0.7896, "step": 716 }, { "epoch": 0.3559637582226635, "grad_norm": 0.12782379383553066, "learning_rate": 4.907762682323926e-06, "loss": 0.7492, "step": 717 }, { "epoch": 0.35646022092590296, "grad_norm": 0.12984107500991426, "learning_rate": 4.907499468476506e-06, "loss": 0.7713, "step": 718 }, { "epoch": 0.35695668362914235, "grad_norm": 0.13563855815235779, "learning_rate": 4.907235886681394e-06, "loss": 0.7198, "step": 719 }, { "epoch": 0.3574531463323818, "grad_norm": 0.1410105313395293, "learning_rate": 4.906971936978874e-06, "loss": 0.7584, "step": 720 }, { "epoch": 0.3579496090356212, "grad_norm": 0.13491737996113276, "learning_rate": 4.906707619409285e-06, "loss": 0.8048, "step": 721 }, { "epoch": 0.3584460717388606, "grad_norm": 0.15901372063337899, "learning_rate": 4.906442934013026e-06, "loss": 0.8157, "step": 722 }, { "epoch": 0.35894253444210006, "grad_norm": 0.1318933579561297, "learning_rate": 4.906177880830548e-06, "loss": 0.7624, "step": 723 }, { "epoch": 0.35943899714533945, "grad_norm": 0.13581410637368269, "learning_rate": 4.905912459902362e-06, "loss": 0.8205, "step": 724 }, { "epoch": 0.3599354598485789, "grad_norm": 0.1370308459267444, "learning_rate": 4.905646671269032e-06, "loss": 0.8298, "step": 725 }, { "epoch": 0.3604319225518183, "grad_norm": 0.1358006289221788, "learning_rate": 4.90538051497118e-06, "loss": 0.8233, "step": 726 }, { "epoch": 0.3609283852550577, "grad_norm": 0.1316316195921008, "learning_rate": 4.905113991049484e-06, "loss": 0.8134, "step": 727 }, { "epoch": 0.36142484795829716, "grad_norm": 0.1335957967976374, "learning_rate": 4.904847099544676e-06, "loss": 0.7753, "step": 728 }, { "epoch": 0.36192131066153654, "grad_norm": 0.13467968539337932, "learning_rate": 4.904579840497549e-06, "loss": 0.7678, "step": 729 }, { "epoch": 0.362417773364776, "grad_norm": 0.13328087348761786, "learning_rate": 4.904312213948948e-06, "loss": 0.7422, "step": 730 }, { "epoch": 0.3629142360680154, "grad_norm": 0.13791325862150497, "learning_rate": 4.904044219939775e-06, "loss": 0.81, "step": 731 }, { "epoch": 0.3634106987712548, "grad_norm": 0.14664569154055215, "learning_rate": 4.9037758585109886e-06, "loss": 0.7829, "step": 732 }, { "epoch": 0.36390716147449426, "grad_norm": 0.13819662122726, "learning_rate": 4.9035071297036045e-06, "loss": 0.7533, "step": 733 }, { "epoch": 0.36440362417773364, "grad_norm": 0.14379447561879494, "learning_rate": 4.903238033558692e-06, "loss": 0.7812, "step": 734 }, { "epoch": 0.3649000868809731, "grad_norm": 0.1348693038250325, "learning_rate": 4.90296857011738e-06, "loss": 0.7406, "step": 735 }, { "epoch": 0.36539654958421247, "grad_norm": 0.13919538306325066, "learning_rate": 4.90269873942085e-06, "loss": 0.7716, "step": 736 }, { "epoch": 0.3658930122874519, "grad_norm": 0.13768912705501787, "learning_rate": 4.902428541510342e-06, "loss": 0.7797, "step": 737 }, { "epoch": 0.3663894749906913, "grad_norm": 0.13652014063796708, "learning_rate": 4.902157976427152e-06, "loss": 0.7549, "step": 738 }, { "epoch": 0.36688593769393074, "grad_norm": 0.13524592628689325, "learning_rate": 4.901887044212631e-06, "loss": 0.7481, "step": 739 }, { "epoch": 0.3673824003971702, "grad_norm": 0.1311044839852732, "learning_rate": 4.9016157449081855e-06, "loss": 0.8021, "step": 740 }, { "epoch": 0.36787886310040957, "grad_norm": 0.13954525535721593, "learning_rate": 4.901344078555282e-06, "loss": 0.7548, "step": 741 }, { "epoch": 0.368375325803649, "grad_norm": 0.136268642834534, "learning_rate": 4.901072045195437e-06, "loss": 0.7602, "step": 742 }, { "epoch": 0.3688717885068884, "grad_norm": 0.23363764465279754, "learning_rate": 4.90079964487023e-06, "loss": 0.776, "step": 743 }, { "epoch": 0.36936825121012784, "grad_norm": 0.13274057394053615, "learning_rate": 4.90052687762129e-06, "loss": 0.7351, "step": 744 }, { "epoch": 0.3698647139133673, "grad_norm": 0.14127484700099766, "learning_rate": 4.900253743490307e-06, "loss": 0.7798, "step": 745 }, { "epoch": 0.37036117661660667, "grad_norm": 0.12815346443371342, "learning_rate": 4.8999802425190235e-06, "loss": 0.7075, "step": 746 }, { "epoch": 0.3708576393198461, "grad_norm": 0.1368536898030887, "learning_rate": 4.899706374749242e-06, "loss": 0.757, "step": 747 }, { "epoch": 0.3713541020230855, "grad_norm": 0.133763511827003, "learning_rate": 4.899432140222816e-06, "loss": 0.807, "step": 748 }, { "epoch": 0.37185056472632494, "grad_norm": 0.14086274715481756, "learning_rate": 4.899157538981661e-06, "loss": 0.7545, "step": 749 }, { "epoch": 0.3723470274295644, "grad_norm": 0.13940663877796536, "learning_rate": 4.898882571067742e-06, "loss": 0.7895, "step": 750 }, { "epoch": 0.37284349013280377, "grad_norm": 0.14364436254867233, "learning_rate": 4.898607236523086e-06, "loss": 0.824, "step": 751 }, { "epoch": 0.3733399528360432, "grad_norm": 0.1363424647021028, "learning_rate": 4.898331535389772e-06, "loss": 0.7689, "step": 752 }, { "epoch": 0.3738364155392826, "grad_norm": 0.134333700914737, "learning_rate": 4.898055467709938e-06, "loss": 0.781, "step": 753 }, { "epoch": 0.37433287824252204, "grad_norm": 0.1354244860930839, "learning_rate": 4.897779033525775e-06, "loss": 0.7425, "step": 754 }, { "epoch": 0.3748293409457614, "grad_norm": 0.13641351699049295, "learning_rate": 4.897502232879533e-06, "loss": 0.8106, "step": 755 }, { "epoch": 0.37532580364900087, "grad_norm": 0.1397668428766841, "learning_rate": 4.897225065813515e-06, "loss": 0.7725, "step": 756 }, { "epoch": 0.3758222663522403, "grad_norm": 0.1388989852014346, "learning_rate": 4.896947532370083e-06, "loss": 0.8095, "step": 757 }, { "epoch": 0.3763187290554797, "grad_norm": 0.13452943247011598, "learning_rate": 4.896669632591652e-06, "loss": 0.7768, "step": 758 }, { "epoch": 0.37681519175871914, "grad_norm": 0.13898005901785496, "learning_rate": 4.896391366520695e-06, "loss": 0.8469, "step": 759 }, { "epoch": 0.3773116544619585, "grad_norm": 0.1344241818366532, "learning_rate": 4.8961127341997425e-06, "loss": 0.7871, "step": 760 }, { "epoch": 0.37780811716519797, "grad_norm": 0.13570943105691186, "learning_rate": 4.895833735671376e-06, "loss": 0.7702, "step": 761 }, { "epoch": 0.3783045798684374, "grad_norm": 0.1341854190460997, "learning_rate": 4.895554370978238e-06, "loss": 0.7409, "step": 762 }, { "epoch": 0.3788010425716768, "grad_norm": 0.13050283705876012, "learning_rate": 4.895274640163023e-06, "loss": 0.7793, "step": 763 }, { "epoch": 0.37929750527491624, "grad_norm": 0.1329571983648645, "learning_rate": 4.894994543268486e-06, "loss": 0.7539, "step": 764 }, { "epoch": 0.3797939679781556, "grad_norm": 0.14453559821774697, "learning_rate": 4.894714080337433e-06, "loss": 0.8057, "step": 765 }, { "epoch": 0.38029043068139506, "grad_norm": 0.14541141571134905, "learning_rate": 4.894433251412729e-06, "loss": 0.8173, "step": 766 }, { "epoch": 0.3807868933846345, "grad_norm": 0.1365214842205778, "learning_rate": 4.894152056537295e-06, "loss": 0.7515, "step": 767 }, { "epoch": 0.3812833560878739, "grad_norm": 0.13104231373031291, "learning_rate": 4.893870495754106e-06, "loss": 0.7388, "step": 768 }, { "epoch": 0.38177981879111333, "grad_norm": 0.1293760148802387, "learning_rate": 4.8935885691061955e-06, "loss": 0.7202, "step": 769 }, { "epoch": 0.3822762814943527, "grad_norm": 0.1413055453494607, "learning_rate": 4.893306276636649e-06, "loss": 0.7529, "step": 770 }, { "epoch": 0.38277274419759216, "grad_norm": 0.13778756966339015, "learning_rate": 4.893023618388612e-06, "loss": 0.7854, "step": 771 }, { "epoch": 0.38326920690083155, "grad_norm": 0.12932306420294923, "learning_rate": 4.892740594405285e-06, "loss": 0.7832, "step": 772 }, { "epoch": 0.383765669604071, "grad_norm": 0.13397702650716775, "learning_rate": 4.892457204729923e-06, "loss": 0.8013, "step": 773 }, { "epoch": 0.38426213230731043, "grad_norm": 0.14230173874593646, "learning_rate": 4.892173449405837e-06, "loss": 0.749, "step": 774 }, { "epoch": 0.3847585950105498, "grad_norm": 0.13494471486274012, "learning_rate": 4.891889328476395e-06, "loss": 0.7644, "step": 775 }, { "epoch": 0.38525505771378926, "grad_norm": 0.13174879824024627, "learning_rate": 4.89160484198502e-06, "loss": 0.7659, "step": 776 }, { "epoch": 0.38575152041702865, "grad_norm": 0.1340875139550934, "learning_rate": 4.891319989975191e-06, "loss": 0.8022, "step": 777 }, { "epoch": 0.3862479831202681, "grad_norm": 0.1317344878554217, "learning_rate": 4.891034772490444e-06, "loss": 0.7821, "step": 778 }, { "epoch": 0.38674444582350753, "grad_norm": 0.12863325352249577, "learning_rate": 4.890749189574369e-06, "loss": 0.729, "step": 779 }, { "epoch": 0.3872409085267469, "grad_norm": 0.13603104401195087, "learning_rate": 4.8904632412706135e-06, "loss": 0.7692, "step": 780 }, { "epoch": 0.38773737122998636, "grad_norm": 0.1371314105411586, "learning_rate": 4.890176927622879e-06, "loss": 0.816, "step": 781 }, { "epoch": 0.38823383393322575, "grad_norm": 0.13666348082090357, "learning_rate": 4.889890248674926e-06, "loss": 0.7781, "step": 782 }, { "epoch": 0.3887302966364652, "grad_norm": 0.12972574048556007, "learning_rate": 4.889603204470566e-06, "loss": 0.7418, "step": 783 }, { "epoch": 0.38922675933970463, "grad_norm": 0.1380447797265697, "learning_rate": 4.889315795053671e-06, "loss": 0.7666, "step": 784 }, { "epoch": 0.389723222042944, "grad_norm": 0.1302714912154436, "learning_rate": 4.889028020468167e-06, "loss": 0.783, "step": 785 }, { "epoch": 0.39021968474618346, "grad_norm": 0.1467555997648217, "learning_rate": 4.8887398807580345e-06, "loss": 0.7373, "step": 786 }, { "epoch": 0.39071614744942285, "grad_norm": 0.13456233284077948, "learning_rate": 4.888451375967313e-06, "loss": 0.7843, "step": 787 }, { "epoch": 0.3912126101526623, "grad_norm": 0.13579468578021064, "learning_rate": 4.888162506140093e-06, "loss": 0.7926, "step": 788 }, { "epoch": 0.3917090728559017, "grad_norm": 0.13486742182263928, "learning_rate": 4.887873271320526e-06, "loss": 0.7848, "step": 789 }, { "epoch": 0.3922055355591411, "grad_norm": 0.13060505166884362, "learning_rate": 4.887583671552815e-06, "loss": 0.7316, "step": 790 }, { "epoch": 0.39270199826238056, "grad_norm": 0.13869981464559716, "learning_rate": 4.887293706881224e-06, "loss": 0.8121, "step": 791 }, { "epoch": 0.39319846096561994, "grad_norm": 0.13166122616659146, "learning_rate": 4.887003377350066e-06, "loss": 0.7251, "step": 792 }, { "epoch": 0.3936949236688594, "grad_norm": 0.14046352512611365, "learning_rate": 4.886712683003715e-06, "loss": 0.758, "step": 793 }, { "epoch": 0.3941913863720988, "grad_norm": 0.14464252163844996, "learning_rate": 4.886421623886598e-06, "loss": 0.815, "step": 794 }, { "epoch": 0.3946878490753382, "grad_norm": 0.13409840602053655, "learning_rate": 4.886130200043199e-06, "loss": 0.7752, "step": 795 }, { "epoch": 0.39518431177857766, "grad_norm": 0.13116167627946584, "learning_rate": 4.885838411518058e-06, "loss": 0.7308, "step": 796 }, { "epoch": 0.39568077448181704, "grad_norm": 0.1366342943883782, "learning_rate": 4.885546258355769e-06, "loss": 0.8026, "step": 797 }, { "epoch": 0.3961772371850565, "grad_norm": 0.13731477100332815, "learning_rate": 4.885253740600985e-06, "loss": 0.7204, "step": 798 }, { "epoch": 0.39667369988829587, "grad_norm": 0.14004464166577948, "learning_rate": 4.88496085829841e-06, "loss": 0.7729, "step": 799 }, { "epoch": 0.3971701625915353, "grad_norm": 0.13691778866799595, "learning_rate": 4.884667611492808e-06, "loss": 0.7497, "step": 800 }, { "epoch": 0.39766662529477476, "grad_norm": 0.13321987767176327, "learning_rate": 4.884374000228998e-06, "loss": 0.7415, "step": 801 }, { "epoch": 0.39816308799801414, "grad_norm": 0.1398138150913019, "learning_rate": 4.884080024551851e-06, "loss": 0.7733, "step": 802 }, { "epoch": 0.3986595507012536, "grad_norm": 0.1470854981776522, "learning_rate": 4.8837856845062994e-06, "loss": 0.7704, "step": 803 }, { "epoch": 0.39915601340449297, "grad_norm": 0.13800002748758905, "learning_rate": 4.883490980137327e-06, "loss": 0.781, "step": 804 }, { "epoch": 0.3996524761077324, "grad_norm": 0.13129465917522473, "learning_rate": 4.883195911489974e-06, "loss": 0.7406, "step": 805 }, { "epoch": 0.4001489388109718, "grad_norm": 0.13483055098605387, "learning_rate": 4.882900478609338e-06, "loss": 0.7982, "step": 806 }, { "epoch": 0.40064540151421124, "grad_norm": 0.13355656690282036, "learning_rate": 4.8826046815405705e-06, "loss": 0.8013, "step": 807 }, { "epoch": 0.4011418642174507, "grad_norm": 0.13207774630124497, "learning_rate": 4.882308520328879e-06, "loss": 0.7445, "step": 808 }, { "epoch": 0.40163832692069007, "grad_norm": 0.13638111186259064, "learning_rate": 4.882011995019529e-06, "loss": 0.7928, "step": 809 }, { "epoch": 0.4021347896239295, "grad_norm": 0.13865080146765882, "learning_rate": 4.881715105657837e-06, "loss": 0.7972, "step": 810 }, { "epoch": 0.4026312523271689, "grad_norm": 0.14025214556857968, "learning_rate": 4.88141785228918e-06, "loss": 0.8052, "step": 811 }, { "epoch": 0.40312771503040834, "grad_norm": 0.13674600726600722, "learning_rate": 4.881120234958986e-06, "loss": 0.8034, "step": 812 }, { "epoch": 0.4036241777336478, "grad_norm": 0.1360386191646674, "learning_rate": 4.8808222537127436e-06, "loss": 0.7882, "step": 813 }, { "epoch": 0.40412064043688717, "grad_norm": 0.14178158974636348, "learning_rate": 4.8805239085959936e-06, "loss": 0.781, "step": 814 }, { "epoch": 0.4046171031401266, "grad_norm": 0.1352509016655136, "learning_rate": 4.880225199654331e-06, "loss": 0.7834, "step": 815 }, { "epoch": 0.405113565843366, "grad_norm": 0.13284596151325054, "learning_rate": 4.879926126933412e-06, "loss": 0.7846, "step": 816 }, { "epoch": 0.40561002854660544, "grad_norm": 0.13744074794376318, "learning_rate": 4.8796266904789445e-06, "loss": 0.7567, "step": 817 }, { "epoch": 0.4061064912498449, "grad_norm": 0.13859156903074973, "learning_rate": 4.8793268903366905e-06, "loss": 0.793, "step": 818 }, { "epoch": 0.40660295395308427, "grad_norm": 0.14032883681460276, "learning_rate": 4.879026726552471e-06, "loss": 0.8204, "step": 819 }, { "epoch": 0.4070994166563237, "grad_norm": 0.13375088164936702, "learning_rate": 4.878726199172162e-06, "loss": 0.7397, "step": 820 }, { "epoch": 0.4075958793595631, "grad_norm": 0.13828272771245217, "learning_rate": 4.878425308241693e-06, "loss": 0.7657, "step": 821 }, { "epoch": 0.40809234206280254, "grad_norm": 0.14138136285113104, "learning_rate": 4.87812405380705e-06, "loss": 0.7708, "step": 822 }, { "epoch": 0.4085888047660419, "grad_norm": 0.13690922786942494, "learning_rate": 4.8778224359142775e-06, "loss": 0.7559, "step": 823 }, { "epoch": 0.40908526746928137, "grad_norm": 0.1428920733606056, "learning_rate": 4.87752045460947e-06, "loss": 0.7543, "step": 824 }, { "epoch": 0.4095817301725208, "grad_norm": 0.13259517168171056, "learning_rate": 4.877218109938782e-06, "loss": 0.7468, "step": 825 }, { "epoch": 0.4100781928757602, "grad_norm": 0.13286837812829952, "learning_rate": 4.876915401948421e-06, "loss": 0.7498, "step": 826 }, { "epoch": 0.41057465557899964, "grad_norm": 0.1505604133945408, "learning_rate": 4.876612330684652e-06, "loss": 0.8083, "step": 827 }, { "epoch": 0.411071118282239, "grad_norm": 0.14013330639827198, "learning_rate": 4.876308896193795e-06, "loss": 0.7805, "step": 828 }, { "epoch": 0.41156758098547847, "grad_norm": 0.141751989941827, "learning_rate": 4.876005098522224e-06, "loss": 0.789, "step": 829 }, { "epoch": 0.4120640436887179, "grad_norm": 0.1439633625106176, "learning_rate": 4.87570093771637e-06, "loss": 0.7825, "step": 830 }, { "epoch": 0.4125605063919573, "grad_norm": 0.13578927525549853, "learning_rate": 4.875396413822719e-06, "loss": 0.7434, "step": 831 }, { "epoch": 0.41305696909519674, "grad_norm": 0.14301935596353957, "learning_rate": 4.875091526887813e-06, "loss": 0.7866, "step": 832 }, { "epoch": 0.4135534317984361, "grad_norm": 0.13962888610646845, "learning_rate": 4.8747862769582485e-06, "loss": 0.797, "step": 833 }, { "epoch": 0.41404989450167556, "grad_norm": 0.13762036122185398, "learning_rate": 4.874480664080679e-06, "loss": 0.7647, "step": 834 }, { "epoch": 0.414546357204915, "grad_norm": 0.14874331327029203, "learning_rate": 4.874174688301811e-06, "loss": 0.7742, "step": 835 }, { "epoch": 0.4150428199081544, "grad_norm": 0.14145690460721833, "learning_rate": 4.873868349668409e-06, "loss": 0.7794, "step": 836 }, { "epoch": 0.41553928261139383, "grad_norm": 0.13516936437887206, "learning_rate": 4.873561648227292e-06, "loss": 0.772, "step": 837 }, { "epoch": 0.4160357453146332, "grad_norm": 0.13997107273973253, "learning_rate": 4.873254584025335e-06, "loss": 0.7802, "step": 838 }, { "epoch": 0.41653220801787266, "grad_norm": 0.15576247682108174, "learning_rate": 4.8729471571094665e-06, "loss": 0.8451, "step": 839 }, { "epoch": 0.41702867072111205, "grad_norm": 0.1391550838396009, "learning_rate": 4.872639367526672e-06, "loss": 0.74, "step": 840 }, { "epoch": 0.4175251334243515, "grad_norm": 0.13906947337724887, "learning_rate": 4.872331215323993e-06, "loss": 0.7883, "step": 841 }, { "epoch": 0.41802159612759093, "grad_norm": 0.1369675811107513, "learning_rate": 4.872022700548525e-06, "loss": 0.7384, "step": 842 }, { "epoch": 0.4185180588308303, "grad_norm": 0.1443749383496745, "learning_rate": 4.87171382324742e-06, "loss": 0.773, "step": 843 }, { "epoch": 0.41901452153406976, "grad_norm": 0.13524935011822023, "learning_rate": 4.871404583467884e-06, "loss": 0.8338, "step": 844 }, { "epoch": 0.41951098423730915, "grad_norm": 0.13600833744156696, "learning_rate": 4.8710949812571805e-06, "loss": 0.7715, "step": 845 }, { "epoch": 0.4200074469405486, "grad_norm": 0.13910800980465646, "learning_rate": 4.870785016662627e-06, "loss": 0.7653, "step": 846 }, { "epoch": 0.42050390964378803, "grad_norm": 0.14716154207375018, "learning_rate": 4.870474689731596e-06, "loss": 0.7753, "step": 847 }, { "epoch": 0.4210003723470274, "grad_norm": 0.13651844849366374, "learning_rate": 4.870164000511516e-06, "loss": 0.7252, "step": 848 }, { "epoch": 0.42149683505026686, "grad_norm": 0.13107938750960044, "learning_rate": 4.869852949049872e-06, "loss": 0.7403, "step": 849 }, { "epoch": 0.42199329775350625, "grad_norm": 0.1405132580685121, "learning_rate": 4.8695415353942025e-06, "loss": 0.7816, "step": 850 }, { "epoch": 0.4224897604567457, "grad_norm": 0.16019940582846015, "learning_rate": 4.869229759592101e-06, "loss": 0.7977, "step": 851 }, { "epoch": 0.42298622315998513, "grad_norm": 0.1386861474814064, "learning_rate": 4.868917621691219e-06, "loss": 0.8859, "step": 852 }, { "epoch": 0.4234826858632245, "grad_norm": 0.1438580035371236, "learning_rate": 4.868605121739261e-06, "loss": 0.7708, "step": 853 }, { "epoch": 0.42397914856646396, "grad_norm": 0.13851415036446404, "learning_rate": 4.868292259783988e-06, "loss": 0.7379, "step": 854 }, { "epoch": 0.42447561126970335, "grad_norm": 0.13928629569080855, "learning_rate": 4.867979035873216e-06, "loss": 0.7663, "step": 855 }, { "epoch": 0.4249720739729428, "grad_norm": 0.1326009455578934, "learning_rate": 4.8676654500548156e-06, "loss": 0.7432, "step": 856 }, { "epoch": 0.4254685366761822, "grad_norm": 0.13141434025922472, "learning_rate": 4.867351502376714e-06, "loss": 0.7558, "step": 857 }, { "epoch": 0.4259649993794216, "grad_norm": 0.1301537847703651, "learning_rate": 4.867037192886893e-06, "loss": 0.7003, "step": 858 }, { "epoch": 0.42646146208266106, "grad_norm": 0.13958306777392412, "learning_rate": 4.866722521633389e-06, "loss": 0.8252, "step": 859 }, { "epoch": 0.42695792478590044, "grad_norm": 0.1371175213676672, "learning_rate": 4.866407488664296e-06, "loss": 0.7807, "step": 860 }, { "epoch": 0.4274543874891399, "grad_norm": 0.1326784777780711, "learning_rate": 4.866092094027761e-06, "loss": 0.7701, "step": 861 }, { "epoch": 0.4279508501923793, "grad_norm": 0.13589123146778564, "learning_rate": 4.865776337771986e-06, "loss": 0.7374, "step": 862 }, { "epoch": 0.4284473128956187, "grad_norm": 0.13307442584248397, "learning_rate": 4.86546021994523e-06, "loss": 0.7581, "step": 863 }, { "epoch": 0.42894377559885816, "grad_norm": 0.12935156456607097, "learning_rate": 4.865143740595807e-06, "loss": 0.7259, "step": 864 }, { "epoch": 0.42944023830209754, "grad_norm": 0.13856977776450893, "learning_rate": 4.864826899772086e-06, "loss": 0.7921, "step": 865 }, { "epoch": 0.429936701005337, "grad_norm": 0.13098700749992728, "learning_rate": 4.864509697522489e-06, "loss": 0.7581, "step": 866 }, { "epoch": 0.43043316370857637, "grad_norm": 0.1277394417952265, "learning_rate": 4.864192133895499e-06, "loss": 0.7424, "step": 867 }, { "epoch": 0.4309296264118158, "grad_norm": 0.1297413964613426, "learning_rate": 4.8638742089396464e-06, "loss": 0.7869, "step": 868 }, { "epoch": 0.43142608911505526, "grad_norm": 0.1395569604985153, "learning_rate": 4.863555922703523e-06, "loss": 0.7837, "step": 869 }, { "epoch": 0.43192255181829464, "grad_norm": 0.14789922672713987, "learning_rate": 4.863237275235774e-06, "loss": 0.8032, "step": 870 }, { "epoch": 0.4324190145215341, "grad_norm": 0.148230222225613, "learning_rate": 4.8629182665850995e-06, "loss": 0.7581, "step": 871 }, { "epoch": 0.43291547722477347, "grad_norm": 0.13520814483911175, "learning_rate": 4.862598896800254e-06, "loss": 0.7961, "step": 872 }, { "epoch": 0.4334119399280129, "grad_norm": 0.1400523186641088, "learning_rate": 4.862279165930049e-06, "loss": 0.7443, "step": 873 }, { "epoch": 0.43390840263125235, "grad_norm": 0.14000148595084386, "learning_rate": 4.861959074023348e-06, "loss": 0.7092, "step": 874 }, { "epoch": 0.43440486533449174, "grad_norm": 0.13363969656101354, "learning_rate": 4.8616386211290755e-06, "loss": 0.7532, "step": 875 }, { "epoch": 0.4349013280377312, "grad_norm": 0.1338415278751024, "learning_rate": 4.861317807296205e-06, "loss": 0.7598, "step": 876 }, { "epoch": 0.43539779074097057, "grad_norm": 0.13445264018650427, "learning_rate": 4.860996632573769e-06, "loss": 0.7517, "step": 877 }, { "epoch": 0.43589425344421, "grad_norm": 0.13560845548955244, "learning_rate": 4.860675097010853e-06, "loss": 0.7565, "step": 878 }, { "epoch": 0.4363907161474494, "grad_norm": 0.14803665445404562, "learning_rate": 4.860353200656599e-06, "loss": 0.8122, "step": 879 }, { "epoch": 0.43688717885068884, "grad_norm": 0.1373213861484435, "learning_rate": 4.860030943560204e-06, "loss": 0.7541, "step": 880 }, { "epoch": 0.4373836415539283, "grad_norm": 0.12955793875119787, "learning_rate": 4.859708325770919e-06, "loss": 0.7502, "step": 881 }, { "epoch": 0.43788010425716767, "grad_norm": 0.1441160209192175, "learning_rate": 4.859385347338052e-06, "loss": 0.7734, "step": 882 }, { "epoch": 0.4383765669604071, "grad_norm": 0.13942948203768102, "learning_rate": 4.8590620083109645e-06, "loss": 0.7528, "step": 883 }, { "epoch": 0.4388730296636465, "grad_norm": 0.14399699173850394, "learning_rate": 4.858738308739073e-06, "loss": 0.7628, "step": 884 }, { "epoch": 0.43936949236688594, "grad_norm": 0.137394867443982, "learning_rate": 4.858414248671851e-06, "loss": 0.8637, "step": 885 }, { "epoch": 0.4398659550701254, "grad_norm": 0.14448436375711424, "learning_rate": 4.8580898281588255e-06, "loss": 0.8147, "step": 886 }, { "epoch": 0.44036241777336477, "grad_norm": 0.1341227002790187, "learning_rate": 4.8577650472495785e-06, "loss": 0.7602, "step": 887 }, { "epoch": 0.4408588804766042, "grad_norm": 0.14377468127267348, "learning_rate": 4.857439905993748e-06, "loss": 0.7913, "step": 888 }, { "epoch": 0.4413553431798436, "grad_norm": 0.13340052111413453, "learning_rate": 4.857114404441027e-06, "loss": 0.7482, "step": 889 }, { "epoch": 0.44185180588308304, "grad_norm": 0.13574880212233634, "learning_rate": 4.856788542641162e-06, "loss": 0.7771, "step": 890 }, { "epoch": 0.4423482685863225, "grad_norm": 0.1394549058341552, "learning_rate": 4.856462320643957e-06, "loss": 0.727, "step": 891 }, { "epoch": 0.44284473128956187, "grad_norm": 0.16206005047704644, "learning_rate": 4.856135738499269e-06, "loss": 0.7901, "step": 892 }, { "epoch": 0.4433411939928013, "grad_norm": 0.14512700342806611, "learning_rate": 4.855808796257012e-06, "loss": 0.8094, "step": 893 }, { "epoch": 0.4438376566960407, "grad_norm": 0.1531915103698981, "learning_rate": 4.855481493967152e-06, "loss": 0.8711, "step": 894 }, { "epoch": 0.44433411939928014, "grad_norm": 0.14141629017985097, "learning_rate": 4.855153831679713e-06, "loss": 0.7529, "step": 895 }, { "epoch": 0.4448305821025195, "grad_norm": 0.14793605224512887, "learning_rate": 4.854825809444773e-06, "loss": 0.7606, "step": 896 }, { "epoch": 0.44532704480575896, "grad_norm": 0.14449987137890363, "learning_rate": 4.854497427312465e-06, "loss": 0.8164, "step": 897 }, { "epoch": 0.4458235075089984, "grad_norm": 0.14487270020396967, "learning_rate": 4.854168685332977e-06, "loss": 0.7833, "step": 898 }, { "epoch": 0.4463199702122378, "grad_norm": 0.14100131641479818, "learning_rate": 4.853839583556551e-06, "loss": 0.7785, "step": 899 }, { "epoch": 0.44681643291547724, "grad_norm": 0.1402338208402533, "learning_rate": 4.853510122033486e-06, "loss": 0.7922, "step": 900 }, { "epoch": 0.4473128956187166, "grad_norm": 0.15070020553438185, "learning_rate": 4.853180300814135e-06, "loss": 0.7818, "step": 901 }, { "epoch": 0.44780935832195606, "grad_norm": 0.14340004090339828, "learning_rate": 4.8528501199489045e-06, "loss": 0.7831, "step": 902 }, { "epoch": 0.4483058210251955, "grad_norm": 0.12886555141368564, "learning_rate": 4.852519579488258e-06, "loss": 0.7358, "step": 903 }, { "epoch": 0.4488022837284349, "grad_norm": 0.13659666264950565, "learning_rate": 4.852188679482715e-06, "loss": 0.797, "step": 904 }, { "epoch": 0.44929874643167433, "grad_norm": 0.1344704194549763, "learning_rate": 4.851857419982845e-06, "loss": 0.7746, "step": 905 }, { "epoch": 0.4497952091349137, "grad_norm": 0.14038386138367254, "learning_rate": 4.8515258010392786e-06, "loss": 0.7897, "step": 906 }, { "epoch": 0.45029167183815316, "grad_norm": 0.1310362323726338, "learning_rate": 4.851193822702698e-06, "loss": 0.7891, "step": 907 }, { "epoch": 0.4507881345413926, "grad_norm": 0.13456562483222215, "learning_rate": 4.850861485023839e-06, "loss": 0.8137, "step": 908 }, { "epoch": 0.451284597244632, "grad_norm": 0.13867498023241415, "learning_rate": 4.8505287880534954e-06, "loss": 0.755, "step": 909 }, { "epoch": 0.45178105994787143, "grad_norm": 0.13709552350038648, "learning_rate": 4.8501957318425145e-06, "loss": 0.7789, "step": 910 }, { "epoch": 0.4522775226511108, "grad_norm": 0.20256799536792502, "learning_rate": 4.849862316441799e-06, "loss": 0.7916, "step": 911 }, { "epoch": 0.45277398535435026, "grad_norm": 0.13805282049183487, "learning_rate": 4.849528541902304e-06, "loss": 0.7578, "step": 912 }, { "epoch": 0.45327044805758965, "grad_norm": 0.13846494457743488, "learning_rate": 4.849194408275045e-06, "loss": 0.7993, "step": 913 }, { "epoch": 0.4537669107608291, "grad_norm": 0.13624434622740592, "learning_rate": 4.848859915611087e-06, "loss": 0.7218, "step": 914 }, { "epoch": 0.45426337346406853, "grad_norm": 0.13735589522446118, "learning_rate": 4.848525063961551e-06, "loss": 0.7927, "step": 915 }, { "epoch": 0.4547598361673079, "grad_norm": 0.1435760229715099, "learning_rate": 4.848189853377615e-06, "loss": 0.7959, "step": 916 }, { "epoch": 0.45525629887054736, "grad_norm": 0.1374310227347919, "learning_rate": 4.8478542839105105e-06, "loss": 0.8102, "step": 917 }, { "epoch": 0.45575276157378675, "grad_norm": 0.13784054445459668, "learning_rate": 4.847518355611524e-06, "loss": 0.8705, "step": 918 }, { "epoch": 0.4562492242770262, "grad_norm": 0.1429652700234789, "learning_rate": 4.8471820685319965e-06, "loss": 0.8273, "step": 919 }, { "epoch": 0.45674568698026563, "grad_norm": 0.13776620485826613, "learning_rate": 4.8468454227233235e-06, "loss": 0.7659, "step": 920 }, { "epoch": 0.457242149683505, "grad_norm": 0.13044770694847052, "learning_rate": 4.8465084182369566e-06, "loss": 0.7336, "step": 921 }, { "epoch": 0.45773861238674446, "grad_norm": 0.13757280161779586, "learning_rate": 4.846171055124401e-06, "loss": 0.7948, "step": 922 }, { "epoch": 0.45823507508998385, "grad_norm": 0.1391821959776347, "learning_rate": 4.845833333437219e-06, "loss": 0.7553, "step": 923 }, { "epoch": 0.4587315377932233, "grad_norm": 0.1412266279982853, "learning_rate": 4.845495253227023e-06, "loss": 0.8104, "step": 924 }, { "epoch": 0.45922800049646273, "grad_norm": 0.13037017997721642, "learning_rate": 4.845156814545485e-06, "loss": 0.755, "step": 925 }, { "epoch": 0.4597244631997021, "grad_norm": 0.13938907235913728, "learning_rate": 4.844818017444331e-06, "loss": 0.7582, "step": 926 }, { "epoch": 0.46022092590294156, "grad_norm": 0.13642206078365174, "learning_rate": 4.8444788619753375e-06, "loss": 0.7709, "step": 927 }, { "epoch": 0.46071738860618094, "grad_norm": 0.13281685905144847, "learning_rate": 4.844139348190342e-06, "loss": 0.7613, "step": 928 }, { "epoch": 0.4612138513094204, "grad_norm": 0.1376296253823425, "learning_rate": 4.843799476141231e-06, "loss": 0.8199, "step": 929 }, { "epoch": 0.4617103140126598, "grad_norm": 0.1430226666279928, "learning_rate": 4.843459245879952e-06, "loss": 0.7879, "step": 930 }, { "epoch": 0.4622067767158992, "grad_norm": 0.13853491478085572, "learning_rate": 4.8431186574585e-06, "loss": 0.799, "step": 931 }, { "epoch": 0.46270323941913866, "grad_norm": 0.14106561846325905, "learning_rate": 4.842777710928932e-06, "loss": 0.799, "step": 932 }, { "epoch": 0.46319970212237804, "grad_norm": 0.13514878380767298, "learning_rate": 4.842436406343353e-06, "loss": 0.7928, "step": 933 }, { "epoch": 0.4636961648256175, "grad_norm": 0.12994346068694584, "learning_rate": 4.842094743753929e-06, "loss": 0.7338, "step": 934 }, { "epoch": 0.46419262752885687, "grad_norm": 0.13540353265751, "learning_rate": 4.841752723212874e-06, "loss": 0.7365, "step": 935 }, { "epoch": 0.4646890902320963, "grad_norm": 0.1383233625428097, "learning_rate": 4.841410344772464e-06, "loss": 0.7931, "step": 936 }, { "epoch": 0.46518555293533576, "grad_norm": 0.13984478325484884, "learning_rate": 4.841067608485024e-06, "loss": 0.8022, "step": 937 }, { "epoch": 0.46568201563857514, "grad_norm": 0.13182848523521054, "learning_rate": 4.840724514402936e-06, "loss": 0.7207, "step": 938 }, { "epoch": 0.4661784783418146, "grad_norm": 0.13720103035170697, "learning_rate": 4.8403810625786366e-06, "loss": 0.7699, "step": 939 }, { "epoch": 0.46667494104505397, "grad_norm": 0.13805399872377858, "learning_rate": 4.840037253064617e-06, "loss": 0.7676, "step": 940 }, { "epoch": 0.4671714037482934, "grad_norm": 0.133084203123112, "learning_rate": 4.839693085913423e-06, "loss": 0.7366, "step": 941 }, { "epoch": 0.46766786645153285, "grad_norm": 0.1338565121410207, "learning_rate": 4.839348561177656e-06, "loss": 0.7738, "step": 942 }, { "epoch": 0.46816432915477224, "grad_norm": 0.14005655784202467, "learning_rate": 4.839003678909968e-06, "loss": 0.8011, "step": 943 }, { "epoch": 0.4686607918580117, "grad_norm": 0.13866968197048227, "learning_rate": 4.838658439163072e-06, "loss": 0.7869, "step": 944 }, { "epoch": 0.46915725456125107, "grad_norm": 0.14490477371225619, "learning_rate": 4.838312841989731e-06, "loss": 0.8122, "step": 945 }, { "epoch": 0.4696537172644905, "grad_norm": 0.13501188505213316, "learning_rate": 4.837966887442764e-06, "loss": 0.8048, "step": 946 }, { "epoch": 0.4701501799677299, "grad_norm": 0.13061796475992138, "learning_rate": 4.837620575575045e-06, "loss": 0.7505, "step": 947 }, { "epoch": 0.47064664267096934, "grad_norm": 0.13123329580779844, "learning_rate": 4.837273906439501e-06, "loss": 0.7514, "step": 948 }, { "epoch": 0.4711431053742088, "grad_norm": 0.13931347467033603, "learning_rate": 4.836926880089117e-06, "loss": 0.7545, "step": 949 }, { "epoch": 0.47163956807744817, "grad_norm": 0.13794961687851362, "learning_rate": 4.836579496576928e-06, "loss": 0.8052, "step": 950 }, { "epoch": 0.4721360307806876, "grad_norm": 0.13580616802094855, "learning_rate": 4.836231755956028e-06, "loss": 0.755, "step": 951 }, { "epoch": 0.472632493483927, "grad_norm": 0.1375813327214774, "learning_rate": 4.835883658279562e-06, "loss": 0.7844, "step": 952 }, { "epoch": 0.47312895618716644, "grad_norm": 0.1334419288379686, "learning_rate": 4.835535203600732e-06, "loss": 0.7886, "step": 953 }, { "epoch": 0.4736254188904059, "grad_norm": 0.13849333665474736, "learning_rate": 4.835186391972795e-06, "loss": 0.7719, "step": 954 }, { "epoch": 0.47412188159364527, "grad_norm": 0.13814260935997866, "learning_rate": 4.834837223449058e-06, "loss": 0.7593, "step": 955 }, { "epoch": 0.4746183442968847, "grad_norm": 0.12665160870314987, "learning_rate": 4.834487698082888e-06, "loss": 0.6989, "step": 956 }, { "epoch": 0.4751148070001241, "grad_norm": 0.13161442511625795, "learning_rate": 4.834137815927705e-06, "loss": 0.7502, "step": 957 }, { "epoch": 0.47561126970336354, "grad_norm": 0.13088405996355415, "learning_rate": 4.833787577036981e-06, "loss": 0.7672, "step": 958 }, { "epoch": 0.476107732406603, "grad_norm": 0.14037857434636752, "learning_rate": 4.833436981464246e-06, "loss": 0.7766, "step": 959 }, { "epoch": 0.47660419510984237, "grad_norm": 0.14031622164392288, "learning_rate": 4.833086029263081e-06, "loss": 0.7815, "step": 960 }, { "epoch": 0.4771006578130818, "grad_norm": 0.13531078299513247, "learning_rate": 4.832734720487125e-06, "loss": 0.7547, "step": 961 }, { "epoch": 0.4775971205163212, "grad_norm": 0.13337784164706815, "learning_rate": 4.8323830551900705e-06, "loss": 0.7955, "step": 962 }, { "epoch": 0.47809358321956064, "grad_norm": 0.1347558468252713, "learning_rate": 4.832031033425663e-06, "loss": 0.7486, "step": 963 }, { "epoch": 0.4785900459228, "grad_norm": 0.12872155796573553, "learning_rate": 4.831678655247702e-06, "loss": 0.7768, "step": 964 }, { "epoch": 0.47908650862603946, "grad_norm": 0.1348281268570624, "learning_rate": 4.831325920710045e-06, "loss": 0.7895, "step": 965 }, { "epoch": 0.4795829713292789, "grad_norm": 0.13513666322029744, "learning_rate": 4.830972829866601e-06, "loss": 0.7915, "step": 966 }, { "epoch": 0.4800794340325183, "grad_norm": 0.13491478767782572, "learning_rate": 4.830619382771334e-06, "loss": 0.7665, "step": 967 }, { "epoch": 0.48057589673575773, "grad_norm": 0.13770336867680258, "learning_rate": 4.830265579478263e-06, "loss": 0.8023, "step": 968 }, { "epoch": 0.4810723594389971, "grad_norm": 0.13544943102350623, "learning_rate": 4.829911420041461e-06, "loss": 0.7393, "step": 969 }, { "epoch": 0.48156882214223656, "grad_norm": 0.13435327527712077, "learning_rate": 4.829556904515056e-06, "loss": 0.7506, "step": 970 }, { "epoch": 0.482065284845476, "grad_norm": 0.13966597982630186, "learning_rate": 4.82920203295323e-06, "loss": 0.786, "step": 971 }, { "epoch": 0.4825617475487154, "grad_norm": 0.1424347215239213, "learning_rate": 4.828846805410219e-06, "loss": 0.7934, "step": 972 }, { "epoch": 0.48305821025195483, "grad_norm": 0.13234179930319845, "learning_rate": 4.828491221940313e-06, "loss": 0.7655, "step": 973 }, { "epoch": 0.4835546729551942, "grad_norm": 0.14012843827614116, "learning_rate": 4.82813528259786e-06, "loss": 0.7496, "step": 974 }, { "epoch": 0.48405113565843366, "grad_norm": 0.13715136273058942, "learning_rate": 4.827778987437256e-06, "loss": 0.8375, "step": 975 }, { "epoch": 0.4845475983616731, "grad_norm": 0.1947144750999217, "learning_rate": 4.827422336512958e-06, "loss": 0.8667, "step": 976 }, { "epoch": 0.4850440610649125, "grad_norm": 0.13741283362319834, "learning_rate": 4.827065329879473e-06, "loss": 0.8308, "step": 977 }, { "epoch": 0.48554052376815193, "grad_norm": 0.13386724756044246, "learning_rate": 4.826707967591364e-06, "loss": 0.7704, "step": 978 }, { "epoch": 0.4860369864713913, "grad_norm": 0.14441549526350253, "learning_rate": 4.826350249703249e-06, "loss": 0.7697, "step": 979 }, { "epoch": 0.48653344917463076, "grad_norm": 0.14033203475651918, "learning_rate": 4.825992176269797e-06, "loss": 0.8019, "step": 980 }, { "epoch": 0.48702991187787015, "grad_norm": 0.13714343033359203, "learning_rate": 4.825633747345736e-06, "loss": 0.7529, "step": 981 }, { "epoch": 0.4875263745811096, "grad_norm": 0.12941529603226035, "learning_rate": 4.825274962985845e-06, "loss": 0.7562, "step": 982 }, { "epoch": 0.48802283728434903, "grad_norm": 0.13974390480489013, "learning_rate": 4.82491582324496e-06, "loss": 0.8017, "step": 983 }, { "epoch": 0.4885192999875884, "grad_norm": 0.1366449796259129, "learning_rate": 4.824556328177968e-06, "loss": 0.7515, "step": 984 }, { "epoch": 0.48901576269082786, "grad_norm": 0.14711958138650388, "learning_rate": 4.824196477839812e-06, "loss": 0.795, "step": 985 }, { "epoch": 0.48951222539406725, "grad_norm": 0.14257192599317975, "learning_rate": 4.8238362722854905e-06, "loss": 0.7937, "step": 986 }, { "epoch": 0.4900086880973067, "grad_norm": 0.1347130511121721, "learning_rate": 4.823475711570055e-06, "loss": 0.7963, "step": 987 }, { "epoch": 0.49050515080054613, "grad_norm": 0.13038662073702864, "learning_rate": 4.823114795748611e-06, "loss": 0.7671, "step": 988 }, { "epoch": 0.4910016135037855, "grad_norm": 0.14016752297712926, "learning_rate": 4.8227535248763185e-06, "loss": 0.7237, "step": 989 }, { "epoch": 0.49149807620702496, "grad_norm": 0.14127823957650343, "learning_rate": 4.8223918990083925e-06, "loss": 0.8157, "step": 990 }, { "epoch": 0.49199453891026435, "grad_norm": 0.15260187773192319, "learning_rate": 4.8220299182001014e-06, "loss": 0.7994, "step": 991 }, { "epoch": 0.4924910016135038, "grad_norm": 0.13943785694471836, "learning_rate": 4.821667582506768e-06, "loss": 0.7663, "step": 992 }, { "epoch": 0.49298746431674323, "grad_norm": 0.13275215903917895, "learning_rate": 4.8213048919837694e-06, "loss": 0.7614, "step": 993 }, { "epoch": 0.4934839270199826, "grad_norm": 0.14101255565793344, "learning_rate": 4.820941846686538e-06, "loss": 0.7854, "step": 994 }, { "epoch": 0.49398038972322206, "grad_norm": 0.13656223313177246, "learning_rate": 4.820578446670559e-06, "loss": 0.7944, "step": 995 }, { "epoch": 0.49447685242646144, "grad_norm": 0.13690268510235684, "learning_rate": 4.820214691991372e-06, "loss": 0.7417, "step": 996 }, { "epoch": 0.4949733151297009, "grad_norm": 0.14131395637674096, "learning_rate": 4.81985058270457e-06, "loss": 0.798, "step": 997 }, { "epoch": 0.49546977783294033, "grad_norm": 0.13522955049735227, "learning_rate": 4.819486118865804e-06, "loss": 0.7744, "step": 998 }, { "epoch": 0.4959662405361797, "grad_norm": 0.13340560525311815, "learning_rate": 4.819121300530774e-06, "loss": 0.7248, "step": 999 }, { "epoch": 0.49646270323941916, "grad_norm": 0.14852492729160868, "learning_rate": 4.8187561277552376e-06, "loss": 0.8108, "step": 1000 }, { "epoch": 0.49695916594265854, "grad_norm": 0.14319486748419052, "learning_rate": 4.818390600595005e-06, "loss": 0.7618, "step": 1001 }, { "epoch": 0.497455628645898, "grad_norm": 0.13608599816764147, "learning_rate": 4.818024719105942e-06, "loss": 0.7732, "step": 1002 }, { "epoch": 0.49795209134913737, "grad_norm": 0.13154303448443375, "learning_rate": 4.817658483343967e-06, "loss": 0.7478, "step": 1003 }, { "epoch": 0.4984485540523768, "grad_norm": 0.13240308227842207, "learning_rate": 4.817291893365055e-06, "loss": 0.7901, "step": 1004 }, { "epoch": 0.49894501675561626, "grad_norm": 0.14667939413445374, "learning_rate": 4.816924949225231e-06, "loss": 0.7964, "step": 1005 }, { "epoch": 0.49944147945885564, "grad_norm": 0.13188227123187843, "learning_rate": 4.816557650980578e-06, "loss": 0.6981, "step": 1006 }, { "epoch": 0.4999379421620951, "grad_norm": 0.12344883892983644, "learning_rate": 4.816189998687231e-06, "loss": 0.717, "step": 1007 }, { "epoch": 0.5004344048653345, "grad_norm": 0.14130934547471188, "learning_rate": 4.81582199240138e-06, "loss": 0.7951, "step": 1008 }, { "epoch": 0.5004344048653345, "eval_loss": 0.7746918201446533, "eval_runtime": 135.3234, "eval_samples_per_second": 224.3, "eval_steps_per_second": 28.044, "step": 1008 }, { "epoch": 0.5009308675685739, "grad_norm": 0.134048946932314, "learning_rate": 4.815453632179269e-06, "loss": 0.7824, "step": 1009 }, { "epoch": 0.5014273302718133, "grad_norm": 0.13698117562535206, "learning_rate": 4.815084918077196e-06, "loss": 0.8098, "step": 1010 }, { "epoch": 0.5019237929750527, "grad_norm": 0.14425364984140046, "learning_rate": 4.8147158501515125e-06, "loss": 0.7688, "step": 1011 }, { "epoch": 0.5024202556782922, "grad_norm": 0.1332199024954632, "learning_rate": 4.814346428458624e-06, "loss": 0.7414, "step": 1012 }, { "epoch": 0.5029167183815316, "grad_norm": 0.1325832205983266, "learning_rate": 4.813976653054993e-06, "loss": 0.8369, "step": 1013 }, { "epoch": 0.503413181084771, "grad_norm": 0.13394042417772226, "learning_rate": 4.813606523997132e-06, "loss": 0.7683, "step": 1014 }, { "epoch": 0.5039096437880104, "grad_norm": 0.15382272131461908, "learning_rate": 4.81323604134161e-06, "loss": 0.7601, "step": 1015 }, { "epoch": 0.5044061064912498, "grad_norm": 0.14301524433648288, "learning_rate": 4.8128652051450485e-06, "loss": 0.7507, "step": 1016 }, { "epoch": 0.5049025691944893, "grad_norm": 0.13250662485949544, "learning_rate": 4.812494015464124e-06, "loss": 0.7627, "step": 1017 }, { "epoch": 0.5053990318977287, "grad_norm": 0.136342520846831, "learning_rate": 4.812122472355569e-06, "loss": 0.7467, "step": 1018 }, { "epoch": 0.505895494600968, "grad_norm": 0.1487816899919886, "learning_rate": 4.811750575876164e-06, "loss": 0.8047, "step": 1019 }, { "epoch": 0.5063919573042075, "grad_norm": 0.13428718634545492, "learning_rate": 4.811378326082751e-06, "loss": 0.7221, "step": 1020 }, { "epoch": 0.5068884200074469, "grad_norm": 0.1439447750897544, "learning_rate": 4.811005723032219e-06, "loss": 0.7775, "step": 1021 }, { "epoch": 0.5073848827106864, "grad_norm": 0.13831804614048945, "learning_rate": 4.810632766781519e-06, "loss": 0.7779, "step": 1022 }, { "epoch": 0.5078813454139258, "grad_norm": 0.1322159420042874, "learning_rate": 4.810259457387647e-06, "loss": 0.7703, "step": 1023 }, { "epoch": 0.5083778081171652, "grad_norm": 0.13665579834505667, "learning_rate": 4.80988579490766e-06, "loss": 0.773, "step": 1024 }, { "epoch": 0.5088742708204046, "grad_norm": 0.13881136535201452, "learning_rate": 4.809511779398665e-06, "loss": 0.7572, "step": 1025 }, { "epoch": 0.509370733523644, "grad_norm": 0.12477421280336831, "learning_rate": 4.809137410917825e-06, "loss": 0.6892, "step": 1026 }, { "epoch": 0.5098671962268835, "grad_norm": 0.1333532065223667, "learning_rate": 4.808762689522356e-06, "loss": 0.7785, "step": 1027 }, { "epoch": 0.5103636589301229, "grad_norm": 0.139072803813187, "learning_rate": 4.8083876152695285e-06, "loss": 0.7882, "step": 1028 }, { "epoch": 0.5108601216333623, "grad_norm": 0.13361520481016406, "learning_rate": 4.808012188216665e-06, "loss": 0.7488, "step": 1029 }, { "epoch": 0.5113565843366017, "grad_norm": 0.13878508065904147, "learning_rate": 4.807636408421146e-06, "loss": 0.8114, "step": 1030 }, { "epoch": 0.5118530470398411, "grad_norm": 0.13851247233308997, "learning_rate": 4.807260275940401e-06, "loss": 0.8061, "step": 1031 }, { "epoch": 0.5123495097430806, "grad_norm": 0.1373268203823735, "learning_rate": 4.806883790831918e-06, "loss": 0.7767, "step": 1032 }, { "epoch": 0.51284597244632, "grad_norm": 0.13370723442504853, "learning_rate": 4.806506953153235e-06, "loss": 0.7912, "step": 1033 }, { "epoch": 0.5133424351495594, "grad_norm": 0.1339473130089365, "learning_rate": 4.806129762961946e-06, "loss": 0.7391, "step": 1034 }, { "epoch": 0.5138388978527988, "grad_norm": 0.15400773010538693, "learning_rate": 4.805752220315699e-06, "loss": 0.7607, "step": 1035 }, { "epoch": 0.5143353605560382, "grad_norm": 0.13291573497843698, "learning_rate": 4.8053743252721954e-06, "loss": 0.7504, "step": 1036 }, { "epoch": 0.5148318232592777, "grad_norm": 0.14283555501521533, "learning_rate": 4.804996077889189e-06, "loss": 0.8104, "step": 1037 }, { "epoch": 0.5153282859625171, "grad_norm": 0.13534285097280963, "learning_rate": 4.8046174782244915e-06, "loss": 0.7844, "step": 1038 }, { "epoch": 0.5158247486657564, "grad_norm": 0.14292129810329937, "learning_rate": 4.804238526335963e-06, "loss": 0.7838, "step": 1039 }, { "epoch": 0.5163212113689959, "grad_norm": 0.13392734203551412, "learning_rate": 4.803859222281522e-06, "loss": 0.7902, "step": 1040 }, { "epoch": 0.5168176740722353, "grad_norm": 0.19974575949479675, "learning_rate": 4.803479566119138e-06, "loss": 0.7953, "step": 1041 }, { "epoch": 0.5173141367754748, "grad_norm": 0.13159644138104465, "learning_rate": 4.803099557906836e-06, "loss": 0.769, "step": 1042 }, { "epoch": 0.5178105994787141, "grad_norm": 0.1388552332573298, "learning_rate": 4.802719197702694e-06, "loss": 0.8074, "step": 1043 }, { "epoch": 0.5183070621819535, "grad_norm": 0.13679177955633, "learning_rate": 4.802338485564843e-06, "loss": 0.7777, "step": 1044 }, { "epoch": 0.518803524885193, "grad_norm": 0.13979496307917497, "learning_rate": 4.8019574215514705e-06, "loss": 0.8268, "step": 1045 }, { "epoch": 0.5192999875884324, "grad_norm": 0.12800445973569957, "learning_rate": 4.801576005720816e-06, "loss": 0.7296, "step": 1046 }, { "epoch": 0.5197964502916719, "grad_norm": 0.13332186476475139, "learning_rate": 4.801194238131171e-06, "loss": 0.7315, "step": 1047 }, { "epoch": 0.5202929129949112, "grad_norm": 0.13956881166358395, "learning_rate": 4.800812118840884e-06, "loss": 0.7848, "step": 1048 }, { "epoch": 0.5207893756981506, "grad_norm": 0.14486219818800664, "learning_rate": 4.800429647908354e-06, "loss": 0.7346, "step": 1049 }, { "epoch": 0.5212858384013901, "grad_norm": 0.13700325136931357, "learning_rate": 4.800046825392039e-06, "loss": 0.7827, "step": 1050 }, { "epoch": 0.5217823011046295, "grad_norm": 0.13238624342923797, "learning_rate": 4.799663651350444e-06, "loss": 0.737, "step": 1051 }, { "epoch": 0.522278763807869, "grad_norm": 0.1409117185449216, "learning_rate": 4.799280125842133e-06, "loss": 0.7865, "step": 1052 }, { "epoch": 0.5227752265111083, "grad_norm": 0.16712731559406424, "learning_rate": 4.79889624892572e-06, "loss": 0.7599, "step": 1053 }, { "epoch": 0.5232716892143477, "grad_norm": 0.14064208506068193, "learning_rate": 4.798512020659876e-06, "loss": 0.7839, "step": 1054 }, { "epoch": 0.5237681519175872, "grad_norm": 0.13461779032052776, "learning_rate": 4.7981274411033225e-06, "loss": 0.7552, "step": 1055 }, { "epoch": 0.5242646146208266, "grad_norm": 0.13913729985952666, "learning_rate": 4.797742510314838e-06, "loss": 0.7936, "step": 1056 }, { "epoch": 0.5247610773240661, "grad_norm": 0.136526668795062, "learning_rate": 4.797357228353252e-06, "loss": 0.7271, "step": 1057 }, { "epoch": 0.5252575400273054, "grad_norm": 0.13602099516938532, "learning_rate": 4.796971595277449e-06, "loss": 0.7623, "step": 1058 }, { "epoch": 0.5257540027305448, "grad_norm": 0.13692035663882426, "learning_rate": 4.796585611146367e-06, "loss": 0.8187, "step": 1059 }, { "epoch": 0.5262504654337843, "grad_norm": 0.13511454024474553, "learning_rate": 4.7961992760189975e-06, "loss": 0.7717, "step": 1060 }, { "epoch": 0.5267469281370237, "grad_norm": 0.13653868560447394, "learning_rate": 4.795812589954385e-06, "loss": 0.7493, "step": 1061 }, { "epoch": 0.5272433908402632, "grad_norm": 0.14284294774022338, "learning_rate": 4.795425553011629e-06, "loss": 0.7828, "step": 1062 }, { "epoch": 0.5277398535435025, "grad_norm": 0.13641287616715173, "learning_rate": 4.795038165249882e-06, "loss": 0.7575, "step": 1063 }, { "epoch": 0.5282363162467419, "grad_norm": 0.13399693271167426, "learning_rate": 4.794650426728349e-06, "loss": 0.7907, "step": 1064 }, { "epoch": 0.5287327789499814, "grad_norm": 0.13237777235979964, "learning_rate": 4.79426233750629e-06, "loss": 0.7839, "step": 1065 }, { "epoch": 0.5292292416532208, "grad_norm": 0.1390129774707514, "learning_rate": 4.793873897643019e-06, "loss": 0.7499, "step": 1066 }, { "epoch": 0.5297257043564603, "grad_norm": 0.15109478098320664, "learning_rate": 4.793485107197902e-06, "loss": 0.7653, "step": 1067 }, { "epoch": 0.5302221670596996, "grad_norm": 0.14002670175893508, "learning_rate": 4.793095966230359e-06, "loss": 0.7565, "step": 1068 }, { "epoch": 0.530718629762939, "grad_norm": 0.13739224347859277, "learning_rate": 4.792706474799865e-06, "loss": 0.698, "step": 1069 }, { "epoch": 0.5312150924661785, "grad_norm": 0.14191602937165326, "learning_rate": 4.792316632965947e-06, "loss": 0.7827, "step": 1070 }, { "epoch": 0.5317115551694179, "grad_norm": 0.13560549121655127, "learning_rate": 4.791926440788186e-06, "loss": 0.7651, "step": 1071 }, { "epoch": 0.5322080178726574, "grad_norm": 0.1476565893427292, "learning_rate": 4.791535898326217e-06, "loss": 0.7883, "step": 1072 }, { "epoch": 0.5327044805758967, "grad_norm": 0.13008136272495874, "learning_rate": 4.791145005639729e-06, "loss": 0.78, "step": 1073 }, { "epoch": 0.5332009432791361, "grad_norm": 0.1327752708528825, "learning_rate": 4.790753762788461e-06, "loss": 0.7236, "step": 1074 }, { "epoch": 0.5336974059823756, "grad_norm": 0.14247698450976287, "learning_rate": 4.79036216983221e-06, "loss": 0.7687, "step": 1075 }, { "epoch": 0.534193868685615, "grad_norm": 0.13898239220917705, "learning_rate": 4.789970226830825e-06, "loss": 0.7758, "step": 1076 }, { "epoch": 0.5346903313888545, "grad_norm": 0.13280694141514973, "learning_rate": 4.789577933844207e-06, "loss": 0.7538, "step": 1077 }, { "epoch": 0.5351867940920938, "grad_norm": 0.14265951347289108, "learning_rate": 4.7891852909323135e-06, "loss": 0.7814, "step": 1078 }, { "epoch": 0.5356832567953332, "grad_norm": 0.13154992991885675, "learning_rate": 4.788792298155152e-06, "loss": 0.7445, "step": 1079 }, { "epoch": 0.5361797194985727, "grad_norm": 0.13519381517124315, "learning_rate": 4.788398955572786e-06, "loss": 0.7487, "step": 1080 }, { "epoch": 0.5366761822018121, "grad_norm": 0.13044574075599727, "learning_rate": 4.788005263245331e-06, "loss": 0.7455, "step": 1081 }, { "epoch": 0.5371726449050515, "grad_norm": 0.13266919082399994, "learning_rate": 4.787611221232957e-06, "loss": 0.7757, "step": 1082 }, { "epoch": 0.5376691076082909, "grad_norm": 0.13376136036691982, "learning_rate": 4.787216829595887e-06, "loss": 0.7519, "step": 1083 }, { "epoch": 0.5381655703115303, "grad_norm": 0.13418604670303574, "learning_rate": 4.786822088394397e-06, "loss": 0.7926, "step": 1084 }, { "epoch": 0.5386620330147698, "grad_norm": 0.13482820857673963, "learning_rate": 4.786426997688817e-06, "loss": 0.7779, "step": 1085 }, { "epoch": 0.5391584957180092, "grad_norm": 0.130173504550406, "learning_rate": 4.786031557539532e-06, "loss": 0.717, "step": 1086 }, { "epoch": 0.5396549584212486, "grad_norm": 0.14790797249373314, "learning_rate": 4.785635768006975e-06, "loss": 0.8366, "step": 1087 }, { "epoch": 0.540151421124488, "grad_norm": 0.13839226189444587, "learning_rate": 4.78523962915164e-06, "loss": 0.7947, "step": 1088 }, { "epoch": 0.5406478838277274, "grad_norm": 0.13079738699092058, "learning_rate": 4.784843141034068e-06, "loss": 0.7289, "step": 1089 }, { "epoch": 0.5411443465309669, "grad_norm": 0.13393354009715142, "learning_rate": 4.784446303714856e-06, "loss": 0.7756, "step": 1090 }, { "epoch": 0.5416408092342063, "grad_norm": 0.14144453676659285, "learning_rate": 4.784049117254656e-06, "loss": 0.793, "step": 1091 }, { "epoch": 0.5421372719374457, "grad_norm": 0.1300964867769227, "learning_rate": 4.783651581714169e-06, "loss": 0.7597, "step": 1092 }, { "epoch": 0.5426337346406851, "grad_norm": 0.1357913073042723, "learning_rate": 4.7832536971541546e-06, "loss": 0.788, "step": 1093 }, { "epoch": 0.5431301973439245, "grad_norm": 0.13222437636600876, "learning_rate": 4.7828554636354216e-06, "loss": 0.7809, "step": 1094 }, { "epoch": 0.543626660047164, "grad_norm": 0.13258977130957175, "learning_rate": 4.782456881218834e-06, "loss": 0.7448, "step": 1095 }, { "epoch": 0.5441231227504034, "grad_norm": 0.13730375474418763, "learning_rate": 4.782057949965307e-06, "loss": 0.7784, "step": 1096 }, { "epoch": 0.5446195854536428, "grad_norm": 0.13642288559333154, "learning_rate": 4.781658669935813e-06, "loss": 0.781, "step": 1097 }, { "epoch": 0.5451160481568822, "grad_norm": 0.1401061103174583, "learning_rate": 4.7812590411913755e-06, "loss": 0.7655, "step": 1098 }, { "epoch": 0.5456125108601216, "grad_norm": 0.13382843638430478, "learning_rate": 4.780859063793071e-06, "loss": 0.761, "step": 1099 }, { "epoch": 0.5461089735633611, "grad_norm": 0.13467015132419832, "learning_rate": 4.780458737802028e-06, "loss": 0.8425, "step": 1100 }, { "epoch": 0.5466054362666005, "grad_norm": 0.13538805042285004, "learning_rate": 4.780058063279432e-06, "loss": 0.8023, "step": 1101 }, { "epoch": 0.5471018989698399, "grad_norm": 0.13332827992090326, "learning_rate": 4.779657040286519e-06, "loss": 0.7454, "step": 1102 }, { "epoch": 0.5475983616730793, "grad_norm": 0.13714991044262492, "learning_rate": 4.779255668884579e-06, "loss": 0.7781, "step": 1103 }, { "epoch": 0.5480948243763187, "grad_norm": 0.13230879275054402, "learning_rate": 4.778853949134956e-06, "loss": 0.7512, "step": 1104 }, { "epoch": 0.5485912870795582, "grad_norm": 0.13456603648064147, "learning_rate": 4.778451881099045e-06, "loss": 0.8209, "step": 1105 }, { "epoch": 0.5490877497827976, "grad_norm": 0.1308167779233721, "learning_rate": 4.7780494648382955e-06, "loss": 0.764, "step": 1106 }, { "epoch": 0.549584212486037, "grad_norm": 0.16101308796343475, "learning_rate": 4.777646700414211e-06, "loss": 0.7786, "step": 1107 }, { "epoch": 0.5500806751892764, "grad_norm": 0.1459790001808385, "learning_rate": 4.777243587888348e-06, "loss": 0.7868, "step": 1108 }, { "epoch": 0.5505771378925158, "grad_norm": 0.13657235299783674, "learning_rate": 4.776840127322316e-06, "loss": 0.8113, "step": 1109 }, { "epoch": 0.5510736005957553, "grad_norm": 0.14294798523667074, "learning_rate": 4.7764363187777765e-06, "loss": 0.7649, "step": 1110 }, { "epoch": 0.5515700632989947, "grad_norm": 0.13846612184838433, "learning_rate": 4.776032162316445e-06, "loss": 0.7307, "step": 1111 }, { "epoch": 0.552066526002234, "grad_norm": 0.14690601721943153, "learning_rate": 4.775627658000091e-06, "loss": 0.7695, "step": 1112 }, { "epoch": 0.5525629887054735, "grad_norm": 0.14581794719191593, "learning_rate": 4.775222805890537e-06, "loss": 0.8154, "step": 1113 }, { "epoch": 0.5530594514087129, "grad_norm": 0.14444888719097665, "learning_rate": 4.7748176060496574e-06, "loss": 0.8383, "step": 1114 }, { "epoch": 0.5535559141119524, "grad_norm": 0.13254092300237755, "learning_rate": 4.77441205853938e-06, "loss": 0.8386, "step": 1115 }, { "epoch": 0.5540523768151917, "grad_norm": 0.1347220636449837, "learning_rate": 4.774006163421687e-06, "loss": 0.752, "step": 1116 }, { "epoch": 0.5545488395184311, "grad_norm": 0.13548044148354343, "learning_rate": 4.773599920758614e-06, "loss": 0.7409, "step": 1117 }, { "epoch": 0.5550453022216706, "grad_norm": 0.14102047024443465, "learning_rate": 4.773193330612246e-06, "loss": 0.7561, "step": 1118 }, { "epoch": 0.55554176492491, "grad_norm": 0.13374789735214362, "learning_rate": 4.772786393044726e-06, "loss": 0.7837, "step": 1119 }, { "epoch": 0.5560382276281495, "grad_norm": 0.13483434548142853, "learning_rate": 4.772379108118247e-06, "loss": 0.7457, "step": 1120 }, { "epoch": 0.5565346903313888, "grad_norm": 0.13493609143971075, "learning_rate": 4.7719714758950565e-06, "loss": 0.7414, "step": 1121 }, { "epoch": 0.5570311530346282, "grad_norm": 0.14080955544969456, "learning_rate": 4.771563496437454e-06, "loss": 0.7954, "step": 1122 }, { "epoch": 0.5575276157378677, "grad_norm": 0.15315063885453276, "learning_rate": 4.771155169807793e-06, "loss": 0.787, "step": 1123 }, { "epoch": 0.5580240784411071, "grad_norm": 0.1343038161486333, "learning_rate": 4.770746496068479e-06, "loss": 0.7636, "step": 1124 }, { "epoch": 0.5585205411443466, "grad_norm": 0.1323279813259488, "learning_rate": 4.770337475281972e-06, "loss": 0.7246, "step": 1125 }, { "epoch": 0.5590170038475859, "grad_norm": 0.12987821538488584, "learning_rate": 4.769928107510784e-06, "loss": 0.753, "step": 1126 }, { "epoch": 0.5595134665508253, "grad_norm": 0.14121741134888693, "learning_rate": 4.7695183928174804e-06, "loss": 0.7721, "step": 1127 }, { "epoch": 0.5600099292540648, "grad_norm": 0.13095267808109395, "learning_rate": 4.76910833126468e-06, "loss": 0.7727, "step": 1128 }, { "epoch": 0.5605063919573042, "grad_norm": 0.13160852813062984, "learning_rate": 4.768697922915053e-06, "loss": 0.7499, "step": 1129 }, { "epoch": 0.5610028546605437, "grad_norm": 0.13131472336248304, "learning_rate": 4.768287167831323e-06, "loss": 0.7271, "step": 1130 }, { "epoch": 0.561499317363783, "grad_norm": 0.14344145034960104, "learning_rate": 4.767876066076271e-06, "loss": 0.8335, "step": 1131 }, { "epoch": 0.5619957800670224, "grad_norm": 0.13562056297798028, "learning_rate": 4.7674646177127236e-06, "loss": 0.8303, "step": 1132 }, { "epoch": 0.5624922427702619, "grad_norm": 0.1397414253999263, "learning_rate": 4.767052822803565e-06, "loss": 0.7954, "step": 1133 }, { "epoch": 0.5629887054735013, "grad_norm": 0.13474425151028716, "learning_rate": 4.7666406814117324e-06, "loss": 0.7554, "step": 1134 }, { "epoch": 0.5634851681767408, "grad_norm": 0.13073363088146278, "learning_rate": 4.7662281936002155e-06, "loss": 0.7659, "step": 1135 }, { "epoch": 0.5639816308799801, "grad_norm": 0.13266775105459727, "learning_rate": 4.765815359432054e-06, "loss": 0.8103, "step": 1136 }, { "epoch": 0.5644780935832195, "grad_norm": 0.14135466000280938, "learning_rate": 4.765402178970345e-06, "loss": 0.7477, "step": 1137 }, { "epoch": 0.564974556286459, "grad_norm": 0.1364663264168024, "learning_rate": 4.764988652278235e-06, "loss": 0.7801, "step": 1138 }, { "epoch": 0.5654710189896984, "grad_norm": 0.133069946323726, "learning_rate": 4.764574779418927e-06, "loss": 0.7814, "step": 1139 }, { "epoch": 0.5659674816929379, "grad_norm": 0.13667566517665522, "learning_rate": 4.7641605604556725e-06, "loss": 0.7193, "step": 1140 }, { "epoch": 0.5664639443961772, "grad_norm": 0.14023909922018574, "learning_rate": 4.763745995451781e-06, "loss": 0.7794, "step": 1141 }, { "epoch": 0.5669604070994166, "grad_norm": 0.1343364576457988, "learning_rate": 4.763331084470609e-06, "loss": 0.7651, "step": 1142 }, { "epoch": 0.5674568698026561, "grad_norm": 0.13304454158801782, "learning_rate": 4.762915827575571e-06, "loss": 0.741, "step": 1143 }, { "epoch": 0.5679533325058955, "grad_norm": 0.13140270388519174, "learning_rate": 4.762500224830132e-06, "loss": 0.7353, "step": 1144 }, { "epoch": 0.568449795209135, "grad_norm": 0.13074327953386966, "learning_rate": 4.762084276297811e-06, "loss": 0.765, "step": 1145 }, { "epoch": 0.5689462579123743, "grad_norm": 0.13281986443064125, "learning_rate": 4.761667982042176e-06, "loss": 0.7372, "step": 1146 }, { "epoch": 0.5694427206156137, "grad_norm": 0.13419581835055136, "learning_rate": 4.7612513421268546e-06, "loss": 0.7767, "step": 1147 }, { "epoch": 0.5699391833188532, "grad_norm": 0.13612919993671027, "learning_rate": 4.760834356615521e-06, "loss": 0.7834, "step": 1148 }, { "epoch": 0.5704356460220926, "grad_norm": 0.13693184772625464, "learning_rate": 4.760417025571907e-06, "loss": 0.7544, "step": 1149 }, { "epoch": 0.570932108725332, "grad_norm": 0.13395312495200518, "learning_rate": 4.759999349059793e-06, "loss": 0.7411, "step": 1150 }, { "epoch": 0.5714285714285714, "grad_norm": 0.1481697018642554, "learning_rate": 4.759581327143015e-06, "loss": 0.7968, "step": 1151 }, { "epoch": 0.5719250341318108, "grad_norm": 0.1371234108294726, "learning_rate": 4.7591629598854595e-06, "loss": 0.8194, "step": 1152 }, { "epoch": 0.5724214968350503, "grad_norm": 0.13604199584938104, "learning_rate": 4.7587442473510705e-06, "loss": 0.7528, "step": 1153 }, { "epoch": 0.5729179595382897, "grad_norm": 0.1346379831014206, "learning_rate": 4.758325189603838e-06, "loss": 0.7171, "step": 1154 }, { "epoch": 0.573414422241529, "grad_norm": 0.14237506813976503, "learning_rate": 4.757905786707811e-06, "loss": 0.7842, "step": 1155 }, { "epoch": 0.5739108849447685, "grad_norm": 0.13403550334730185, "learning_rate": 4.757486038727086e-06, "loss": 0.7931, "step": 1156 }, { "epoch": 0.5744073476480079, "grad_norm": 0.135231376685361, "learning_rate": 4.757065945725816e-06, "loss": 0.7154, "step": 1157 }, { "epoch": 0.5749038103512474, "grad_norm": 0.13684138959608425, "learning_rate": 4.756645507768207e-06, "loss": 0.7383, "step": 1158 }, { "epoch": 0.5754002730544868, "grad_norm": 0.1460330953509727, "learning_rate": 4.756224724918513e-06, "loss": 0.7872, "step": 1159 }, { "epoch": 0.5758967357577262, "grad_norm": 0.1318765034689836, "learning_rate": 4.755803597241047e-06, "loss": 0.768, "step": 1160 }, { "epoch": 0.5763931984609656, "grad_norm": 0.13719986250467792, "learning_rate": 4.755382124800169e-06, "loss": 0.7453, "step": 1161 }, { "epoch": 0.576889661164205, "grad_norm": 0.14235485272473405, "learning_rate": 4.754960307660296e-06, "loss": 0.7821, "step": 1162 }, { "epoch": 0.5773861238674445, "grad_norm": 0.1356872066401077, "learning_rate": 4.754538145885896e-06, "loss": 0.7501, "step": 1163 }, { "epoch": 0.5778825865706839, "grad_norm": 0.13978700563928276, "learning_rate": 4.754115639541489e-06, "loss": 0.7891, "step": 1164 }, { "epoch": 0.5783790492739233, "grad_norm": 0.1386484989822081, "learning_rate": 4.7536927886916486e-06, "loss": 0.8111, "step": 1165 }, { "epoch": 0.5788755119771627, "grad_norm": 0.1352803729379626, "learning_rate": 4.753269593401e-06, "loss": 0.7986, "step": 1166 }, { "epoch": 0.5793719746804021, "grad_norm": 0.14338323635053893, "learning_rate": 4.752846053734223e-06, "loss": 0.8083, "step": 1167 }, { "epoch": 0.5798684373836416, "grad_norm": 0.14290830558032838, "learning_rate": 4.752422169756048e-06, "loss": 0.7245, "step": 1168 }, { "epoch": 0.580364900086881, "grad_norm": 0.14435662586254663, "learning_rate": 4.7519979415312595e-06, "loss": 0.7937, "step": 1169 }, { "epoch": 0.5808613627901204, "grad_norm": 0.1301445915669164, "learning_rate": 4.751573369124693e-06, "loss": 0.7629, "step": 1170 }, { "epoch": 0.5813578254933598, "grad_norm": 0.14135646579813632, "learning_rate": 4.751148452601239e-06, "loss": 0.8281, "step": 1171 }, { "epoch": 0.5818542881965992, "grad_norm": 0.14137941871842752, "learning_rate": 4.750723192025839e-06, "loss": 0.751, "step": 1172 }, { "epoch": 0.5823507508998387, "grad_norm": 0.13846670183254522, "learning_rate": 4.750297587463486e-06, "loss": 0.8034, "step": 1173 }, { "epoch": 0.5828472136030781, "grad_norm": 0.13325672340874672, "learning_rate": 4.749871638979227e-06, "loss": 0.6927, "step": 1174 }, { "epoch": 0.5833436763063174, "grad_norm": 0.125743329419156, "learning_rate": 4.749445346638163e-06, "loss": 0.7256, "step": 1175 }, { "epoch": 0.5838401390095569, "grad_norm": 0.1417089701135048, "learning_rate": 4.749018710505444e-06, "loss": 0.7831, "step": 1176 }, { "epoch": 0.5843366017127963, "grad_norm": 0.13605194542345714, "learning_rate": 4.748591730646276e-06, "loss": 0.8015, "step": 1177 }, { "epoch": 0.5848330644160358, "grad_norm": 0.13481999719592982, "learning_rate": 4.748164407125915e-06, "loss": 0.7615, "step": 1178 }, { "epoch": 0.5853295271192752, "grad_norm": 0.14186151175015785, "learning_rate": 4.747736740009671e-06, "loss": 0.764, "step": 1179 }, { "epoch": 0.5858259898225145, "grad_norm": 0.14184400720811477, "learning_rate": 4.747308729362906e-06, "loss": 0.7587, "step": 1180 }, { "epoch": 0.586322452525754, "grad_norm": 0.1385901962211324, "learning_rate": 4.746880375251034e-06, "loss": 0.7336, "step": 1181 }, { "epoch": 0.5868189152289934, "grad_norm": 0.13451407782163222, "learning_rate": 4.7464516777395234e-06, "loss": 0.7429, "step": 1182 }, { "epoch": 0.5873153779322329, "grad_norm": 0.13022215035798407, "learning_rate": 4.746022636893894e-06, "loss": 0.7643, "step": 1183 }, { "epoch": 0.5878118406354722, "grad_norm": 0.13690715708703163, "learning_rate": 4.745593252779715e-06, "loss": 0.8217, "step": 1184 }, { "epoch": 0.5883083033387116, "grad_norm": 0.13748468118288174, "learning_rate": 4.745163525462613e-06, "loss": 0.7703, "step": 1185 }, { "epoch": 0.5888047660419511, "grad_norm": 0.1405973365721223, "learning_rate": 4.744733455008265e-06, "loss": 0.7383, "step": 1186 }, { "epoch": 0.5893012287451905, "grad_norm": 0.13717194937849955, "learning_rate": 4.7443030414824e-06, "loss": 0.7589, "step": 1187 }, { "epoch": 0.58979769144843, "grad_norm": 0.13528229246468063, "learning_rate": 4.743872284950799e-06, "loss": 0.75, "step": 1188 }, { "epoch": 0.5902941541516693, "grad_norm": 0.133425574497451, "learning_rate": 4.743441185479298e-06, "loss": 0.7411, "step": 1189 }, { "epoch": 0.5907906168549087, "grad_norm": 0.1324925987986178, "learning_rate": 4.743009743133782e-06, "loss": 0.7685, "step": 1190 }, { "epoch": 0.5912870795581482, "grad_norm": 0.1369066542388308, "learning_rate": 4.742577957980191e-06, "loss": 0.7832, "step": 1191 }, { "epoch": 0.5917835422613876, "grad_norm": 0.13896137808156944, "learning_rate": 4.7421458300845156e-06, "loss": 0.7763, "step": 1192 }, { "epoch": 0.5922800049646271, "grad_norm": 0.13714450419418717, "learning_rate": 4.7417133595128e-06, "loss": 0.7755, "step": 1193 }, { "epoch": 0.5927764676678664, "grad_norm": 0.13866795158105188, "learning_rate": 4.741280546331142e-06, "loss": 0.7525, "step": 1194 }, { "epoch": 0.5932729303711058, "grad_norm": 0.13821511176421178, "learning_rate": 4.740847390605688e-06, "loss": 0.7808, "step": 1195 }, { "epoch": 0.5937693930743453, "grad_norm": 0.1380634473432959, "learning_rate": 4.740413892402639e-06, "loss": 0.7547, "step": 1196 }, { "epoch": 0.5942658557775847, "grad_norm": 0.138297189072512, "learning_rate": 4.73998005178825e-06, "loss": 0.7983, "step": 1197 }, { "epoch": 0.5947623184808242, "grad_norm": 0.13758909195503097, "learning_rate": 4.739545868828824e-06, "loss": 0.7437, "step": 1198 }, { "epoch": 0.5952587811840635, "grad_norm": 0.13643426919261928, "learning_rate": 4.739111343590722e-06, "loss": 0.7617, "step": 1199 }, { "epoch": 0.5957552438873029, "grad_norm": 0.13419264066031278, "learning_rate": 4.7386764761403515e-06, "loss": 0.7577, "step": 1200 }, { "epoch": 0.5962517065905424, "grad_norm": 0.1458962881146663, "learning_rate": 4.738241266544176e-06, "loss": 0.7542, "step": 1201 }, { "epoch": 0.5967481692937818, "grad_norm": 0.13326276596473177, "learning_rate": 4.737805714868711e-06, "loss": 0.7639, "step": 1202 }, { "epoch": 0.5972446319970213, "grad_norm": 0.1327575012050733, "learning_rate": 4.737369821180522e-06, "loss": 0.7815, "step": 1203 }, { "epoch": 0.5977410947002606, "grad_norm": 0.13976077843344958, "learning_rate": 4.736933585546229e-06, "loss": 0.771, "step": 1204 }, { "epoch": 0.5982375574035, "grad_norm": 0.1448357997256506, "learning_rate": 4.736497008032505e-06, "loss": 0.797, "step": 1205 }, { "epoch": 0.5987340201067395, "grad_norm": 0.14152292832314567, "learning_rate": 4.7360600887060735e-06, "loss": 0.8046, "step": 1206 }, { "epoch": 0.5992304828099789, "grad_norm": 0.13983648625283432, "learning_rate": 4.735622827633709e-06, "loss": 0.7767, "step": 1207 }, { "epoch": 0.5997269455132184, "grad_norm": 0.13723445239630844, "learning_rate": 4.7351852248822405e-06, "loss": 0.7898, "step": 1208 }, { "epoch": 0.6002234082164577, "grad_norm": 0.14040576284527934, "learning_rate": 4.734747280518549e-06, "loss": 0.7507, "step": 1209 }, { "epoch": 0.6007198709196971, "grad_norm": 0.13281149215833118, "learning_rate": 4.734308994609568e-06, "loss": 0.7344, "step": 1210 }, { "epoch": 0.6012163336229366, "grad_norm": 0.14696680910440754, "learning_rate": 4.73387036722228e-06, "loss": 0.8057, "step": 1211 }, { "epoch": 0.601712796326176, "grad_norm": 0.14656238080300293, "learning_rate": 4.733431398423725e-06, "loss": 0.7715, "step": 1212 }, { "epoch": 0.6022092590294155, "grad_norm": 0.13640813199119411, "learning_rate": 4.732992088280991e-06, "loss": 0.7887, "step": 1213 }, { "epoch": 0.6027057217326548, "grad_norm": 0.14416087368058042, "learning_rate": 4.73255243686122e-06, "loss": 0.7845, "step": 1214 }, { "epoch": 0.6032021844358942, "grad_norm": 0.1448460344463032, "learning_rate": 4.732112444231604e-06, "loss": 0.7858, "step": 1215 }, { "epoch": 0.6036986471391337, "grad_norm": 0.14284369655429804, "learning_rate": 4.731672110459391e-06, "loss": 0.7595, "step": 1216 }, { "epoch": 0.6041951098423731, "grad_norm": 0.13333186060643346, "learning_rate": 4.7312314356118774e-06, "loss": 0.7787, "step": 1217 }, { "epoch": 0.6046915725456126, "grad_norm": 0.13448997562242113, "learning_rate": 4.7307904197564146e-06, "loss": 0.7907, "step": 1218 }, { "epoch": 0.6051880352488519, "grad_norm": 0.13856754170442995, "learning_rate": 4.730349062960405e-06, "loss": 0.703, "step": 1219 }, { "epoch": 0.6056844979520913, "grad_norm": 0.13197537382149402, "learning_rate": 4.7299073652912995e-06, "loss": 0.7698, "step": 1220 }, { "epoch": 0.6061809606553308, "grad_norm": 0.1308991610627273, "learning_rate": 4.729465326816609e-06, "loss": 0.7331, "step": 1221 }, { "epoch": 0.6066774233585702, "grad_norm": 0.13827465076211878, "learning_rate": 4.72902294760389e-06, "loss": 0.8255, "step": 1222 }, { "epoch": 0.6071738860618096, "grad_norm": 0.14152077690894366, "learning_rate": 4.7285802277207525e-06, "loss": 0.7757, "step": 1223 }, { "epoch": 0.607670348765049, "grad_norm": 0.13327976935901228, "learning_rate": 4.72813716723486e-06, "loss": 0.7834, "step": 1224 }, { "epoch": 0.6081668114682884, "grad_norm": 0.14560592025153601, "learning_rate": 4.727693766213927e-06, "loss": 0.8553, "step": 1225 }, { "epoch": 0.6086632741715279, "grad_norm": 0.139357875506195, "learning_rate": 4.72725002472572e-06, "loss": 0.7334, "step": 1226 }, { "epoch": 0.6091597368747673, "grad_norm": 0.1414789031418474, "learning_rate": 4.726805942838058e-06, "loss": 0.7975, "step": 1227 }, { "epoch": 0.6096561995780067, "grad_norm": 0.13771948814756751, "learning_rate": 4.726361520618812e-06, "loss": 0.738, "step": 1228 }, { "epoch": 0.6101526622812461, "grad_norm": 0.12954626047367315, "learning_rate": 4.725916758135905e-06, "loss": 0.7446, "step": 1229 }, { "epoch": 0.6106491249844855, "grad_norm": 0.1321303236295978, "learning_rate": 4.72547165545731e-06, "loss": 0.7225, "step": 1230 }, { "epoch": 0.611145587687725, "grad_norm": 0.13934783137333412, "learning_rate": 4.725026212651056e-06, "loss": 0.7504, "step": 1231 }, { "epoch": 0.6116420503909644, "grad_norm": 0.1387209464213516, "learning_rate": 4.72458042978522e-06, "loss": 0.779, "step": 1232 }, { "epoch": 0.6121385130942038, "grad_norm": 0.1417563704806412, "learning_rate": 4.7241343069279355e-06, "loss": 0.7727, "step": 1233 }, { "epoch": 0.6126349757974432, "grad_norm": 0.1420634383737698, "learning_rate": 4.723687844147383e-06, "loss": 0.8045, "step": 1234 }, { "epoch": 0.6131314385006826, "grad_norm": 0.142217432211696, "learning_rate": 4.723241041511797e-06, "loss": 0.7966, "step": 1235 }, { "epoch": 0.6136279012039221, "grad_norm": 0.13955237043987861, "learning_rate": 4.722793899089465e-06, "loss": 0.763, "step": 1236 }, { "epoch": 0.6141243639071615, "grad_norm": 0.1383129773036087, "learning_rate": 4.7223464169487255e-06, "loss": 0.7865, "step": 1237 }, { "epoch": 0.6146208266104008, "grad_norm": 0.1318995281229908, "learning_rate": 4.721898595157969e-06, "loss": 0.7592, "step": 1238 }, { "epoch": 0.6151172893136403, "grad_norm": 0.13409755823637834, "learning_rate": 4.721450433785637e-06, "loss": 0.7349, "step": 1239 }, { "epoch": 0.6156137520168797, "grad_norm": 0.13477979687672653, "learning_rate": 4.721001932900224e-06, "loss": 0.7168, "step": 1240 }, { "epoch": 0.6161102147201192, "grad_norm": 0.1339375435013451, "learning_rate": 4.720553092570278e-06, "loss": 0.7679, "step": 1241 }, { "epoch": 0.6166066774233586, "grad_norm": 0.13714226350401248, "learning_rate": 4.720103912864395e-06, "loss": 0.8179, "step": 1242 }, { "epoch": 0.617103140126598, "grad_norm": 0.1495165323744844, "learning_rate": 4.719654393851225e-06, "loss": 0.8059, "step": 1243 }, { "epoch": 0.6175996028298374, "grad_norm": 0.13210522151583382, "learning_rate": 4.719204535599472e-06, "loss": 0.7327, "step": 1244 }, { "epoch": 0.6180960655330768, "grad_norm": 0.135327563201833, "learning_rate": 4.718754338177887e-06, "loss": 0.7884, "step": 1245 }, { "epoch": 0.6185925282363163, "grad_norm": 0.1493290671655753, "learning_rate": 4.7183038016552765e-06, "loss": 0.8058, "step": 1246 }, { "epoch": 0.6190889909395557, "grad_norm": 0.13731834920199315, "learning_rate": 4.717852926100497e-06, "loss": 0.7525, "step": 1247 }, { "epoch": 0.619585453642795, "grad_norm": 0.13121903632834106, "learning_rate": 4.717401711582459e-06, "loss": 0.748, "step": 1248 }, { "epoch": 0.6200819163460345, "grad_norm": 0.13357953738588796, "learning_rate": 4.716950158170123e-06, "loss": 0.7191, "step": 1249 }, { "epoch": 0.6205783790492739, "grad_norm": 0.1333298023226559, "learning_rate": 4.716498265932501e-06, "loss": 0.7536, "step": 1250 }, { "epoch": 0.6210748417525134, "grad_norm": 0.13600946084924598, "learning_rate": 4.7160460349386575e-06, "loss": 0.7327, "step": 1251 }, { "epoch": 0.6215713044557528, "grad_norm": 0.15789147253069744, "learning_rate": 4.7155934652577095e-06, "loss": 0.7542, "step": 1252 }, { "epoch": 0.6220677671589921, "grad_norm": 0.14117884230963118, "learning_rate": 4.7151405569588245e-06, "loss": 0.7566, "step": 1253 }, { "epoch": 0.6225642298622316, "grad_norm": 0.1434269696587236, "learning_rate": 4.714687310111224e-06, "loss": 0.7661, "step": 1254 }, { "epoch": 0.623060692565471, "grad_norm": 0.13233837304493712, "learning_rate": 4.714233724784176e-06, "loss": 0.7387, "step": 1255 }, { "epoch": 0.6235571552687105, "grad_norm": 0.13451446763955208, "learning_rate": 4.713779801047006e-06, "loss": 0.7593, "step": 1256 }, { "epoch": 0.6240536179719498, "grad_norm": 0.14045733263477267, "learning_rate": 4.713325538969089e-06, "loss": 0.7588, "step": 1257 }, { "epoch": 0.6245500806751892, "grad_norm": 0.13811994049159718, "learning_rate": 4.7128709386198516e-06, "loss": 0.7849, "step": 1258 }, { "epoch": 0.6250465433784287, "grad_norm": 0.13661650047767832, "learning_rate": 4.712416000068771e-06, "loss": 0.7871, "step": 1259 }, { "epoch": 0.6255430060816681, "grad_norm": 0.13356087506731615, "learning_rate": 4.7119607233853795e-06, "loss": 0.6997, "step": 1260 }, { "epoch": 0.6260394687849076, "grad_norm": 0.14326475290125062, "learning_rate": 4.7115051086392575e-06, "loss": 0.7913, "step": 1261 }, { "epoch": 0.6265359314881469, "grad_norm": 0.14111338297951223, "learning_rate": 4.711049155900037e-06, "loss": 0.775, "step": 1262 }, { "epoch": 0.6270323941913863, "grad_norm": 0.1384448111488101, "learning_rate": 4.710592865237406e-06, "loss": 0.7453, "step": 1263 }, { "epoch": 0.6275288568946258, "grad_norm": 0.13688056535276805, "learning_rate": 4.710136236721099e-06, "loss": 0.7835, "step": 1264 }, { "epoch": 0.6280253195978652, "grad_norm": 0.1369014193152322, "learning_rate": 4.709679270420905e-06, "loss": 0.7708, "step": 1265 }, { "epoch": 0.6285217823011047, "grad_norm": 0.13619728251726942, "learning_rate": 4.709221966406664e-06, "loss": 0.7593, "step": 1266 }, { "epoch": 0.629018245004344, "grad_norm": 0.13341268255654185, "learning_rate": 4.7087643247482675e-06, "loss": 0.7857, "step": 1267 }, { "epoch": 0.6295147077075834, "grad_norm": 0.1396936626784199, "learning_rate": 4.70830634551566e-06, "loss": 0.7934, "step": 1268 }, { "epoch": 0.6300111704108229, "grad_norm": 0.13784257345636344, "learning_rate": 4.7078480287788335e-06, "loss": 0.8105, "step": 1269 }, { "epoch": 0.6305076331140623, "grad_norm": 0.14024550205782157, "learning_rate": 4.707389374607837e-06, "loss": 0.7744, "step": 1270 }, { "epoch": 0.6310040958173018, "grad_norm": 0.1331894256857835, "learning_rate": 4.7069303830727665e-06, "loss": 0.7454, "step": 1271 }, { "epoch": 0.6315005585205411, "grad_norm": 0.13774853065612155, "learning_rate": 4.706471054243773e-06, "loss": 0.7572, "step": 1272 }, { "epoch": 0.6319970212237805, "grad_norm": 0.13274604497668133, "learning_rate": 4.7060113881910565e-06, "loss": 0.8029, "step": 1273 }, { "epoch": 0.63249348392702, "grad_norm": 0.13007067612592235, "learning_rate": 4.705551384984871e-06, "loss": 0.7329, "step": 1274 }, { "epoch": 0.6329899466302594, "grad_norm": 0.1348605302563776, "learning_rate": 4.705091044695519e-06, "loss": 0.7854, "step": 1275 }, { "epoch": 0.6334864093334989, "grad_norm": 0.13943981892113053, "learning_rate": 4.7046303673933566e-06, "loss": 0.7621, "step": 1276 }, { "epoch": 0.6339828720367382, "grad_norm": 0.13080686345224168, "learning_rate": 4.7041693531487905e-06, "loss": 0.7511, "step": 1277 }, { "epoch": 0.6344793347399776, "grad_norm": 0.13367213440770978, "learning_rate": 4.703708002032281e-06, "loss": 0.8208, "step": 1278 }, { "epoch": 0.6349757974432171, "grad_norm": 0.1338922082718588, "learning_rate": 4.703246314114337e-06, "loss": 0.7583, "step": 1279 }, { "epoch": 0.6354722601464565, "grad_norm": 0.13384133583383315, "learning_rate": 4.702784289465521e-06, "loss": 0.7247, "step": 1280 }, { "epoch": 0.635968722849696, "grad_norm": 0.14316701050668065, "learning_rate": 4.702321928156443e-06, "loss": 0.7769, "step": 1281 }, { "epoch": 0.6364651855529353, "grad_norm": 0.1363830874610169, "learning_rate": 4.701859230257772e-06, "loss": 0.7508, "step": 1282 }, { "epoch": 0.6369616482561747, "grad_norm": 0.13044310663712724, "learning_rate": 4.701396195840221e-06, "loss": 0.6918, "step": 1283 }, { "epoch": 0.6374581109594142, "grad_norm": 0.14799662097345562, "learning_rate": 4.700932824974558e-06, "loss": 0.8122, "step": 1284 }, { "epoch": 0.6379545736626536, "grad_norm": 0.13719379503307672, "learning_rate": 4.700469117731602e-06, "loss": 0.7589, "step": 1285 }, { "epoch": 0.6384510363658931, "grad_norm": 0.15005736439386755, "learning_rate": 4.700005074182223e-06, "loss": 0.7942, "step": 1286 }, { "epoch": 0.6389474990691324, "grad_norm": 0.13361765649422278, "learning_rate": 4.699540694397344e-06, "loss": 0.7418, "step": 1287 }, { "epoch": 0.6394439617723718, "grad_norm": 0.12927575638530414, "learning_rate": 4.699075978447936e-06, "loss": 0.7314, "step": 1288 }, { "epoch": 0.6399404244756113, "grad_norm": 0.13136950718329457, "learning_rate": 4.698610926405024e-06, "loss": 0.7591, "step": 1289 }, { "epoch": 0.6404368871788507, "grad_norm": 0.1404280522487197, "learning_rate": 4.6981455383396845e-06, "loss": 0.8238, "step": 1290 }, { "epoch": 0.64093334988209, "grad_norm": 0.1303989293917867, "learning_rate": 4.697679814323044e-06, "loss": 0.7489, "step": 1291 }, { "epoch": 0.6414298125853295, "grad_norm": 0.13338423096237687, "learning_rate": 4.69721375442628e-06, "loss": 0.7306, "step": 1292 }, { "epoch": 0.6419262752885689, "grad_norm": 0.13305102973746538, "learning_rate": 4.696747358720624e-06, "loss": 0.7886, "step": 1293 }, { "epoch": 0.6424227379918084, "grad_norm": 0.13820820437935433, "learning_rate": 4.696280627277356e-06, "loss": 0.7081, "step": 1294 }, { "epoch": 0.6429192006950478, "grad_norm": 0.13801203340059798, "learning_rate": 4.695813560167809e-06, "loss": 0.7402, "step": 1295 }, { "epoch": 0.6434156633982872, "grad_norm": 0.13519623515883733, "learning_rate": 4.695346157463367e-06, "loss": 0.7847, "step": 1296 }, { "epoch": 0.6439121261015266, "grad_norm": 0.13214986252403507, "learning_rate": 4.6948784192354645e-06, "loss": 0.7734, "step": 1297 }, { "epoch": 0.644408588804766, "grad_norm": 0.1378462366811087, "learning_rate": 4.694410345555588e-06, "loss": 0.6981, "step": 1298 }, { "epoch": 0.6449050515080055, "grad_norm": 0.1366789326165814, "learning_rate": 4.6939419364952734e-06, "loss": 0.7557, "step": 1299 }, { "epoch": 0.6454015142112449, "grad_norm": 0.14607757273947794, "learning_rate": 4.693473192126112e-06, "loss": 0.7782, "step": 1300 }, { "epoch": 0.6458979769144843, "grad_norm": 0.13866618330491343, "learning_rate": 4.693004112519743e-06, "loss": 0.7827, "step": 1301 }, { "epoch": 0.6463944396177237, "grad_norm": 0.13908697250264154, "learning_rate": 4.692534697747858e-06, "loss": 0.7945, "step": 1302 }, { "epoch": 0.6468909023209631, "grad_norm": 0.13233872664417434, "learning_rate": 4.692064947882198e-06, "loss": 0.786, "step": 1303 }, { "epoch": 0.6473873650242026, "grad_norm": 0.13187870756426126, "learning_rate": 4.6915948629945585e-06, "loss": 0.6941, "step": 1304 }, { "epoch": 0.647883827727442, "grad_norm": 0.13132502035875618, "learning_rate": 4.691124443156784e-06, "loss": 0.7943, "step": 1305 }, { "epoch": 0.6483802904306813, "grad_norm": 0.1473223707369921, "learning_rate": 4.690653688440769e-06, "loss": 0.758, "step": 1306 }, { "epoch": 0.6488767531339208, "grad_norm": 0.1374434003262379, "learning_rate": 4.6901825989184634e-06, "loss": 0.7477, "step": 1307 }, { "epoch": 0.6493732158371602, "grad_norm": 0.13383271780435643, "learning_rate": 4.689711174661864e-06, "loss": 0.7669, "step": 1308 }, { "epoch": 0.6498696785403997, "grad_norm": 0.13444066067651259, "learning_rate": 4.689239415743021e-06, "loss": 0.7757, "step": 1309 }, { "epoch": 0.6503661412436391, "grad_norm": 0.12857578926705554, "learning_rate": 4.688767322234035e-06, "loss": 0.7442, "step": 1310 }, { "epoch": 0.6508626039468784, "grad_norm": 0.13406381840115994, "learning_rate": 4.688294894207058e-06, "loss": 0.7598, "step": 1311 }, { "epoch": 0.6513590666501179, "grad_norm": 0.14039516549020117, "learning_rate": 4.687822131734293e-06, "loss": 0.7842, "step": 1312 }, { "epoch": 0.6518555293533573, "grad_norm": 0.1374250686491604, "learning_rate": 4.687349034887994e-06, "loss": 0.7636, "step": 1313 }, { "epoch": 0.6523519920565968, "grad_norm": 0.13288394681388876, "learning_rate": 4.686875603740467e-06, "loss": 0.7648, "step": 1314 }, { "epoch": 0.6528484547598362, "grad_norm": 0.15493904800962308, "learning_rate": 4.686401838364069e-06, "loss": 0.7857, "step": 1315 }, { "epoch": 0.6533449174630755, "grad_norm": 0.1592114663190787, "learning_rate": 4.685927738831204e-06, "loss": 0.7933, "step": 1316 }, { "epoch": 0.653841380166315, "grad_norm": 0.138465643640585, "learning_rate": 4.685453305214335e-06, "loss": 0.7698, "step": 1317 }, { "epoch": 0.6543378428695544, "grad_norm": 0.1379981991943177, "learning_rate": 4.684978537585968e-06, "loss": 0.7459, "step": 1318 }, { "epoch": 0.6548343055727939, "grad_norm": 0.14481568247936927, "learning_rate": 4.684503436018664e-06, "loss": 0.7588, "step": 1319 }, { "epoch": 0.6553307682760333, "grad_norm": 0.14210563691826714, "learning_rate": 4.684028000585038e-06, "loss": 0.7329, "step": 1320 }, { "epoch": 0.6558272309792726, "grad_norm": 0.12867626236605553, "learning_rate": 4.683552231357749e-06, "loss": 0.7337, "step": 1321 }, { "epoch": 0.6563236936825121, "grad_norm": 0.1376861133072269, "learning_rate": 4.683076128409512e-06, "loss": 0.7972, "step": 1322 }, { "epoch": 0.6568201563857515, "grad_norm": 0.14125134793520855, "learning_rate": 4.682599691813092e-06, "loss": 0.7393, "step": 1323 }, { "epoch": 0.657316619088991, "grad_norm": 0.14276487162583956, "learning_rate": 4.682122921641305e-06, "loss": 0.7879, "step": 1324 }, { "epoch": 0.6578130817922304, "grad_norm": 0.13468493472362367, "learning_rate": 4.681645817967017e-06, "loss": 0.7335, "step": 1325 }, { "epoch": 0.6583095444954697, "grad_norm": 0.13943793893639297, "learning_rate": 4.681168380863145e-06, "loss": 0.758, "step": 1326 }, { "epoch": 0.6588060071987092, "grad_norm": 0.13951539561696433, "learning_rate": 4.680690610402659e-06, "loss": 0.8151, "step": 1327 }, { "epoch": 0.6593024699019486, "grad_norm": 0.13406494700163638, "learning_rate": 4.6802125066585765e-06, "loss": 0.7013, "step": 1328 }, { "epoch": 0.6597989326051881, "grad_norm": 0.14160309961844755, "learning_rate": 4.679734069703971e-06, "loss": 0.7574, "step": 1329 }, { "epoch": 0.6602953953084274, "grad_norm": 0.13446373370464942, "learning_rate": 4.679255299611961e-06, "loss": 0.7507, "step": 1330 }, { "epoch": 0.6607918580116668, "grad_norm": 0.12942976744487117, "learning_rate": 4.678776196455722e-06, "loss": 0.7311, "step": 1331 }, { "epoch": 0.6612883207149063, "grad_norm": 0.1391321723122554, "learning_rate": 4.678296760308474e-06, "loss": 0.7981, "step": 1332 }, { "epoch": 0.6617847834181457, "grad_norm": 0.14428783420102687, "learning_rate": 4.677816991243493e-06, "loss": 0.8215, "step": 1333 }, { "epoch": 0.6622812461213852, "grad_norm": 0.1394878539696028, "learning_rate": 4.677336889334103e-06, "loss": 0.7721, "step": 1334 }, { "epoch": 0.6627777088246245, "grad_norm": 0.15266983844158868, "learning_rate": 4.676856454653681e-06, "loss": 0.7336, "step": 1335 }, { "epoch": 0.6632741715278639, "grad_norm": 0.14455346704491276, "learning_rate": 4.676375687275653e-06, "loss": 0.7278, "step": 1336 }, { "epoch": 0.6637706342311034, "grad_norm": 0.1316229836144885, "learning_rate": 4.675894587273496e-06, "loss": 0.7647, "step": 1337 }, { "epoch": 0.6642670969343428, "grad_norm": 0.12941666247842312, "learning_rate": 4.6754131547207406e-06, "loss": 0.7125, "step": 1338 }, { "epoch": 0.6647635596375823, "grad_norm": 0.14046373964383968, "learning_rate": 4.674931389690963e-06, "loss": 0.7341, "step": 1339 }, { "epoch": 0.6652600223408216, "grad_norm": 0.13645194981403033, "learning_rate": 4.674449292257796e-06, "loss": 0.7676, "step": 1340 }, { "epoch": 0.665756485044061, "grad_norm": 0.14519132705834006, "learning_rate": 4.6739668624949196e-06, "loss": 0.7952, "step": 1341 }, { "epoch": 0.6662529477473005, "grad_norm": 0.13696033048575837, "learning_rate": 4.6734841004760644e-06, "loss": 0.7269, "step": 1342 }, { "epoch": 0.6667494104505399, "grad_norm": 0.134676078421444, "learning_rate": 4.673001006275013e-06, "loss": 0.7453, "step": 1343 }, { "epoch": 0.6672458731537794, "grad_norm": 0.14578953183512341, "learning_rate": 4.672517579965601e-06, "loss": 0.8015, "step": 1344 }, { "epoch": 0.6677423358570187, "grad_norm": 0.13466978301237612, "learning_rate": 4.6720338216217096e-06, "loss": 0.7375, "step": 1345 }, { "epoch": 0.6682387985602581, "grad_norm": 0.13431133998664943, "learning_rate": 4.671549731317274e-06, "loss": 0.7931, "step": 1346 }, { "epoch": 0.6687352612634976, "grad_norm": 0.14300934227788512, "learning_rate": 4.67106530912628e-06, "loss": 0.8223, "step": 1347 }, { "epoch": 0.669231723966737, "grad_norm": 0.1335079638925206, "learning_rate": 4.670580555122765e-06, "loss": 0.7081, "step": 1348 }, { "epoch": 0.6697281866699765, "grad_norm": 0.13925505067882116, "learning_rate": 4.670095469380814e-06, "loss": 0.7556, "step": 1349 }, { "epoch": 0.6702246493732158, "grad_norm": 0.1385402073066566, "learning_rate": 4.669610051974566e-06, "loss": 0.7862, "step": 1350 }, { "epoch": 0.6707211120764552, "grad_norm": 0.14168635102257782, "learning_rate": 4.669124302978208e-06, "loss": 0.7532, "step": 1351 }, { "epoch": 0.6712175747796947, "grad_norm": 0.1319645107321289, "learning_rate": 4.6686382224659795e-06, "loss": 0.721, "step": 1352 }, { "epoch": 0.6717140374829341, "grad_norm": 0.14159994069821713, "learning_rate": 4.66815181051217e-06, "loss": 0.7324, "step": 1353 }, { "epoch": 0.6722105001861736, "grad_norm": 0.13302384063587522, "learning_rate": 4.667665067191121e-06, "loss": 0.7914, "step": 1354 }, { "epoch": 0.6727069628894129, "grad_norm": 0.14520590994816845, "learning_rate": 4.667177992577222e-06, "loss": 0.803, "step": 1355 }, { "epoch": 0.6732034255926523, "grad_norm": 0.13258982595212945, "learning_rate": 4.666690586744914e-06, "loss": 0.7802, "step": 1356 }, { "epoch": 0.6736998882958918, "grad_norm": 0.14243373706488904, "learning_rate": 4.666202849768691e-06, "loss": 0.7915, "step": 1357 }, { "epoch": 0.6741963509991312, "grad_norm": 0.14197795066279506, "learning_rate": 4.6657147817230945e-06, "loss": 0.7584, "step": 1358 }, { "epoch": 0.6746928137023707, "grad_norm": 0.13496558186041357, "learning_rate": 4.665226382682718e-06, "loss": 0.735, "step": 1359 }, { "epoch": 0.67518927640561, "grad_norm": 0.13833577662827512, "learning_rate": 4.664737652722205e-06, "loss": 0.7653, "step": 1360 }, { "epoch": 0.6756857391088494, "grad_norm": 0.13481054586272334, "learning_rate": 4.664248591916252e-06, "loss": 0.7518, "step": 1361 }, { "epoch": 0.6761822018120889, "grad_norm": 0.13380123421346032, "learning_rate": 4.663759200339603e-06, "loss": 0.742, "step": 1362 }, { "epoch": 0.6766786645153283, "grad_norm": 0.13210724903338833, "learning_rate": 4.663269478067053e-06, "loss": 0.7546, "step": 1363 }, { "epoch": 0.6771751272185677, "grad_norm": 0.13236042360384812, "learning_rate": 4.6627794251734485e-06, "loss": 0.7404, "step": 1364 }, { "epoch": 0.6776715899218071, "grad_norm": 0.13755480780239387, "learning_rate": 4.662289041733686e-06, "loss": 0.7639, "step": 1365 }, { "epoch": 0.6781680526250465, "grad_norm": 0.13277407792473037, "learning_rate": 4.661798327822713e-06, "loss": 0.78, "step": 1366 }, { "epoch": 0.678664515328286, "grad_norm": 0.13349951689576497, "learning_rate": 4.661307283515528e-06, "loss": 0.7362, "step": 1367 }, { "epoch": 0.6791609780315254, "grad_norm": 0.1454330089242611, "learning_rate": 4.660815908887179e-06, "loss": 0.786, "step": 1368 }, { "epoch": 0.6796574407347648, "grad_norm": 0.13805559122664302, "learning_rate": 4.660324204012764e-06, "loss": 0.7168, "step": 1369 }, { "epoch": 0.6801539034380042, "grad_norm": 0.1390252829777148, "learning_rate": 4.659832168967432e-06, "loss": 0.736, "step": 1370 }, { "epoch": 0.6806503661412436, "grad_norm": 0.13618470081629033, "learning_rate": 4.659339803826384e-06, "loss": 0.7786, "step": 1371 }, { "epoch": 0.6811468288444831, "grad_norm": 0.13932005962814825, "learning_rate": 4.658847108664869e-06, "loss": 0.7333, "step": 1372 }, { "epoch": 0.6816432915477225, "grad_norm": 0.1357691607502142, "learning_rate": 4.6583540835581885e-06, "loss": 0.7899, "step": 1373 }, { "epoch": 0.6821397542509618, "grad_norm": 0.1464322972528331, "learning_rate": 4.657860728581692e-06, "loss": 0.8177, "step": 1374 }, { "epoch": 0.6826362169542013, "grad_norm": 0.13218140620942265, "learning_rate": 4.657367043810783e-06, "loss": 0.7308, "step": 1375 }, { "epoch": 0.6831326796574407, "grad_norm": 0.1461276762453906, "learning_rate": 4.656873029320911e-06, "loss": 0.7628, "step": 1376 }, { "epoch": 0.6836291423606802, "grad_norm": 0.13682220089172875, "learning_rate": 4.656378685187579e-06, "loss": 0.8118, "step": 1377 }, { "epoch": 0.6841256050639196, "grad_norm": 0.13808113028870317, "learning_rate": 4.655884011486341e-06, "loss": 0.778, "step": 1378 }, { "epoch": 0.684622067767159, "grad_norm": 0.13141352149483548, "learning_rate": 4.655389008292798e-06, "loss": 0.7884, "step": 1379 }, { "epoch": 0.6851185304703984, "grad_norm": 0.13773832084927776, "learning_rate": 4.654893675682605e-06, "loss": 0.7103, "step": 1380 }, { "epoch": 0.6856149931736378, "grad_norm": 0.139138670836966, "learning_rate": 4.654398013731464e-06, "loss": 0.7643, "step": 1381 }, { "epoch": 0.6861114558768773, "grad_norm": 0.13544047173805013, "learning_rate": 4.65390202251513e-06, "loss": 0.7806, "step": 1382 }, { "epoch": 0.6866079185801167, "grad_norm": 0.13728555567097042, "learning_rate": 4.653405702109407e-06, "loss": 0.7961, "step": 1383 }, { "epoch": 0.687104381283356, "grad_norm": 0.13134136227546692, "learning_rate": 4.65290905259015e-06, "loss": 0.7708, "step": 1384 }, { "epoch": 0.6876008439865955, "grad_norm": 0.14072716822510162, "learning_rate": 4.652412074033263e-06, "loss": 0.7598, "step": 1385 }, { "epoch": 0.6880973066898349, "grad_norm": 0.18508181188821365, "learning_rate": 4.651914766514703e-06, "loss": 0.7481, "step": 1386 }, { "epoch": 0.6885937693930744, "grad_norm": 0.13447285883304055, "learning_rate": 4.651417130110473e-06, "loss": 0.7048, "step": 1387 }, { "epoch": 0.6890902320963138, "grad_norm": 0.13797950483454202, "learning_rate": 4.65091916489663e-06, "loss": 0.7958, "step": 1388 }, { "epoch": 0.6895866947995531, "grad_norm": 0.14969109022454788, "learning_rate": 4.65042087094928e-06, "loss": 0.8282, "step": 1389 }, { "epoch": 0.6900831575027926, "grad_norm": 0.13393294593783833, "learning_rate": 4.64992224834458e-06, "loss": 0.6999, "step": 1390 }, { "epoch": 0.690579620206032, "grad_norm": 0.13957414144835012, "learning_rate": 4.649423297158736e-06, "loss": 0.8215, "step": 1391 }, { "epoch": 0.6910760829092715, "grad_norm": 0.13264027359412842, "learning_rate": 4.648924017468003e-06, "loss": 0.7282, "step": 1392 }, { "epoch": 0.6915725456125109, "grad_norm": 0.13318683074105853, "learning_rate": 4.648424409348691e-06, "loss": 0.7237, "step": 1393 }, { "epoch": 0.6920690083157502, "grad_norm": 0.14079961861355597, "learning_rate": 4.647924472877154e-06, "loss": 0.7589, "step": 1394 }, { "epoch": 0.6925654710189897, "grad_norm": 0.13173357758319654, "learning_rate": 4.647424208129801e-06, "loss": 0.7327, "step": 1395 }, { "epoch": 0.6930619337222291, "grad_norm": 0.13990361296789605, "learning_rate": 4.646923615183089e-06, "loss": 0.7952, "step": 1396 }, { "epoch": 0.6935583964254686, "grad_norm": 0.13139251687364273, "learning_rate": 4.646422694113526e-06, "loss": 0.7317, "step": 1397 }, { "epoch": 0.6940548591287079, "grad_norm": 0.14426243953892712, "learning_rate": 4.645921444997669e-06, "loss": 0.807, "step": 1398 }, { "epoch": 0.6945513218319473, "grad_norm": 0.13812024833123682, "learning_rate": 4.645419867912127e-06, "loss": 0.7351, "step": 1399 }, { "epoch": 0.6950477845351868, "grad_norm": 0.1382819086009079, "learning_rate": 4.644917962933558e-06, "loss": 0.7554, "step": 1400 }, { "epoch": 0.6955442472384262, "grad_norm": 0.13000958747168673, "learning_rate": 4.644415730138669e-06, "loss": 0.7277, "step": 1401 }, { "epoch": 0.6960407099416657, "grad_norm": 0.14153919488215605, "learning_rate": 4.643913169604218e-06, "loss": 0.8176, "step": 1402 }, { "epoch": 0.696537172644905, "grad_norm": 0.13514290656512232, "learning_rate": 4.643410281407014e-06, "loss": 0.7458, "step": 1403 }, { "epoch": 0.6970336353481444, "grad_norm": 0.1323956462686169, "learning_rate": 4.642907065623916e-06, "loss": 0.7801, "step": 1404 }, { "epoch": 0.6975300980513839, "grad_norm": 0.13517969708499833, "learning_rate": 4.642403522331832e-06, "loss": 0.7493, "step": 1405 }, { "epoch": 0.6980265607546233, "grad_norm": 0.14025387703072958, "learning_rate": 4.641899651607721e-06, "loss": 0.7722, "step": 1406 }, { "epoch": 0.6985230234578628, "grad_norm": 0.13173070675094867, "learning_rate": 4.64139545352859e-06, "loss": 0.7185, "step": 1407 }, { "epoch": 0.6990194861611021, "grad_norm": 0.13136540290209375, "learning_rate": 4.640890928171499e-06, "loss": 0.7754, "step": 1408 }, { "epoch": 0.6995159488643415, "grad_norm": 0.13827112688996815, "learning_rate": 4.640386075613556e-06, "loss": 0.792, "step": 1409 }, { "epoch": 0.700012411567581, "grad_norm": 0.13975488734948466, "learning_rate": 4.639880895931919e-06, "loss": 0.7988, "step": 1410 }, { "epoch": 0.7005088742708204, "grad_norm": 0.1336382331212796, "learning_rate": 4.6393753892038e-06, "loss": 0.7215, "step": 1411 }, { "epoch": 0.7010053369740599, "grad_norm": 0.13724481212117196, "learning_rate": 4.638869555506452e-06, "loss": 0.7844, "step": 1412 }, { "epoch": 0.7015017996772992, "grad_norm": 0.13404812402428848, "learning_rate": 4.638363394917189e-06, "loss": 0.7224, "step": 1413 }, { "epoch": 0.7019982623805386, "grad_norm": 0.1407590590379482, "learning_rate": 4.637856907513366e-06, "loss": 0.7599, "step": 1414 }, { "epoch": 0.7024947250837781, "grad_norm": 0.14010062720805114, "learning_rate": 4.637350093372393e-06, "loss": 0.7773, "step": 1415 }, { "epoch": 0.7029911877870175, "grad_norm": 0.13348329573150353, "learning_rate": 4.636842952571727e-06, "loss": 0.761, "step": 1416 }, { "epoch": 0.703487650490257, "grad_norm": 0.14369061438866643, "learning_rate": 4.636335485188879e-06, "loss": 0.7699, "step": 1417 }, { "epoch": 0.7039841131934963, "grad_norm": 0.13433232422419997, "learning_rate": 4.635827691301404e-06, "loss": 0.7661, "step": 1418 }, { "epoch": 0.7044805758967357, "grad_norm": 0.14032937963835693, "learning_rate": 4.635319570986913e-06, "loss": 0.7695, "step": 1419 }, { "epoch": 0.7049770385999752, "grad_norm": 0.13443137094696417, "learning_rate": 4.634811124323062e-06, "loss": 0.7334, "step": 1420 }, { "epoch": 0.7054735013032146, "grad_norm": 0.1342398609304696, "learning_rate": 4.63430235138756e-06, "loss": 0.7465, "step": 1421 }, { "epoch": 0.7059699640064541, "grad_norm": 0.13174620950915217, "learning_rate": 4.6337932522581656e-06, "loss": 0.7551, "step": 1422 }, { "epoch": 0.7064664267096934, "grad_norm": 0.1439021802463562, "learning_rate": 4.633283827012684e-06, "loss": 0.7224, "step": 1423 }, { "epoch": 0.7069628894129328, "grad_norm": 0.1431482149278234, "learning_rate": 4.632774075728974e-06, "loss": 0.8265, "step": 1424 }, { "epoch": 0.7074593521161723, "grad_norm": 0.13651431312093584, "learning_rate": 4.632263998484944e-06, "loss": 0.7448, "step": 1425 }, { "epoch": 0.7079558148194117, "grad_norm": 0.13562176642096896, "learning_rate": 4.63175359535855e-06, "loss": 0.731, "step": 1426 }, { "epoch": 0.7084522775226512, "grad_norm": 0.1331841089582722, "learning_rate": 4.631242866427798e-06, "loss": 0.7785, "step": 1427 }, { "epoch": 0.7089487402258905, "grad_norm": 0.13572962352737827, "learning_rate": 4.6307318117707465e-06, "loss": 0.7472, "step": 1428 }, { "epoch": 0.7094452029291299, "grad_norm": 0.1337025002321428, "learning_rate": 4.630220431465501e-06, "loss": 0.7177, "step": 1429 }, { "epoch": 0.7099416656323694, "grad_norm": 0.13714690654454087, "learning_rate": 4.629708725590219e-06, "loss": 0.7595, "step": 1430 }, { "epoch": 0.7104381283356088, "grad_norm": 0.13237056679093584, "learning_rate": 4.629196694223104e-06, "loss": 0.7719, "step": 1431 }, { "epoch": 0.7109345910388482, "grad_norm": 0.1347783497383712, "learning_rate": 4.628684337442414e-06, "loss": 0.7559, "step": 1432 }, { "epoch": 0.7114310537420876, "grad_norm": 0.14212353203573067, "learning_rate": 4.6281716553264535e-06, "loss": 0.7647, "step": 1433 }, { "epoch": 0.711927516445327, "grad_norm": 0.1391353138825361, "learning_rate": 4.627658647953579e-06, "loss": 0.7869, "step": 1434 }, { "epoch": 0.7124239791485665, "grad_norm": 0.1367605845380993, "learning_rate": 4.6271453154021936e-06, "loss": 0.7652, "step": 1435 }, { "epoch": 0.7129204418518059, "grad_norm": 0.13776225189007474, "learning_rate": 4.626631657750754e-06, "loss": 0.7352, "step": 1436 }, { "epoch": 0.7134169045550453, "grad_norm": 0.15623278977035873, "learning_rate": 4.626117675077762e-06, "loss": 0.772, "step": 1437 }, { "epoch": 0.7139133672582847, "grad_norm": 0.13542364624954573, "learning_rate": 4.625603367461775e-06, "loss": 0.755, "step": 1438 }, { "epoch": 0.7144098299615241, "grad_norm": 0.1363585075610717, "learning_rate": 4.6250887349813935e-06, "loss": 0.7923, "step": 1439 }, { "epoch": 0.7149062926647636, "grad_norm": 0.1349317484055486, "learning_rate": 4.6245737777152725e-06, "loss": 0.737, "step": 1440 }, { "epoch": 0.715402755368003, "grad_norm": 0.12840691438132237, "learning_rate": 4.624058495742115e-06, "loss": 0.7329, "step": 1441 }, { "epoch": 0.7158992180712423, "grad_norm": 0.14115995132355305, "learning_rate": 4.623542889140671e-06, "loss": 0.7799, "step": 1442 }, { "epoch": 0.7163956807744818, "grad_norm": 0.1357743633796934, "learning_rate": 4.623026957989746e-06, "loss": 0.7492, "step": 1443 }, { "epoch": 0.7168921434777212, "grad_norm": 0.140747920382236, "learning_rate": 4.622510702368191e-06, "loss": 0.7822, "step": 1444 }, { "epoch": 0.7173886061809607, "grad_norm": 0.13586217424188604, "learning_rate": 4.621994122354907e-06, "loss": 0.737, "step": 1445 }, { "epoch": 0.7178850688842001, "grad_norm": 0.1319421489214161, "learning_rate": 4.621477218028845e-06, "loss": 0.7439, "step": 1446 }, { "epoch": 0.7183815315874394, "grad_norm": 0.1467289582504143, "learning_rate": 4.620959989469005e-06, "loss": 0.7969, "step": 1447 }, { "epoch": 0.7188779942906789, "grad_norm": 0.13593369108799191, "learning_rate": 4.620442436754438e-06, "loss": 0.7792, "step": 1448 }, { "epoch": 0.7193744569939183, "grad_norm": 0.13667469147949424, "learning_rate": 4.619924559964243e-06, "loss": 0.7416, "step": 1449 }, { "epoch": 0.7198709196971578, "grad_norm": 0.13941735212726125, "learning_rate": 4.61940635917757e-06, "loss": 0.7836, "step": 1450 }, { "epoch": 0.7203673824003972, "grad_norm": 0.14522609940766362, "learning_rate": 4.618887834473616e-06, "loss": 0.8191, "step": 1451 }, { "epoch": 0.7208638451036365, "grad_norm": 0.1347623385534868, "learning_rate": 4.618368985931631e-06, "loss": 0.72, "step": 1452 }, { "epoch": 0.721360307806876, "grad_norm": 0.13229916497360666, "learning_rate": 4.617849813630913e-06, "loss": 0.7019, "step": 1453 }, { "epoch": 0.7218567705101154, "grad_norm": 0.13890901285175117, "learning_rate": 4.617330317650806e-06, "loss": 0.7371, "step": 1454 }, { "epoch": 0.7223532332133549, "grad_norm": 0.13269598761541726, "learning_rate": 4.6168104980707105e-06, "loss": 0.7291, "step": 1455 }, { "epoch": 0.7228496959165943, "grad_norm": 0.1348717930791648, "learning_rate": 4.61629035497007e-06, "loss": 0.7328, "step": 1456 }, { "epoch": 0.7233461586198336, "grad_norm": 0.13798017679082805, "learning_rate": 4.615769888428382e-06, "loss": 0.7664, "step": 1457 }, { "epoch": 0.7238426213230731, "grad_norm": 0.13280778392582876, "learning_rate": 4.615249098525189e-06, "loss": 0.73, "step": 1458 }, { "epoch": 0.7243390840263125, "grad_norm": 0.13189279813229704, "learning_rate": 4.614727985340087e-06, "loss": 0.7065, "step": 1459 }, { "epoch": 0.724835546729552, "grad_norm": 0.1378715254587552, "learning_rate": 4.61420654895272e-06, "loss": 0.7774, "step": 1460 }, { "epoch": 0.7253320094327914, "grad_norm": 0.1311464168604436, "learning_rate": 4.613684789442781e-06, "loss": 0.7273, "step": 1461 }, { "epoch": 0.7258284721360307, "grad_norm": 0.14345074089926546, "learning_rate": 4.613162706890011e-06, "loss": 0.7582, "step": 1462 }, { "epoch": 0.7263249348392702, "grad_norm": 0.13588405827285918, "learning_rate": 4.612640301374204e-06, "loss": 0.7507, "step": 1463 }, { "epoch": 0.7268213975425096, "grad_norm": 0.13152235282939082, "learning_rate": 4.6121175729752e-06, "loss": 0.7425, "step": 1464 }, { "epoch": 0.7273178602457491, "grad_norm": 0.1387161072572674, "learning_rate": 4.611594521772891e-06, "loss": 0.7184, "step": 1465 }, { "epoch": 0.7278143229489885, "grad_norm": 0.13801900811363219, "learning_rate": 4.611071147847216e-06, "loss": 0.7633, "step": 1466 }, { "epoch": 0.7283107856522278, "grad_norm": 0.2567131236080573, "learning_rate": 4.610547451278164e-06, "loss": 0.7701, "step": 1467 }, { "epoch": 0.7288072483554673, "grad_norm": 0.14345292271439838, "learning_rate": 4.6100234321457746e-06, "loss": 0.749, "step": 1468 }, { "epoch": 0.7293037110587067, "grad_norm": 0.13240897000188626, "learning_rate": 4.6094990905301354e-06, "loss": 0.7646, "step": 1469 }, { "epoch": 0.7298001737619462, "grad_norm": 0.13348526628448198, "learning_rate": 4.608974426511383e-06, "loss": 0.7496, "step": 1470 }, { "epoch": 0.7302966364651855, "grad_norm": 0.14970955112386783, "learning_rate": 4.608449440169705e-06, "loss": 0.7601, "step": 1471 }, { "epoch": 0.7307930991684249, "grad_norm": 0.1407703027493505, "learning_rate": 4.607924131585336e-06, "loss": 0.7591, "step": 1472 }, { "epoch": 0.7312895618716644, "grad_norm": 0.1318082753134835, "learning_rate": 4.607398500838561e-06, "loss": 0.7051, "step": 1473 }, { "epoch": 0.7317860245749038, "grad_norm": 0.34847062208945645, "learning_rate": 4.606872548009716e-06, "loss": 0.7823, "step": 1474 }, { "epoch": 0.7322824872781433, "grad_norm": 0.13028506291705924, "learning_rate": 4.606346273179182e-06, "loss": 0.7068, "step": 1475 }, { "epoch": 0.7327789499813826, "grad_norm": 0.1452834099987651, "learning_rate": 4.605819676427393e-06, "loss": 0.7924, "step": 1476 }, { "epoch": 0.733275412684622, "grad_norm": 0.13237347766328517, "learning_rate": 4.605292757834832e-06, "loss": 0.7094, "step": 1477 }, { "epoch": 0.7337718753878615, "grad_norm": 0.14517976496072124, "learning_rate": 4.6047655174820275e-06, "loss": 0.7906, "step": 1478 }, { "epoch": 0.7342683380911009, "grad_norm": 0.14057990814296611, "learning_rate": 4.604237955449561e-06, "loss": 0.7671, "step": 1479 }, { "epoch": 0.7347648007943404, "grad_norm": 0.13415285581754496, "learning_rate": 4.603710071818062e-06, "loss": 0.6983, "step": 1480 }, { "epoch": 0.7352612634975797, "grad_norm": 0.13508721935027235, "learning_rate": 4.603181866668209e-06, "loss": 0.7999, "step": 1481 }, { "epoch": 0.7357577262008191, "grad_norm": 0.19074847415713053, "learning_rate": 4.60265334008073e-06, "loss": 0.7689, "step": 1482 }, { "epoch": 0.7362541889040586, "grad_norm": 0.1359795575688059, "learning_rate": 4.602124492136401e-06, "loss": 0.7653, "step": 1483 }, { "epoch": 0.736750651607298, "grad_norm": 0.15392110510905885, "learning_rate": 4.601595322916049e-06, "loss": 0.7726, "step": 1484 }, { "epoch": 0.7372471143105375, "grad_norm": 0.1400383089128481, "learning_rate": 4.601065832500548e-06, "loss": 0.7744, "step": 1485 }, { "epoch": 0.7377435770137768, "grad_norm": 0.14190989377886226, "learning_rate": 4.600536020970822e-06, "loss": 0.779, "step": 1486 }, { "epoch": 0.7382400397170162, "grad_norm": 0.14143141374332557, "learning_rate": 4.600005888407846e-06, "loss": 0.8004, "step": 1487 }, { "epoch": 0.7387365024202557, "grad_norm": 0.1448247491292055, "learning_rate": 4.59947543489264e-06, "loss": 0.7369, "step": 1488 }, { "epoch": 0.7392329651234951, "grad_norm": 0.14353791020576437, "learning_rate": 4.598944660506276e-06, "loss": 0.7517, "step": 1489 }, { "epoch": 0.7397294278267346, "grad_norm": 0.1397411403947714, "learning_rate": 4.598413565329876e-06, "loss": 0.7872, "step": 1490 }, { "epoch": 0.7402258905299739, "grad_norm": 0.14686545557180947, "learning_rate": 4.597882149444607e-06, "loss": 0.7655, "step": 1491 }, { "epoch": 0.7407223532332133, "grad_norm": 0.13888886824301397, "learning_rate": 4.597350412931688e-06, "loss": 0.7759, "step": 1492 }, { "epoch": 0.7412188159364528, "grad_norm": 0.13368575317341758, "learning_rate": 4.5968183558723876e-06, "loss": 0.7642, "step": 1493 }, { "epoch": 0.7417152786396922, "grad_norm": 0.13671963488130195, "learning_rate": 4.596285978348022e-06, "loss": 0.8001, "step": 1494 }, { "epoch": 0.7422117413429317, "grad_norm": 0.13436579260102113, "learning_rate": 4.595753280439955e-06, "loss": 0.7504, "step": 1495 }, { "epoch": 0.742708204046171, "grad_norm": 0.1350714284302843, "learning_rate": 4.5952202622296015e-06, "loss": 0.7258, "step": 1496 }, { "epoch": 0.7432046667494104, "grad_norm": 0.14306485237289734, "learning_rate": 4.594686923798426e-06, "loss": 0.7651, "step": 1497 }, { "epoch": 0.7437011294526499, "grad_norm": 0.13844557571686955, "learning_rate": 4.594153265227941e-06, "loss": 0.7099, "step": 1498 }, { "epoch": 0.7441975921558893, "grad_norm": 0.13813214807550506, "learning_rate": 4.5936192865997055e-06, "loss": 0.7654, "step": 1499 }, { "epoch": 0.7446940548591288, "grad_norm": 0.14890702452844934, "learning_rate": 4.59308498799533e-06, "loss": 0.7431, "step": 1500 }, { "epoch": 0.7451905175623681, "grad_norm": 0.13792041924804616, "learning_rate": 4.592550369496475e-06, "loss": 0.7929, "step": 1501 }, { "epoch": 0.7456869802656075, "grad_norm": 0.14078699310970763, "learning_rate": 4.592015431184847e-06, "loss": 0.7453, "step": 1502 }, { "epoch": 0.746183442968847, "grad_norm": 0.14047870303022741, "learning_rate": 4.591480173142204e-06, "loss": 0.7569, "step": 1503 }, { "epoch": 0.7466799056720864, "grad_norm": 0.1306675590393236, "learning_rate": 4.590944595450351e-06, "loss": 0.7193, "step": 1504 }, { "epoch": 0.7471763683753258, "grad_norm": 0.1360716461251853, "learning_rate": 4.590408698191142e-06, "loss": 0.7554, "step": 1505 }, { "epoch": 0.7476728310785652, "grad_norm": 0.13401655261545067, "learning_rate": 4.58987248144648e-06, "loss": 0.7505, "step": 1506 }, { "epoch": 0.7481692937818046, "grad_norm": 0.13656970997298545, "learning_rate": 4.589335945298318e-06, "loss": 0.7943, "step": 1507 }, { "epoch": 0.7486657564850441, "grad_norm": 0.13808934356621622, "learning_rate": 4.588799089828657e-06, "loss": 0.8049, "step": 1508 }, { "epoch": 0.7491622191882835, "grad_norm": 0.14038587528162197, "learning_rate": 4.588261915119547e-06, "loss": 0.7568, "step": 1509 }, { "epoch": 0.7496586818915228, "grad_norm": 0.13223604221937782, "learning_rate": 4.587724421253085e-06, "loss": 0.7214, "step": 1510 }, { "epoch": 0.7501551445947623, "grad_norm": 0.13312725764514682, "learning_rate": 4.5871866083114206e-06, "loss": 0.7061, "step": 1511 }, { "epoch": 0.7506516072980017, "grad_norm": 0.13511067606997035, "learning_rate": 4.586648476376747e-06, "loss": 0.7755, "step": 1512 }, { "epoch": 0.7506516072980017, "eval_loss": 0.7611762285232544, "eval_runtime": 135.5972, "eval_samples_per_second": 223.847, "eval_steps_per_second": 27.987, "step": 1512 }, { "epoch": 0.7511480700012412, "grad_norm": 0.1379636114182231, "learning_rate": 4.586110025531312e-06, "loss": 0.7179, "step": 1513 }, { "epoch": 0.7516445327044806, "grad_norm": 0.13856904591246538, "learning_rate": 4.585571255857408e-06, "loss": 0.7782, "step": 1514 }, { "epoch": 0.75214099540772, "grad_norm": 0.1338118052517091, "learning_rate": 4.585032167437375e-06, "loss": 0.7429, "step": 1515 }, { "epoch": 0.7526374581109594, "grad_norm": 0.12995673319277454, "learning_rate": 4.584492760353607e-06, "loss": 0.7371, "step": 1516 }, { "epoch": 0.7531339208141988, "grad_norm": 0.13175985923680122, "learning_rate": 4.583953034688544e-06, "loss": 0.7269, "step": 1517 }, { "epoch": 0.7536303835174383, "grad_norm": 0.13337482313493468, "learning_rate": 4.5834129905246725e-06, "loss": 0.7407, "step": 1518 }, { "epoch": 0.7541268462206777, "grad_norm": 0.13450828654016309, "learning_rate": 4.582872627944531e-06, "loss": 0.7411, "step": 1519 }, { "epoch": 0.754623308923917, "grad_norm": 0.13585453398292363, "learning_rate": 4.582331947030704e-06, "loss": 0.7709, "step": 1520 }, { "epoch": 0.7551197716271565, "grad_norm": 0.16174753448144683, "learning_rate": 4.581790947865827e-06, "loss": 0.7486, "step": 1521 }, { "epoch": 0.7556162343303959, "grad_norm": 0.13477064890424165, "learning_rate": 4.581249630532582e-06, "loss": 0.7218, "step": 1522 }, { "epoch": 0.7561126970336354, "grad_norm": 0.14221417784622448, "learning_rate": 4.580707995113703e-06, "loss": 0.7464, "step": 1523 }, { "epoch": 0.7566091597368748, "grad_norm": 0.1471611979618392, "learning_rate": 4.580166041691966e-06, "loss": 0.7741, "step": 1524 }, { "epoch": 0.7571056224401141, "grad_norm": 0.13565970403363753, "learning_rate": 4.579623770350205e-06, "loss": 0.7506, "step": 1525 }, { "epoch": 0.7576020851433536, "grad_norm": 0.14016327819488653, "learning_rate": 4.579081181171292e-06, "loss": 0.7694, "step": 1526 }, { "epoch": 0.758098547846593, "grad_norm": 0.13870513168528667, "learning_rate": 4.5785382742381586e-06, "loss": 0.7733, "step": 1527 }, { "epoch": 0.7585950105498325, "grad_norm": 0.14061847589896362, "learning_rate": 4.577995049633776e-06, "loss": 0.708, "step": 1528 }, { "epoch": 0.7590914732530719, "grad_norm": 0.12927094131164107, "learning_rate": 4.577451507441167e-06, "loss": 0.7336, "step": 1529 }, { "epoch": 0.7595879359563112, "grad_norm": 0.13010758376806392, "learning_rate": 4.576907647743406e-06, "loss": 0.7284, "step": 1530 }, { "epoch": 0.7600843986595507, "grad_norm": 0.13474374832851801, "learning_rate": 4.576363470623612e-06, "loss": 0.7211, "step": 1531 }, { "epoch": 0.7605808613627901, "grad_norm": 0.13588121835212055, "learning_rate": 4.575818976164952e-06, "loss": 0.7767, "step": 1532 }, { "epoch": 0.7610773240660296, "grad_norm": 0.13581473484517131, "learning_rate": 4.575274164450645e-06, "loss": 0.7967, "step": 1533 }, { "epoch": 0.761573786769269, "grad_norm": 0.1319619163329429, "learning_rate": 4.574729035563957e-06, "loss": 0.7489, "step": 1534 }, { "epoch": 0.7620702494725083, "grad_norm": 0.13070368454070141, "learning_rate": 4.574183589588202e-06, "loss": 0.7371, "step": 1535 }, { "epoch": 0.7625667121757478, "grad_norm": 0.14405906001792207, "learning_rate": 4.573637826606742e-06, "loss": 0.8019, "step": 1536 }, { "epoch": 0.7630631748789872, "grad_norm": 0.13830904744938607, "learning_rate": 4.573091746702988e-06, "loss": 0.8099, "step": 1537 }, { "epoch": 0.7635596375822267, "grad_norm": 0.1308369875528322, "learning_rate": 4.572545349960401e-06, "loss": 0.7099, "step": 1538 }, { "epoch": 0.764056100285466, "grad_norm": 0.13746899245422908, "learning_rate": 4.571998636462487e-06, "loss": 0.7422, "step": 1539 }, { "epoch": 0.7645525629887054, "grad_norm": 0.13435052506307477, "learning_rate": 4.571451606292803e-06, "loss": 0.7412, "step": 1540 }, { "epoch": 0.7650490256919449, "grad_norm": 0.14575768487845997, "learning_rate": 4.570904259534955e-06, "loss": 0.799, "step": 1541 }, { "epoch": 0.7655454883951843, "grad_norm": 0.13296485164519237, "learning_rate": 4.570356596272596e-06, "loss": 0.7189, "step": 1542 }, { "epoch": 0.7660419510984238, "grad_norm": 0.13484403439852938, "learning_rate": 4.569808616589426e-06, "loss": 0.7768, "step": 1543 }, { "epoch": 0.7665384138016631, "grad_norm": 0.1332711897109076, "learning_rate": 4.569260320569196e-06, "loss": 0.756, "step": 1544 }, { "epoch": 0.7670348765049025, "grad_norm": 0.1419358962518124, "learning_rate": 4.568711708295704e-06, "loss": 0.7668, "step": 1545 }, { "epoch": 0.767531339208142, "grad_norm": 0.13596711085205604, "learning_rate": 4.5681627798527965e-06, "loss": 0.7775, "step": 1546 }, { "epoch": 0.7680278019113814, "grad_norm": 0.13788837250657413, "learning_rate": 4.5676135353243685e-06, "loss": 0.724, "step": 1547 }, { "epoch": 0.7685242646146209, "grad_norm": 0.13462158663256213, "learning_rate": 4.567063974794363e-06, "loss": 0.7606, "step": 1548 }, { "epoch": 0.7690207273178602, "grad_norm": 0.14640713050413612, "learning_rate": 4.566514098346774e-06, "loss": 0.75, "step": 1549 }, { "epoch": 0.7695171900210996, "grad_norm": 0.14008125739494734, "learning_rate": 4.565963906065637e-06, "loss": 0.7769, "step": 1550 }, { "epoch": 0.7700136527243391, "grad_norm": 0.13457373398967837, "learning_rate": 4.565413398035043e-06, "loss": 0.7223, "step": 1551 }, { "epoch": 0.7705101154275785, "grad_norm": 0.12909038799878353, "learning_rate": 4.564862574339126e-06, "loss": 0.7108, "step": 1552 }, { "epoch": 0.771006578130818, "grad_norm": 0.15132140494001553, "learning_rate": 4.564311435062074e-06, "loss": 0.7578, "step": 1553 }, { "epoch": 0.7715030408340573, "grad_norm": 0.13338663206299184, "learning_rate": 4.563759980288117e-06, "loss": 0.7337, "step": 1554 }, { "epoch": 0.7719995035372967, "grad_norm": 0.13671337018894522, "learning_rate": 4.563208210101536e-06, "loss": 0.7435, "step": 1555 }, { "epoch": 0.7724959662405362, "grad_norm": 0.1339512782588895, "learning_rate": 4.562656124586663e-06, "loss": 0.734, "step": 1556 }, { "epoch": 0.7729924289437756, "grad_norm": 0.13189544572945966, "learning_rate": 4.562103723827872e-06, "loss": 0.7639, "step": 1557 }, { "epoch": 0.7734888916470151, "grad_norm": 0.13412150276620746, "learning_rate": 4.561551007909592e-06, "loss": 0.7409, "step": 1558 }, { "epoch": 0.7739853543502544, "grad_norm": 0.13352894722705605, "learning_rate": 4.560997976916293e-06, "loss": 0.7254, "step": 1559 }, { "epoch": 0.7744818170534938, "grad_norm": 0.13385714130707613, "learning_rate": 4.560444630932499e-06, "loss": 0.7355, "step": 1560 }, { "epoch": 0.7749782797567333, "grad_norm": 0.14383353757373613, "learning_rate": 4.5598909700427805e-06, "loss": 0.7632, "step": 1561 }, { "epoch": 0.7754747424599727, "grad_norm": 0.13057620170286605, "learning_rate": 4.559336994331755e-06, "loss": 0.74, "step": 1562 }, { "epoch": 0.7759712051632122, "grad_norm": 0.22403616831916165, "learning_rate": 4.558782703884089e-06, "loss": 0.7815, "step": 1563 }, { "epoch": 0.7764676678664515, "grad_norm": 0.13988728351688395, "learning_rate": 4.558228098784496e-06, "loss": 0.8111, "step": 1564 }, { "epoch": 0.7769641305696909, "grad_norm": 0.12851052315735476, "learning_rate": 4.55767317911774e-06, "loss": 0.7076, "step": 1565 }, { "epoch": 0.7774605932729304, "grad_norm": 0.13445147845150343, "learning_rate": 4.557117944968631e-06, "loss": 0.7776, "step": 1566 }, { "epoch": 0.7779570559761698, "grad_norm": 0.13626735223788075, "learning_rate": 4.556562396422027e-06, "loss": 0.7418, "step": 1567 }, { "epoch": 0.7784535186794093, "grad_norm": 0.13902885288869005, "learning_rate": 4.5560065335628356e-06, "loss": 0.7585, "step": 1568 }, { "epoch": 0.7789499813826486, "grad_norm": 0.1347101759097276, "learning_rate": 4.55545035647601e-06, "loss": 0.7497, "step": 1569 }, { "epoch": 0.779446444085888, "grad_norm": 0.14453358307933684, "learning_rate": 4.5548938652465555e-06, "loss": 0.7764, "step": 1570 }, { "epoch": 0.7799429067891275, "grad_norm": 0.13507717127592933, "learning_rate": 4.55433705995952e-06, "loss": 0.7964, "step": 1571 }, { "epoch": 0.7804393694923669, "grad_norm": 0.13356730248477877, "learning_rate": 4.553779940700005e-06, "loss": 0.7519, "step": 1572 }, { "epoch": 0.7809358321956062, "grad_norm": 0.14113785981123475, "learning_rate": 4.553222507553155e-06, "loss": 0.6896, "step": 1573 }, { "epoch": 0.7814322948988457, "grad_norm": 0.13798920017452704, "learning_rate": 4.552664760604167e-06, "loss": 0.753, "step": 1574 }, { "epoch": 0.7819287576020851, "grad_norm": 0.13634927023321175, "learning_rate": 4.552106699938281e-06, "loss": 0.7779, "step": 1575 }, { "epoch": 0.7824252203053246, "grad_norm": 0.13862438111784472, "learning_rate": 4.551548325640789e-06, "loss": 0.7653, "step": 1576 }, { "epoch": 0.782921683008564, "grad_norm": 0.13229593912177584, "learning_rate": 4.550989637797031e-06, "loss": 0.7289, "step": 1577 }, { "epoch": 0.7834181457118033, "grad_norm": 0.13125159194957922, "learning_rate": 4.55043063649239e-06, "loss": 0.7287, "step": 1578 }, { "epoch": 0.7839146084150428, "grad_norm": 0.1417064775776418, "learning_rate": 4.549871321812304e-06, "loss": 0.7426, "step": 1579 }, { "epoch": 0.7844110711182822, "grad_norm": 0.16883124210144626, "learning_rate": 4.549311693842252e-06, "loss": 0.8431, "step": 1580 }, { "epoch": 0.7849075338215217, "grad_norm": 0.13414566338046124, "learning_rate": 4.548751752667767e-06, "loss": 0.8359, "step": 1581 }, { "epoch": 0.7854039965247611, "grad_norm": 0.13725844757239852, "learning_rate": 4.548191498374425e-06, "loss": 0.755, "step": 1582 }, { "epoch": 0.7859004592280004, "grad_norm": 0.1361745571866967, "learning_rate": 4.547630931047853e-06, "loss": 0.7442, "step": 1583 }, { "epoch": 0.7863969219312399, "grad_norm": 0.13750113879876855, "learning_rate": 4.547070050773725e-06, "loss": 0.7196, "step": 1584 }, { "epoch": 0.7868933846344793, "grad_norm": 0.13909283760787944, "learning_rate": 4.5465088576377614e-06, "loss": 0.77, "step": 1585 }, { "epoch": 0.7873898473377188, "grad_norm": 0.13324992099967756, "learning_rate": 4.545947351725732e-06, "loss": 0.7228, "step": 1586 }, { "epoch": 0.7878863100409582, "grad_norm": 0.13413613523960244, "learning_rate": 4.5453855331234555e-06, "loss": 0.7636, "step": 1587 }, { "epoch": 0.7883827727441975, "grad_norm": 0.13214222144352114, "learning_rate": 4.544823401916794e-06, "loss": 0.7119, "step": 1588 }, { "epoch": 0.788879235447437, "grad_norm": 0.1347073970533374, "learning_rate": 4.544260958191663e-06, "loss": 0.7696, "step": 1589 }, { "epoch": 0.7893756981506764, "grad_norm": 0.1322596137845652, "learning_rate": 4.543698202034021e-06, "loss": 0.7173, "step": 1590 }, { "epoch": 0.7898721608539159, "grad_norm": 0.13812558700775102, "learning_rate": 4.543135133529878e-06, "loss": 0.7625, "step": 1591 }, { "epoch": 0.7903686235571553, "grad_norm": 0.13570198883227674, "learning_rate": 4.542571752765288e-06, "loss": 0.8061, "step": 1592 }, { "epoch": 0.7908650862603946, "grad_norm": 0.1502291763200914, "learning_rate": 4.542008059826356e-06, "loss": 0.7479, "step": 1593 }, { "epoch": 0.7913615489636341, "grad_norm": 0.14188040098609095, "learning_rate": 4.5414440547992325e-06, "loss": 0.7453, "step": 1594 }, { "epoch": 0.7918580116668735, "grad_norm": 0.12979186868674533, "learning_rate": 4.540879737770118e-06, "loss": 0.7411, "step": 1595 }, { "epoch": 0.792354474370113, "grad_norm": 0.13210637876846007, "learning_rate": 4.540315108825258e-06, "loss": 0.7322, "step": 1596 }, { "epoch": 0.7928509370733524, "grad_norm": 0.13744920964363747, "learning_rate": 4.539750168050949e-06, "loss": 0.7314, "step": 1597 }, { "epoch": 0.7933473997765917, "grad_norm": 0.13854762530674888, "learning_rate": 4.539184915533531e-06, "loss": 0.7586, "step": 1598 }, { "epoch": 0.7938438624798312, "grad_norm": 0.14216681342036058, "learning_rate": 4.538619351359393e-06, "loss": 0.7504, "step": 1599 }, { "epoch": 0.7943403251830706, "grad_norm": 0.13158698944804878, "learning_rate": 4.538053475614976e-06, "loss": 0.7344, "step": 1600 }, { "epoch": 0.7948367878863101, "grad_norm": 0.133052297447574, "learning_rate": 4.537487288386763e-06, "loss": 0.7265, "step": 1601 }, { "epoch": 0.7953332505895495, "grad_norm": 0.14026215243529064, "learning_rate": 4.536920789761286e-06, "loss": 0.7422, "step": 1602 }, { "epoch": 0.7958297132927888, "grad_norm": 0.13205252140563034, "learning_rate": 4.536353979825125e-06, "loss": 0.7223, "step": 1603 }, { "epoch": 0.7963261759960283, "grad_norm": 0.12972258505388326, "learning_rate": 4.535786858664909e-06, "loss": 0.7183, "step": 1604 }, { "epoch": 0.7968226386992677, "grad_norm": 0.14485812514741278, "learning_rate": 4.5352194263673135e-06, "loss": 0.7563, "step": 1605 }, { "epoch": 0.7973191014025072, "grad_norm": 0.14507460323696492, "learning_rate": 4.534651683019061e-06, "loss": 0.7467, "step": 1606 }, { "epoch": 0.7978155641057466, "grad_norm": 0.13781694651061116, "learning_rate": 4.534083628706921e-06, "loss": 0.7728, "step": 1607 }, { "epoch": 0.7983120268089859, "grad_norm": 0.13742843647821426, "learning_rate": 4.533515263517713e-06, "loss": 0.7739, "step": 1608 }, { "epoch": 0.7988084895122254, "grad_norm": 0.14505009227353202, "learning_rate": 4.532946587538302e-06, "loss": 0.7943, "step": 1609 }, { "epoch": 0.7993049522154648, "grad_norm": 0.13478499327557095, "learning_rate": 4.532377600855601e-06, "loss": 0.744, "step": 1610 }, { "epoch": 0.7998014149187043, "grad_norm": 0.13368137981304712, "learning_rate": 4.53180830355657e-06, "loss": 0.7207, "step": 1611 }, { "epoch": 0.8002978776219436, "grad_norm": 0.1367615628975349, "learning_rate": 4.531238695728218e-06, "loss": 0.7439, "step": 1612 }, { "epoch": 0.800794340325183, "grad_norm": 0.1414644058349886, "learning_rate": 4.5306687774576e-06, "loss": 0.8015, "step": 1613 }, { "epoch": 0.8012908030284225, "grad_norm": 0.1367196701213475, "learning_rate": 4.530098548831817e-06, "loss": 0.7596, "step": 1614 }, { "epoch": 0.8017872657316619, "grad_norm": 0.13522642664685247, "learning_rate": 4.529528009938022e-06, "loss": 0.7498, "step": 1615 }, { "epoch": 0.8022837284349014, "grad_norm": 0.13889758013185657, "learning_rate": 4.528957160863412e-06, "loss": 0.7589, "step": 1616 }, { "epoch": 0.8027801911381407, "grad_norm": 0.13673089456510074, "learning_rate": 4.528386001695232e-06, "loss": 0.7202, "step": 1617 }, { "epoch": 0.8032766538413801, "grad_norm": 0.13642986188283496, "learning_rate": 4.5278145325207735e-06, "loss": 0.7115, "step": 1618 }, { "epoch": 0.8037731165446196, "grad_norm": 0.13872017776841988, "learning_rate": 4.527242753427378e-06, "loss": 0.7473, "step": 1619 }, { "epoch": 0.804269579247859, "grad_norm": 0.13515176865610115, "learning_rate": 4.526670664502432e-06, "loss": 0.7328, "step": 1620 }, { "epoch": 0.8047660419510985, "grad_norm": 0.13643944196282173, "learning_rate": 4.52609826583337e-06, "loss": 0.7182, "step": 1621 }, { "epoch": 0.8052625046543378, "grad_norm": 0.13670018877356768, "learning_rate": 4.525525557507673e-06, "loss": 0.7556, "step": 1622 }, { "epoch": 0.8057589673575772, "grad_norm": 0.13504339939566765, "learning_rate": 4.524952539612872e-06, "loss": 0.7584, "step": 1623 }, { "epoch": 0.8062554300608167, "grad_norm": 0.13368086807436755, "learning_rate": 4.524379212236544e-06, "loss": 0.7667, "step": 1624 }, { "epoch": 0.8067518927640561, "grad_norm": 0.14428096858814662, "learning_rate": 4.5238055754663105e-06, "loss": 0.8251, "step": 1625 }, { "epoch": 0.8072483554672956, "grad_norm": 0.14219994124125016, "learning_rate": 4.523231629389845e-06, "loss": 0.739, "step": 1626 }, { "epoch": 0.8077448181705349, "grad_norm": 0.1326897765805104, "learning_rate": 4.522657374094864e-06, "loss": 0.7558, "step": 1627 }, { "epoch": 0.8082412808737743, "grad_norm": 0.13799541492346612, "learning_rate": 4.522082809669135e-06, "loss": 0.7563, "step": 1628 }, { "epoch": 0.8087377435770138, "grad_norm": 0.13141846846705796, "learning_rate": 4.52150793620047e-06, "loss": 0.6994, "step": 1629 }, { "epoch": 0.8092342062802532, "grad_norm": 0.14133853029523705, "learning_rate": 4.520932753776729e-06, "loss": 0.7406, "step": 1630 }, { "epoch": 0.8097306689834927, "grad_norm": 0.14262392679819444, "learning_rate": 4.52035726248582e-06, "loss": 0.7419, "step": 1631 }, { "epoch": 0.810227131686732, "grad_norm": 0.13946667026700096, "learning_rate": 4.519781462415698e-06, "loss": 0.7623, "step": 1632 }, { "epoch": 0.8107235943899714, "grad_norm": 0.14076729287492634, "learning_rate": 4.5192053536543636e-06, "loss": 0.7801, "step": 1633 }, { "epoch": 0.8112200570932109, "grad_norm": 0.132725847901949, "learning_rate": 4.518628936289867e-06, "loss": 0.7443, "step": 1634 }, { "epoch": 0.8117165197964503, "grad_norm": 0.13691408254882964, "learning_rate": 4.518052210410303e-06, "loss": 0.7553, "step": 1635 }, { "epoch": 0.8122129824996898, "grad_norm": 0.13655790072644422, "learning_rate": 4.517475176103816e-06, "loss": 0.7799, "step": 1636 }, { "epoch": 0.8127094452029291, "grad_norm": 0.13537469163429455, "learning_rate": 4.5168978334585955e-06, "loss": 0.7422, "step": 1637 }, { "epoch": 0.8132059079061685, "grad_norm": 0.13341240046444236, "learning_rate": 4.5163201825628805e-06, "loss": 0.7611, "step": 1638 }, { "epoch": 0.813702370609408, "grad_norm": 0.1330300275048859, "learning_rate": 4.515742223504954e-06, "loss": 0.7526, "step": 1639 }, { "epoch": 0.8141988333126474, "grad_norm": 0.1289270904071567, "learning_rate": 4.51516395637315e-06, "loss": 0.6907, "step": 1640 }, { "epoch": 0.8146952960158869, "grad_norm": 0.13653838617553693, "learning_rate": 4.514585381255845e-06, "loss": 0.7272, "step": 1641 }, { "epoch": 0.8151917587191262, "grad_norm": 0.14220864005729456, "learning_rate": 4.514006498241465e-06, "loss": 0.7533, "step": 1642 }, { "epoch": 0.8156882214223656, "grad_norm": 0.1334721980787763, "learning_rate": 4.513427307418485e-06, "loss": 0.7727, "step": 1643 }, { "epoch": 0.8161846841256051, "grad_norm": 0.13759063653846024, "learning_rate": 4.512847808875424e-06, "loss": 0.7862, "step": 1644 }, { "epoch": 0.8166811468288445, "grad_norm": 0.13689081776996725, "learning_rate": 4.512268002700848e-06, "loss": 0.7217, "step": 1645 }, { "epoch": 0.8171776095320838, "grad_norm": 0.1447472368767314, "learning_rate": 4.5116878889833735e-06, "loss": 0.7671, "step": 1646 }, { "epoch": 0.8176740722353233, "grad_norm": 0.13925674131676116, "learning_rate": 4.511107467811659e-06, "loss": 0.7461, "step": 1647 }, { "epoch": 0.8181705349385627, "grad_norm": 0.12956181530658847, "learning_rate": 4.510526739274415e-06, "loss": 0.7147, "step": 1648 }, { "epoch": 0.8186669976418022, "grad_norm": 0.14156922012695827, "learning_rate": 4.509945703460394e-06, "loss": 0.7724, "step": 1649 }, { "epoch": 0.8191634603450416, "grad_norm": 0.1337498799210342, "learning_rate": 4.509364360458399e-06, "loss": 0.7494, "step": 1650 }, { "epoch": 0.819659923048281, "grad_norm": 0.14328558587229834, "learning_rate": 4.50878271035728e-06, "loss": 0.7505, "step": 1651 }, { "epoch": 0.8201563857515204, "grad_norm": 0.13902637442350718, "learning_rate": 4.508200753245932e-06, "loss": 0.7722, "step": 1652 }, { "epoch": 0.8206528484547598, "grad_norm": 0.13220848269342542, "learning_rate": 4.507618489213298e-06, "loss": 0.7764, "step": 1653 }, { "epoch": 0.8211493111579993, "grad_norm": 0.1327753210553815, "learning_rate": 4.507035918348367e-06, "loss": 0.7135, "step": 1654 }, { "epoch": 0.8216457738612387, "grad_norm": 0.1406321284403388, "learning_rate": 4.506453040740177e-06, "loss": 0.7467, "step": 1655 }, { "epoch": 0.822142236564478, "grad_norm": 0.13632541689225586, "learning_rate": 4.505869856477811e-06, "loss": 0.749, "step": 1656 }, { "epoch": 0.8226386992677175, "grad_norm": 0.14246540537344085, "learning_rate": 4.505286365650398e-06, "loss": 0.7677, "step": 1657 }, { "epoch": 0.8231351619709569, "grad_norm": 0.13667688867874997, "learning_rate": 4.504702568347117e-06, "loss": 0.7307, "step": 1658 }, { "epoch": 0.8236316246741964, "grad_norm": 0.1431423731753008, "learning_rate": 4.5041184646571915e-06, "loss": 0.7707, "step": 1659 }, { "epoch": 0.8241280873774358, "grad_norm": 0.14005994273435252, "learning_rate": 4.5035340546698915e-06, "loss": 0.7612, "step": 1660 }, { "epoch": 0.8246245500806751, "grad_norm": 0.13515045815804208, "learning_rate": 4.502949338474536e-06, "loss": 0.7746, "step": 1661 }, { "epoch": 0.8251210127839146, "grad_norm": 0.13147659327086783, "learning_rate": 4.50236431616049e-06, "loss": 0.7014, "step": 1662 }, { "epoch": 0.825617475487154, "grad_norm": 0.13900795898461954, "learning_rate": 4.501778987817162e-06, "loss": 0.7509, "step": 1663 }, { "epoch": 0.8261139381903935, "grad_norm": 0.139057514709019, "learning_rate": 4.501193353534013e-06, "loss": 0.7598, "step": 1664 }, { "epoch": 0.8266104008936329, "grad_norm": 0.1431266182489607, "learning_rate": 4.500607413400546e-06, "loss": 0.7914, "step": 1665 }, { "epoch": 0.8271068635968722, "grad_norm": 0.1334091878020073, "learning_rate": 4.5000211675063134e-06, "loss": 0.7663, "step": 1666 }, { "epoch": 0.8276033263001117, "grad_norm": 0.13230619594672582, "learning_rate": 4.499434615940913e-06, "loss": 0.751, "step": 1667 }, { "epoch": 0.8280997890033511, "grad_norm": 0.14337035164539053, "learning_rate": 4.498847758793991e-06, "loss": 0.7694, "step": 1668 }, { "epoch": 0.8285962517065906, "grad_norm": 0.13666058128401123, "learning_rate": 4.498260596155237e-06, "loss": 0.7791, "step": 1669 }, { "epoch": 0.82909271440983, "grad_norm": 0.14465851413099565, "learning_rate": 4.497673128114391e-06, "loss": 0.7803, "step": 1670 }, { "epoch": 0.8295891771130693, "grad_norm": 0.14652649532294804, "learning_rate": 4.497085354761237e-06, "loss": 0.7799, "step": 1671 }, { "epoch": 0.8300856398163088, "grad_norm": 0.13131787380504173, "learning_rate": 4.4964972761856086e-06, "loss": 0.7452, "step": 1672 }, { "epoch": 0.8305821025195482, "grad_norm": 0.14145954525931556, "learning_rate": 4.495908892477382e-06, "loss": 0.7794, "step": 1673 }, { "epoch": 0.8310785652227877, "grad_norm": 0.1384021322282935, "learning_rate": 4.495320203726483e-06, "loss": 0.7108, "step": 1674 }, { "epoch": 0.8315750279260271, "grad_norm": 0.1415771904329555, "learning_rate": 4.494731210022884e-06, "loss": 0.7311, "step": 1675 }, { "epoch": 0.8320714906292664, "grad_norm": 0.1438025629578128, "learning_rate": 4.494141911456602e-06, "loss": 0.7992, "step": 1676 }, { "epoch": 0.8325679533325059, "grad_norm": 0.1414714443428166, "learning_rate": 4.4935523081177035e-06, "loss": 0.7469, "step": 1677 }, { "epoch": 0.8330644160357453, "grad_norm": 0.14271477816026024, "learning_rate": 4.492962400096298e-06, "loss": 0.8226, "step": 1678 }, { "epoch": 0.8335608787389848, "grad_norm": 0.1443375820025856, "learning_rate": 4.492372187482545e-06, "loss": 0.8516, "step": 1679 }, { "epoch": 0.8340573414422241, "grad_norm": 0.13815933437015357, "learning_rate": 4.491781670366648e-06, "loss": 0.8077, "step": 1680 }, { "epoch": 0.8345538041454635, "grad_norm": 0.1388616626139753, "learning_rate": 4.491190848838858e-06, "loss": 0.7864, "step": 1681 }, { "epoch": 0.835050266848703, "grad_norm": 0.13570857271523626, "learning_rate": 4.490599722989474e-06, "loss": 0.7412, "step": 1682 }, { "epoch": 0.8355467295519424, "grad_norm": 0.1438978989274344, "learning_rate": 4.490008292908839e-06, "loss": 0.7208, "step": 1683 }, { "epoch": 0.8360431922551819, "grad_norm": 0.143547018622754, "learning_rate": 4.4894165586873426e-06, "loss": 0.7816, "step": 1684 }, { "epoch": 0.8365396549584212, "grad_norm": 0.13635442176917034, "learning_rate": 4.488824520415425e-06, "loss": 0.7406, "step": 1685 }, { "epoch": 0.8370361176616606, "grad_norm": 0.14562544936887342, "learning_rate": 4.4882321781835666e-06, "loss": 0.7833, "step": 1686 }, { "epoch": 0.8375325803649001, "grad_norm": 0.14484628055837453, "learning_rate": 4.4876395320822984e-06, "loss": 0.7634, "step": 1687 }, { "epoch": 0.8380290430681395, "grad_norm": 0.13816444882771123, "learning_rate": 4.487046582202198e-06, "loss": 0.7416, "step": 1688 }, { "epoch": 0.838525505771379, "grad_norm": 0.14266194907778323, "learning_rate": 4.486453328633887e-06, "loss": 0.7377, "step": 1689 }, { "epoch": 0.8390219684746183, "grad_norm": 0.14297267750408402, "learning_rate": 4.485859771468035e-06, "loss": 0.8454, "step": 1690 }, { "epoch": 0.8395184311778577, "grad_norm": 0.1403189883914047, "learning_rate": 4.4852659107953574e-06, "loss": 0.7529, "step": 1691 }, { "epoch": 0.8400148938810972, "grad_norm": 0.13495181072938314, "learning_rate": 4.484671746706617e-06, "loss": 0.7426, "step": 1692 }, { "epoch": 0.8405113565843366, "grad_norm": 0.13845259745230812, "learning_rate": 4.484077279292622e-06, "loss": 0.768, "step": 1693 }, { "epoch": 0.8410078192875761, "grad_norm": 0.1386205743503297, "learning_rate": 4.483482508644228e-06, "loss": 0.7478, "step": 1694 }, { "epoch": 0.8415042819908154, "grad_norm": 0.14075629580146526, "learning_rate": 4.482887434852334e-06, "loss": 0.7795, "step": 1695 }, { "epoch": 0.8420007446940548, "grad_norm": 0.13730753156141765, "learning_rate": 4.4822920580078885e-06, "loss": 0.7747, "step": 1696 }, { "epoch": 0.8424972073972943, "grad_norm": 0.13372905034092983, "learning_rate": 4.481696378201887e-06, "loss": 0.7619, "step": 1697 }, { "epoch": 0.8429936701005337, "grad_norm": 0.13367135782867837, "learning_rate": 4.481100395525367e-06, "loss": 0.7271, "step": 1698 }, { "epoch": 0.8434901328037732, "grad_norm": 0.15581577968511073, "learning_rate": 4.4805041100694165e-06, "loss": 0.7648, "step": 1699 }, { "epoch": 0.8439865955070125, "grad_norm": 0.1401851479710573, "learning_rate": 4.479907521925168e-06, "loss": 0.7723, "step": 1700 }, { "epoch": 0.8444830582102519, "grad_norm": 0.13725236828476695, "learning_rate": 4.4793106311838e-06, "loss": 0.7257, "step": 1701 }, { "epoch": 0.8449795209134914, "grad_norm": 0.1356076078010008, "learning_rate": 4.478713437936538e-06, "loss": 0.7374, "step": 1702 }, { "epoch": 0.8454759836167308, "grad_norm": 0.13425339120243374, "learning_rate": 4.478115942274652e-06, "loss": 0.7241, "step": 1703 }, { "epoch": 0.8459724463199703, "grad_norm": 0.13679058651123055, "learning_rate": 4.477518144289462e-06, "loss": 0.766, "step": 1704 }, { "epoch": 0.8464689090232096, "grad_norm": 0.13984700650107668, "learning_rate": 4.476920044072331e-06, "loss": 0.7733, "step": 1705 }, { "epoch": 0.846965371726449, "grad_norm": 0.13407682285557562, "learning_rate": 4.476321641714669e-06, "loss": 0.7057, "step": 1706 }, { "epoch": 0.8474618344296885, "grad_norm": 0.13417781616310068, "learning_rate": 4.475722937307931e-06, "loss": 0.7953, "step": 1707 }, { "epoch": 0.8479582971329279, "grad_norm": 0.13220445140764078, "learning_rate": 4.475123930943621e-06, "loss": 0.6939, "step": 1708 }, { "epoch": 0.8484547598361674, "grad_norm": 0.13757038634337526, "learning_rate": 4.474524622713286e-06, "loss": 0.7181, "step": 1709 }, { "epoch": 0.8489512225394067, "grad_norm": 0.1405832726304441, "learning_rate": 4.473925012708522e-06, "loss": 0.7676, "step": 1710 }, { "epoch": 0.8494476852426461, "grad_norm": 0.14268436213062238, "learning_rate": 4.47332510102097e-06, "loss": 0.7663, "step": 1711 }, { "epoch": 0.8499441479458856, "grad_norm": 0.13994019393605722, "learning_rate": 4.472724887742316e-06, "loss": 0.7941, "step": 1712 }, { "epoch": 0.850440610649125, "grad_norm": 0.13811450219788193, "learning_rate": 4.472124372964292e-06, "loss": 0.734, "step": 1713 }, { "epoch": 0.8509370733523643, "grad_norm": 0.1318935825701855, "learning_rate": 4.471523556778679e-06, "loss": 0.739, "step": 1714 }, { "epoch": 0.8514335360556038, "grad_norm": 0.1419282323412836, "learning_rate": 4.470922439277301e-06, "loss": 0.7729, "step": 1715 }, { "epoch": 0.8519299987588432, "grad_norm": 0.1393041197530227, "learning_rate": 4.47032102055203e-06, "loss": 0.7483, "step": 1716 }, { "epoch": 0.8524264614620827, "grad_norm": 0.13626087249042654, "learning_rate": 4.469719300694783e-06, "loss": 0.7349, "step": 1717 }, { "epoch": 0.8529229241653221, "grad_norm": 0.1464856140158772, "learning_rate": 4.469117279797522e-06, "loss": 0.836, "step": 1718 }, { "epoch": 0.8534193868685614, "grad_norm": 0.1409940881870901, "learning_rate": 4.468514957952258e-06, "loss": 0.7472, "step": 1719 }, { "epoch": 0.8539158495718009, "grad_norm": 0.14285146112331779, "learning_rate": 4.467912335251045e-06, "loss": 0.8371, "step": 1720 }, { "epoch": 0.8544123122750403, "grad_norm": 0.13806293901326697, "learning_rate": 4.467309411785984e-06, "loss": 0.8033, "step": 1721 }, { "epoch": 0.8549087749782798, "grad_norm": 0.1361187990693462, "learning_rate": 4.466706187649223e-06, "loss": 0.7249, "step": 1722 }, { "epoch": 0.8554052376815192, "grad_norm": 0.1392046645207596, "learning_rate": 4.466102662932956e-06, "loss": 0.7949, "step": 1723 }, { "epoch": 0.8559017003847585, "grad_norm": 0.1402331299424571, "learning_rate": 4.46549883772942e-06, "loss": 0.8077, "step": 1724 }, { "epoch": 0.856398163087998, "grad_norm": 0.1350641709477664, "learning_rate": 4.464894712130902e-06, "loss": 0.7314, "step": 1725 }, { "epoch": 0.8568946257912374, "grad_norm": 0.13330313530654128, "learning_rate": 4.464290286229731e-06, "loss": 0.7255, "step": 1726 }, { "epoch": 0.8573910884944769, "grad_norm": 0.14647673202761563, "learning_rate": 4.463685560118285e-06, "loss": 0.7336, "step": 1727 }, { "epoch": 0.8578875511977163, "grad_norm": 0.13358377643813468, "learning_rate": 4.463080533888987e-06, "loss": 0.7289, "step": 1728 }, { "epoch": 0.8583840139009556, "grad_norm": 0.16223228182246507, "learning_rate": 4.4624752076343044e-06, "loss": 0.8098, "step": 1729 }, { "epoch": 0.8588804766041951, "grad_norm": 0.1362449815913255, "learning_rate": 4.461869581446752e-06, "loss": 0.7671, "step": 1730 }, { "epoch": 0.8593769393074345, "grad_norm": 0.14081968859441232, "learning_rate": 4.461263655418891e-06, "loss": 0.6943, "step": 1731 }, { "epoch": 0.859873402010674, "grad_norm": 0.13606349832132275, "learning_rate": 4.460657429643326e-06, "loss": 0.7334, "step": 1732 }, { "epoch": 0.8603698647139134, "grad_norm": 0.1369487135112441, "learning_rate": 4.460050904212711e-06, "loss": 0.7225, "step": 1733 }, { "epoch": 0.8608663274171527, "grad_norm": 0.14809100220976384, "learning_rate": 4.45944407921974e-06, "loss": 0.7686, "step": 1734 }, { "epoch": 0.8613627901203922, "grad_norm": 0.13703527982602742, "learning_rate": 4.458836954757161e-06, "loss": 0.7545, "step": 1735 }, { "epoch": 0.8618592528236316, "grad_norm": 0.1351821399394306, "learning_rate": 4.4582295309177595e-06, "loss": 0.7797, "step": 1736 }, { "epoch": 0.8623557155268711, "grad_norm": 0.1354673204843339, "learning_rate": 4.457621807794372e-06, "loss": 0.8066, "step": 1737 }, { "epoch": 0.8628521782301105, "grad_norm": 0.13862706776026357, "learning_rate": 4.457013785479881e-06, "loss": 0.7333, "step": 1738 }, { "epoch": 0.8633486409333498, "grad_norm": 0.1389346115440188, "learning_rate": 4.45640546406721e-06, "loss": 0.764, "step": 1739 }, { "epoch": 0.8638451036365893, "grad_norm": 0.15558347243623133, "learning_rate": 4.455796843649332e-06, "loss": 0.7593, "step": 1740 }, { "epoch": 0.8643415663398287, "grad_norm": 0.14353099958802945, "learning_rate": 4.455187924319266e-06, "loss": 0.7507, "step": 1741 }, { "epoch": 0.8648380290430682, "grad_norm": 0.14119369358264125, "learning_rate": 4.454578706170075e-06, "loss": 0.7599, "step": 1742 }, { "epoch": 0.8653344917463076, "grad_norm": 0.13360839981041983, "learning_rate": 4.453969189294867e-06, "loss": 0.7387, "step": 1743 }, { "epoch": 0.8658309544495469, "grad_norm": 0.14010427578913046, "learning_rate": 4.453359373786799e-06, "loss": 0.754, "step": 1744 }, { "epoch": 0.8663274171527864, "grad_norm": 0.13347970173643106, "learning_rate": 4.45274925973907e-06, "loss": 0.7363, "step": 1745 }, { "epoch": 0.8668238798560258, "grad_norm": 0.143214337774515, "learning_rate": 4.4521388472449265e-06, "loss": 0.74, "step": 1746 }, { "epoch": 0.8673203425592653, "grad_norm": 0.14170555954343675, "learning_rate": 4.451528136397661e-06, "loss": 0.74, "step": 1747 }, { "epoch": 0.8678168052625047, "grad_norm": 0.13910626879867338, "learning_rate": 4.450917127290609e-06, "loss": 0.7129, "step": 1748 }, { "epoch": 0.868313267965744, "grad_norm": 0.14451162598319245, "learning_rate": 4.4503058200171566e-06, "loss": 0.7683, "step": 1749 }, { "epoch": 0.8688097306689835, "grad_norm": 0.13670177426956756, "learning_rate": 4.449694214670729e-06, "loss": 0.7196, "step": 1750 }, { "epoch": 0.8693061933722229, "grad_norm": 0.1425374537551891, "learning_rate": 4.449082311344802e-06, "loss": 0.713, "step": 1751 }, { "epoch": 0.8698026560754624, "grad_norm": 0.13684146510608788, "learning_rate": 4.4484701101328944e-06, "loss": 0.6968, "step": 1752 }, { "epoch": 0.8702991187787017, "grad_norm": 0.13936638023848308, "learning_rate": 4.447857611128572e-06, "loss": 0.7731, "step": 1753 }, { "epoch": 0.8707955814819411, "grad_norm": 0.14653517321071075, "learning_rate": 4.447244814425446e-06, "loss": 0.7236, "step": 1754 }, { "epoch": 0.8712920441851806, "grad_norm": 0.14331030940384665, "learning_rate": 4.446631720117171e-06, "loss": 0.7306, "step": 1755 }, { "epoch": 0.87178850688842, "grad_norm": 0.14673685332839945, "learning_rate": 4.446018328297449e-06, "loss": 0.7242, "step": 1756 }, { "epoch": 0.8722849695916595, "grad_norm": 0.1347449091302609, "learning_rate": 4.445404639060028e-06, "loss": 0.7709, "step": 1757 }, { "epoch": 0.8727814322948988, "grad_norm": 0.1411266352695579, "learning_rate": 4.4447906524987006e-06, "loss": 0.8022, "step": 1758 }, { "epoch": 0.8732778949981382, "grad_norm": 0.14208496504174006, "learning_rate": 4.444176368707305e-06, "loss": 0.7066, "step": 1759 }, { "epoch": 0.8737743577013777, "grad_norm": 0.13674759170668294, "learning_rate": 4.443561787779722e-06, "loss": 0.7553, "step": 1760 }, { "epoch": 0.8742708204046171, "grad_norm": 0.14090838321939578, "learning_rate": 4.442946909809884e-06, "loss": 0.704, "step": 1761 }, { "epoch": 0.8747672831078566, "grad_norm": 0.14668605063528445, "learning_rate": 4.442331734891763e-06, "loss": 0.7503, "step": 1762 }, { "epoch": 0.8752637458110959, "grad_norm": 0.1397458175530683, "learning_rate": 4.441716263119379e-06, "loss": 0.7832, "step": 1763 }, { "epoch": 0.8757602085143353, "grad_norm": 0.138318109593277, "learning_rate": 4.441100494586797e-06, "loss": 0.7535, "step": 1764 }, { "epoch": 0.8762566712175748, "grad_norm": 0.14189795702675218, "learning_rate": 4.4404844293881285e-06, "loss": 0.7643, "step": 1765 }, { "epoch": 0.8767531339208142, "grad_norm": 0.13684604258689606, "learning_rate": 4.439868067617528e-06, "loss": 0.7602, "step": 1766 }, { "epoch": 0.8772495966240537, "grad_norm": 0.13657716615346063, "learning_rate": 4.4392514093691965e-06, "loss": 0.7519, "step": 1767 }, { "epoch": 0.877746059327293, "grad_norm": 0.16720007998528186, "learning_rate": 4.43863445473738e-06, "loss": 0.7311, "step": 1768 }, { "epoch": 0.8782425220305324, "grad_norm": 0.13527051039700383, "learning_rate": 4.4380172038163716e-06, "loss": 0.727, "step": 1769 }, { "epoch": 0.8787389847337719, "grad_norm": 0.13977003542487368, "learning_rate": 4.437399656700507e-06, "loss": 0.7412, "step": 1770 }, { "epoch": 0.8792354474370113, "grad_norm": 0.14066430808137664, "learning_rate": 4.436781813484169e-06, "loss": 0.7626, "step": 1771 }, { "epoch": 0.8797319101402508, "grad_norm": 0.14316718597333672, "learning_rate": 4.436163674261785e-06, "loss": 0.783, "step": 1772 }, { "epoch": 0.8802283728434901, "grad_norm": 0.14181967035602686, "learning_rate": 4.435545239127827e-06, "loss": 0.7452, "step": 1773 }, { "epoch": 0.8807248355467295, "grad_norm": 0.13364947209125708, "learning_rate": 4.434926508176814e-06, "loss": 0.7446, "step": 1774 }, { "epoch": 0.881221298249969, "grad_norm": 0.1351435150199207, "learning_rate": 4.434307481503307e-06, "loss": 0.7263, "step": 1775 }, { "epoch": 0.8817177609532084, "grad_norm": 0.13242522480531707, "learning_rate": 4.433688159201917e-06, "loss": 0.6855, "step": 1776 }, { "epoch": 0.8822142236564479, "grad_norm": 0.1392138600504658, "learning_rate": 4.433068541367296e-06, "loss": 0.7762, "step": 1777 }, { "epoch": 0.8827106863596872, "grad_norm": 0.13595036312344588, "learning_rate": 4.432448628094142e-06, "loss": 0.7368, "step": 1778 }, { "epoch": 0.8832071490629266, "grad_norm": 0.14043982715211084, "learning_rate": 4.4318284194772e-06, "loss": 0.7809, "step": 1779 }, { "epoch": 0.8837036117661661, "grad_norm": 0.13928983515854973, "learning_rate": 4.431207915611259e-06, "loss": 0.7963, "step": 1780 }, { "epoch": 0.8842000744694055, "grad_norm": 0.15323129087107526, "learning_rate": 4.430587116591152e-06, "loss": 0.7638, "step": 1781 }, { "epoch": 0.884696537172645, "grad_norm": 0.13672127380953056, "learning_rate": 4.429966022511759e-06, "loss": 0.7521, "step": 1782 }, { "epoch": 0.8851929998758843, "grad_norm": 0.13675982862960748, "learning_rate": 4.429344633468005e-06, "loss": 0.735, "step": 1783 }, { "epoch": 0.8856894625791237, "grad_norm": 0.14468317226179433, "learning_rate": 4.428722949554858e-06, "loss": 0.7728, "step": 1784 }, { "epoch": 0.8861859252823632, "grad_norm": 0.1392240635874152, "learning_rate": 4.428100970867332e-06, "loss": 0.7631, "step": 1785 }, { "epoch": 0.8866823879856026, "grad_norm": 0.13905551197842414, "learning_rate": 4.4274786975004886e-06, "loss": 0.7664, "step": 1786 }, { "epoch": 0.887178850688842, "grad_norm": 0.13218082313555352, "learning_rate": 4.426856129549431e-06, "loss": 0.7664, "step": 1787 }, { "epoch": 0.8876753133920814, "grad_norm": 0.14542219733133013, "learning_rate": 4.426233267109308e-06, "loss": 0.7537, "step": 1788 }, { "epoch": 0.8881717760953208, "grad_norm": 0.13937675043101774, "learning_rate": 4.4256101102753154e-06, "loss": 0.7624, "step": 1789 }, { "epoch": 0.8886682387985603, "grad_norm": 0.14213592075320755, "learning_rate": 4.424986659142691e-06, "loss": 0.7745, "step": 1790 }, { "epoch": 0.8891647015017997, "grad_norm": 0.13222515034482818, "learning_rate": 4.424362913806723e-06, "loss": 0.7704, "step": 1791 }, { "epoch": 0.889661164205039, "grad_norm": 0.12997849285210467, "learning_rate": 4.423738874362737e-06, "loss": 0.7473, "step": 1792 }, { "epoch": 0.8901576269082785, "grad_norm": 0.13865947461800895, "learning_rate": 4.423114540906108e-06, "loss": 0.7922, "step": 1793 }, { "epoch": 0.8906540896115179, "grad_norm": 0.1400478476922931, "learning_rate": 4.422489913532258e-06, "loss": 0.7136, "step": 1794 }, { "epoch": 0.8911505523147574, "grad_norm": 0.13354935512552799, "learning_rate": 4.421864992336648e-06, "loss": 0.7358, "step": 1795 }, { "epoch": 0.8916470150179968, "grad_norm": 0.1503327640511868, "learning_rate": 4.421239777414789e-06, "loss": 0.73, "step": 1796 }, { "epoch": 0.8921434777212361, "grad_norm": 0.1378043512733515, "learning_rate": 4.4206142688622365e-06, "loss": 0.7978, "step": 1797 }, { "epoch": 0.8926399404244756, "grad_norm": 0.14901127137101272, "learning_rate": 4.4199884667745866e-06, "loss": 0.8067, "step": 1798 }, { "epoch": 0.893136403127715, "grad_norm": 0.14348536864057324, "learning_rate": 4.419362371247483e-06, "loss": 0.7077, "step": 1799 }, { "epoch": 0.8936328658309545, "grad_norm": 0.14175662380934076, "learning_rate": 4.418735982376617e-06, "loss": 0.7913, "step": 1800 }, { "epoch": 0.8941293285341939, "grad_norm": 0.1358379514515194, "learning_rate": 4.41810930025772e-06, "loss": 0.7867, "step": 1801 }, { "epoch": 0.8946257912374332, "grad_norm": 0.13785444500689084, "learning_rate": 4.417482324986572e-06, "loss": 0.8063, "step": 1802 }, { "epoch": 0.8951222539406727, "grad_norm": 0.13967910977720893, "learning_rate": 4.416855056658994e-06, "loss": 0.7678, "step": 1803 }, { "epoch": 0.8956187166439121, "grad_norm": 0.13147269887815224, "learning_rate": 4.416227495370855e-06, "loss": 0.7064, "step": 1804 }, { "epoch": 0.8961151793471516, "grad_norm": 0.13960191918704914, "learning_rate": 4.415599641218068e-06, "loss": 0.7231, "step": 1805 }, { "epoch": 0.896611642050391, "grad_norm": 0.15042506597615363, "learning_rate": 4.4149714942965905e-06, "loss": 0.7629, "step": 1806 }, { "epoch": 0.8971081047536303, "grad_norm": 0.15012721023542636, "learning_rate": 4.414343054702424e-06, "loss": 0.8244, "step": 1807 }, { "epoch": 0.8976045674568698, "grad_norm": 0.13677278853363833, "learning_rate": 4.4137143225316155e-06, "loss": 0.7664, "step": 1808 }, { "epoch": 0.8981010301601092, "grad_norm": 0.13593716948768078, "learning_rate": 4.4130852978802575e-06, "loss": 0.7305, "step": 1809 }, { "epoch": 0.8985974928633487, "grad_norm": 0.13364129430992383, "learning_rate": 4.412455980844486e-06, "loss": 0.7374, "step": 1810 }, { "epoch": 0.8990939555665881, "grad_norm": 0.1383082700694781, "learning_rate": 4.4118263715204805e-06, "loss": 0.7336, "step": 1811 }, { "epoch": 0.8995904182698274, "grad_norm": 0.13423570328137593, "learning_rate": 4.4111964700044684e-06, "loss": 0.7759, "step": 1812 }, { "epoch": 0.9000868809730669, "grad_norm": 0.1427047436168598, "learning_rate": 4.41056627639272e-06, "loss": 0.7461, "step": 1813 }, { "epoch": 0.9005833436763063, "grad_norm": 0.14114806673952404, "learning_rate": 4.409935790781549e-06, "loss": 0.8111, "step": 1814 }, { "epoch": 0.9010798063795458, "grad_norm": 0.13948474917166498, "learning_rate": 4.4093050132673166e-06, "loss": 0.7487, "step": 1815 }, { "epoch": 0.9015762690827852, "grad_norm": 0.13399238294763033, "learning_rate": 4.408673943946426e-06, "loss": 0.7499, "step": 1816 }, { "epoch": 0.9020727317860245, "grad_norm": 0.13587971531472334, "learning_rate": 4.408042582915327e-06, "loss": 0.7214, "step": 1817 }, { "epoch": 0.902569194489264, "grad_norm": 0.14273283326406191, "learning_rate": 4.407410930270512e-06, "loss": 0.726, "step": 1818 }, { "epoch": 0.9030656571925034, "grad_norm": 0.1377272028925061, "learning_rate": 4.406778986108519e-06, "loss": 0.7314, "step": 1819 }, { "epoch": 0.9035621198957429, "grad_norm": 0.14415656316000652, "learning_rate": 4.406146750525931e-06, "loss": 0.8112, "step": 1820 }, { "epoch": 0.9040585825989822, "grad_norm": 0.13299537098888492, "learning_rate": 4.405514223619375e-06, "loss": 0.7636, "step": 1821 }, { "epoch": 0.9045550453022216, "grad_norm": 0.14030718986113996, "learning_rate": 4.404881405485522e-06, "loss": 0.7662, "step": 1822 }, { "epoch": 0.9050515080054611, "grad_norm": 0.1285002264716999, "learning_rate": 4.404248296221089e-06, "loss": 0.7578, "step": 1823 }, { "epoch": 0.9055479707087005, "grad_norm": 0.1357249456374853, "learning_rate": 4.4036148959228365e-06, "loss": 0.785, "step": 1824 }, { "epoch": 0.90604443341194, "grad_norm": 0.13335345428150927, "learning_rate": 4.402981204687569e-06, "loss": 0.7519, "step": 1825 }, { "epoch": 0.9065408961151793, "grad_norm": 0.13323854464179258, "learning_rate": 4.402347222612137e-06, "loss": 0.7363, "step": 1826 }, { "epoch": 0.9070373588184187, "grad_norm": 0.1480468732609157, "learning_rate": 4.401712949793433e-06, "loss": 0.8581, "step": 1827 }, { "epoch": 0.9075338215216582, "grad_norm": 0.13471889336102866, "learning_rate": 4.401078386328397e-06, "loss": 0.7886, "step": 1828 }, { "epoch": 0.9080302842248976, "grad_norm": 0.1330688142356741, "learning_rate": 4.40044353231401e-06, "loss": 0.7205, "step": 1829 }, { "epoch": 0.9085267469281371, "grad_norm": 0.14615671078844134, "learning_rate": 4.3998083878473004e-06, "loss": 0.7106, "step": 1830 }, { "epoch": 0.9090232096313764, "grad_norm": 0.13549720868989765, "learning_rate": 4.399172953025341e-06, "loss": 0.7009, "step": 1831 }, { "epoch": 0.9095196723346158, "grad_norm": 0.13185385773550817, "learning_rate": 4.398537227945246e-06, "loss": 0.7404, "step": 1832 }, { "epoch": 0.9100161350378553, "grad_norm": 0.13266084243229623, "learning_rate": 4.397901212704176e-06, "loss": 0.762, "step": 1833 }, { "epoch": 0.9105125977410947, "grad_norm": 0.1359129457194815, "learning_rate": 4.397264907399337e-06, "loss": 0.7739, "step": 1834 }, { "epoch": 0.9110090604443342, "grad_norm": 0.13102612310251532, "learning_rate": 4.396628312127977e-06, "loss": 0.7081, "step": 1835 }, { "epoch": 0.9115055231475735, "grad_norm": 0.1352565063423689, "learning_rate": 4.39599142698739e-06, "loss": 0.7689, "step": 1836 }, { "epoch": 0.9120019858508129, "grad_norm": 0.1358199459420324, "learning_rate": 4.395354252074912e-06, "loss": 0.7418, "step": 1837 }, { "epoch": 0.9124984485540524, "grad_norm": 0.1340420997123764, "learning_rate": 4.394716787487928e-06, "loss": 0.7555, "step": 1838 }, { "epoch": 0.9129949112572918, "grad_norm": 0.14029534457477436, "learning_rate": 4.394079033323862e-06, "loss": 0.759, "step": 1839 }, { "epoch": 0.9134913739605313, "grad_norm": 0.14168093185036393, "learning_rate": 4.393440989680184e-06, "loss": 0.793, "step": 1840 }, { "epoch": 0.9139878366637706, "grad_norm": 0.13613343802332856, "learning_rate": 4.39280265665441e-06, "loss": 0.7622, "step": 1841 }, { "epoch": 0.91448429936701, "grad_norm": 0.134141801326751, "learning_rate": 4.392164034344099e-06, "loss": 0.7318, "step": 1842 }, { "epoch": 0.9149807620702495, "grad_norm": 0.13523796345465022, "learning_rate": 4.391525122846855e-06, "loss": 0.7306, "step": 1843 }, { "epoch": 0.9154772247734889, "grad_norm": 0.1404574112739095, "learning_rate": 4.390885922260323e-06, "loss": 0.7078, "step": 1844 }, { "epoch": 0.9159736874767284, "grad_norm": 0.1336817191525083, "learning_rate": 4.390246432682196e-06, "loss": 0.7827, "step": 1845 }, { "epoch": 0.9164701501799677, "grad_norm": 0.13488164789434137, "learning_rate": 4.389606654210209e-06, "loss": 0.7829, "step": 1846 }, { "epoch": 0.9169666128832071, "grad_norm": 0.13753302958393473, "learning_rate": 4.388966586942144e-06, "loss": 0.7592, "step": 1847 }, { "epoch": 0.9174630755864466, "grad_norm": 0.13535888688211714, "learning_rate": 4.388326230975822e-06, "loss": 0.7047, "step": 1848 }, { "epoch": 0.917959538289686, "grad_norm": 0.15778012406959804, "learning_rate": 4.387685586409113e-06, "loss": 0.8187, "step": 1849 }, { "epoch": 0.9184560009929255, "grad_norm": 0.1421612541209436, "learning_rate": 4.387044653339929e-06, "loss": 0.7156, "step": 1850 }, { "epoch": 0.9189524636961648, "grad_norm": 0.13379900149196902, "learning_rate": 4.386403431866227e-06, "loss": 0.7026, "step": 1851 }, { "epoch": 0.9194489263994042, "grad_norm": 0.13738592358777368, "learning_rate": 4.385761922086006e-06, "loss": 0.7395, "step": 1852 }, { "epoch": 0.9199453891026437, "grad_norm": 0.13390600462410002, "learning_rate": 4.385120124097311e-06, "loss": 0.7409, "step": 1853 }, { "epoch": 0.9204418518058831, "grad_norm": 0.16039084660182953, "learning_rate": 4.38447803799823e-06, "loss": 0.7908, "step": 1854 }, { "epoch": 0.9209383145091226, "grad_norm": 0.14623098390188546, "learning_rate": 4.383835663886897e-06, "loss": 0.8183, "step": 1855 }, { "epoch": 0.9214347772123619, "grad_norm": 0.13634108193684458, "learning_rate": 4.383193001861488e-06, "loss": 0.7497, "step": 1856 }, { "epoch": 0.9219312399156013, "grad_norm": 0.14386362828828383, "learning_rate": 4.382550052020223e-06, "loss": 0.7665, "step": 1857 }, { "epoch": 0.9224277026188408, "grad_norm": 0.14077375377005916, "learning_rate": 4.381906814461366e-06, "loss": 0.7573, "step": 1858 }, { "epoch": 0.9229241653220802, "grad_norm": 0.13040249185121516, "learning_rate": 4.3812632892832275e-06, "loss": 0.7399, "step": 1859 }, { "epoch": 0.9234206280253195, "grad_norm": 0.14035305401909165, "learning_rate": 4.38061947658416e-06, "loss": 0.7425, "step": 1860 }, { "epoch": 0.923917090728559, "grad_norm": 0.13852311964581557, "learning_rate": 4.379975376462557e-06, "loss": 0.7366, "step": 1861 }, { "epoch": 0.9244135534317984, "grad_norm": 0.14569169577045035, "learning_rate": 4.379330989016861e-06, "loss": 0.7598, "step": 1862 }, { "epoch": 0.9249100161350379, "grad_norm": 0.1420209523517496, "learning_rate": 4.3786863143455575e-06, "loss": 0.7468, "step": 1863 }, { "epoch": 0.9254064788382773, "grad_norm": 0.13850510279377587, "learning_rate": 4.378041352547172e-06, "loss": 0.7813, "step": 1864 }, { "epoch": 0.9259029415415166, "grad_norm": 0.13571804405986027, "learning_rate": 4.3773961037202784e-06, "loss": 0.7678, "step": 1865 }, { "epoch": 0.9263994042447561, "grad_norm": 0.15050220230033115, "learning_rate": 4.3767505679634925e-06, "loss": 0.7502, "step": 1866 }, { "epoch": 0.9268958669479955, "grad_norm": 0.13968131538073705, "learning_rate": 4.376104745375474e-06, "loss": 0.7503, "step": 1867 }, { "epoch": 0.927392329651235, "grad_norm": 0.15025837625476948, "learning_rate": 4.375458636054924e-06, "loss": 0.7845, "step": 1868 }, { "epoch": 0.9278887923544744, "grad_norm": 0.14063528342813086, "learning_rate": 4.374812240100594e-06, "loss": 0.7252, "step": 1869 }, { "epoch": 0.9283852550577137, "grad_norm": 0.13713965606972475, "learning_rate": 4.374165557611273e-06, "loss": 0.7426, "step": 1870 }, { "epoch": 0.9288817177609532, "grad_norm": 0.13695944367515048, "learning_rate": 4.373518588685797e-06, "loss": 0.7409, "step": 1871 }, { "epoch": 0.9293781804641926, "grad_norm": 0.1422748389549011, "learning_rate": 4.372871333423044e-06, "loss": 0.7377, "step": 1872 }, { "epoch": 0.9298746431674321, "grad_norm": 0.15953047007232668, "learning_rate": 4.372223791921937e-06, "loss": 0.794, "step": 1873 }, { "epoch": 0.9303711058706715, "grad_norm": 0.14027429808379158, "learning_rate": 4.371575964281441e-06, "loss": 0.7338, "step": 1874 }, { "epoch": 0.9308675685739108, "grad_norm": 0.1402138448734584, "learning_rate": 4.370927850600569e-06, "loss": 0.8293, "step": 1875 }, { "epoch": 0.9313640312771503, "grad_norm": 0.14210713241359957, "learning_rate": 4.370279450978372e-06, "loss": 0.724, "step": 1876 }, { "epoch": 0.9318604939803897, "grad_norm": 0.13815508146885405, "learning_rate": 4.369630765513949e-06, "loss": 0.7071, "step": 1877 }, { "epoch": 0.9323569566836292, "grad_norm": 0.13952059008632328, "learning_rate": 4.368981794306441e-06, "loss": 0.7539, "step": 1878 }, { "epoch": 0.9328534193868686, "grad_norm": 0.13939971283229288, "learning_rate": 4.368332537455032e-06, "loss": 0.7861, "step": 1879 }, { "epoch": 0.9333498820901079, "grad_norm": 0.1419171606522094, "learning_rate": 4.367682995058952e-06, "loss": 0.7177, "step": 1880 }, { "epoch": 0.9338463447933474, "grad_norm": 0.1368329170972017, "learning_rate": 4.367033167217472e-06, "loss": 0.6944, "step": 1881 }, { "epoch": 0.9343428074965868, "grad_norm": 0.13781096600616807, "learning_rate": 4.366383054029907e-06, "loss": 0.7615, "step": 1882 }, { "epoch": 0.9348392701998263, "grad_norm": 0.13708727022446426, "learning_rate": 4.365732655595618e-06, "loss": 0.743, "step": 1883 }, { "epoch": 0.9353357329030657, "grad_norm": 0.13672878592574392, "learning_rate": 4.365081972014007e-06, "loss": 0.7352, "step": 1884 }, { "epoch": 0.935832195606305, "grad_norm": 0.1367173447197288, "learning_rate": 4.364431003384522e-06, "loss": 0.7197, "step": 1885 }, { "epoch": 0.9363286583095445, "grad_norm": 0.13990056273075885, "learning_rate": 4.36377974980665e-06, "loss": 0.7777, "step": 1886 }, { "epoch": 0.9368251210127839, "grad_norm": 0.1330138557768458, "learning_rate": 4.363128211379929e-06, "loss": 0.7605, "step": 1887 }, { "epoch": 0.9373215837160234, "grad_norm": 0.13245661584732554, "learning_rate": 4.362476388203932e-06, "loss": 0.6882, "step": 1888 }, { "epoch": 0.9378180464192628, "grad_norm": 0.13413118235673072, "learning_rate": 4.361824280378283e-06, "loss": 0.7746, "step": 1889 }, { "epoch": 0.9383145091225021, "grad_norm": 0.1482744865905161, "learning_rate": 4.361171888002644e-06, "loss": 0.6962, "step": 1890 }, { "epoch": 0.9388109718257416, "grad_norm": 0.13948550276160163, "learning_rate": 4.360519211176724e-06, "loss": 0.7621, "step": 1891 }, { "epoch": 0.939307434528981, "grad_norm": 0.13306011715981336, "learning_rate": 4.359866250000273e-06, "loss": 0.7034, "step": 1892 }, { "epoch": 0.9398038972322205, "grad_norm": 0.13596845635437071, "learning_rate": 4.359213004573087e-06, "loss": 0.743, "step": 1893 }, { "epoch": 0.9403003599354598, "grad_norm": 0.1353452240695457, "learning_rate": 4.358559474995003e-06, "loss": 0.7207, "step": 1894 }, { "epoch": 0.9407968226386992, "grad_norm": 0.14241184989931713, "learning_rate": 4.357905661365904e-06, "loss": 0.7474, "step": 1895 }, { "epoch": 0.9412932853419387, "grad_norm": 0.13833487675781403, "learning_rate": 4.357251563785712e-06, "loss": 0.783, "step": 1896 }, { "epoch": 0.9417897480451781, "grad_norm": 0.14544580018979297, "learning_rate": 4.3565971823543995e-06, "loss": 0.7439, "step": 1897 }, { "epoch": 0.9422862107484176, "grad_norm": 0.13830700272336566, "learning_rate": 4.355942517171975e-06, "loss": 0.7761, "step": 1898 }, { "epoch": 0.9427826734516569, "grad_norm": 0.13022704197917237, "learning_rate": 4.355287568338494e-06, "loss": 0.7591, "step": 1899 }, { "epoch": 0.9432791361548963, "grad_norm": 0.14112497002640229, "learning_rate": 4.354632335954056e-06, "loss": 0.7541, "step": 1900 }, { "epoch": 0.9437755988581358, "grad_norm": 0.13978038311901106, "learning_rate": 4.353976820118803e-06, "loss": 0.7811, "step": 1901 }, { "epoch": 0.9442720615613752, "grad_norm": 0.14646514863621188, "learning_rate": 4.353321020932918e-06, "loss": 0.77, "step": 1902 }, { "epoch": 0.9447685242646147, "grad_norm": 0.14275771666539322, "learning_rate": 4.352664938496631e-06, "loss": 0.7156, "step": 1903 }, { "epoch": 0.945264986967854, "grad_norm": 0.15199585201866644, "learning_rate": 4.352008572910213e-06, "loss": 0.7803, "step": 1904 }, { "epoch": 0.9457614496710934, "grad_norm": 0.15012570466464772, "learning_rate": 4.35135192427398e-06, "loss": 0.8116, "step": 1905 }, { "epoch": 0.9462579123743329, "grad_norm": 0.14798566149846193, "learning_rate": 4.350694992688289e-06, "loss": 0.7983, "step": 1906 }, { "epoch": 0.9467543750775723, "grad_norm": 0.13120199262792004, "learning_rate": 4.350037778253543e-06, "loss": 0.6915, "step": 1907 }, { "epoch": 0.9472508377808118, "grad_norm": 0.14145729405767365, "learning_rate": 4.3493802810701845e-06, "loss": 0.7309, "step": 1908 }, { "epoch": 0.9477473004840511, "grad_norm": 0.15086944052092097, "learning_rate": 4.348722501238704e-06, "loss": 0.744, "step": 1909 }, { "epoch": 0.9482437631872905, "grad_norm": 0.14048210428493602, "learning_rate": 4.348064438859629e-06, "loss": 0.8235, "step": 1910 }, { "epoch": 0.94874022589053, "grad_norm": 0.14254823425879515, "learning_rate": 4.347406094033539e-06, "loss": 0.7813, "step": 1911 }, { "epoch": 0.9492366885937694, "grad_norm": 0.1346422118252251, "learning_rate": 4.346747466861046e-06, "loss": 0.7329, "step": 1912 }, { "epoch": 0.9497331512970089, "grad_norm": 0.13446205456154095, "learning_rate": 4.346088557442813e-06, "loss": 0.7271, "step": 1913 }, { "epoch": 0.9502296140002482, "grad_norm": 0.13403300687387085, "learning_rate": 4.345429365879545e-06, "loss": 0.7879, "step": 1914 }, { "epoch": 0.9507260767034876, "grad_norm": 0.13602743859277094, "learning_rate": 4.344769892271987e-06, "loss": 0.7527, "step": 1915 }, { "epoch": 0.9512225394067271, "grad_norm": 0.13274614412032776, "learning_rate": 4.34411013672093e-06, "loss": 0.7, "step": 1916 }, { "epoch": 0.9517190021099665, "grad_norm": 0.1417039402628044, "learning_rate": 4.343450099327207e-06, "loss": 0.7745, "step": 1917 }, { "epoch": 0.952215464813206, "grad_norm": 0.13859569069194763, "learning_rate": 4.342789780191693e-06, "loss": 0.8001, "step": 1918 }, { "epoch": 0.9527119275164453, "grad_norm": 0.14105331622265738, "learning_rate": 4.342129179415308e-06, "loss": 0.7568, "step": 1919 }, { "epoch": 0.9532083902196847, "grad_norm": 0.13279996199472438, "learning_rate": 4.341468297099014e-06, "loss": 0.7257, "step": 1920 }, { "epoch": 0.9537048529229242, "grad_norm": 0.14360632974813572, "learning_rate": 4.340807133343817e-06, "loss": 0.7325, "step": 1921 }, { "epoch": 0.9542013156261636, "grad_norm": 0.1388585347465505, "learning_rate": 4.340145688250766e-06, "loss": 0.7798, "step": 1922 }, { "epoch": 0.9546977783294031, "grad_norm": 0.1432237090077402, "learning_rate": 4.339483961920949e-06, "loss": 0.7478, "step": 1923 }, { "epoch": 0.9551942410326424, "grad_norm": 0.13728709678976309, "learning_rate": 4.3388219544555035e-06, "loss": 0.7208, "step": 1924 }, { "epoch": 0.9556907037358818, "grad_norm": 0.138040404421968, "learning_rate": 4.338159665955605e-06, "loss": 0.7527, "step": 1925 }, { "epoch": 0.9561871664391213, "grad_norm": 0.13539548550777614, "learning_rate": 4.337497096522474e-06, "loss": 0.759, "step": 1926 }, { "epoch": 0.9566836291423607, "grad_norm": 0.14360137088724848, "learning_rate": 4.336834246257374e-06, "loss": 0.7671, "step": 1927 }, { "epoch": 0.9571800918456, "grad_norm": 0.13929702076400688, "learning_rate": 4.336171115261611e-06, "loss": 0.7567, "step": 1928 }, { "epoch": 0.9576765545488395, "grad_norm": 0.13535723942569114, "learning_rate": 4.335507703636533e-06, "loss": 0.7778, "step": 1929 }, { "epoch": 0.9581730172520789, "grad_norm": 0.1388756684946805, "learning_rate": 4.334844011483534e-06, "loss": 0.7776, "step": 1930 }, { "epoch": 0.9586694799553184, "grad_norm": 0.1483955001430376, "learning_rate": 4.3341800389040465e-06, "loss": 0.791, "step": 1931 }, { "epoch": 0.9591659426585578, "grad_norm": 0.13859104875120595, "learning_rate": 4.333515785999549e-06, "loss": 0.7262, "step": 1932 }, { "epoch": 0.9596624053617971, "grad_norm": 0.14028837341254094, "learning_rate": 4.3328512528715624e-06, "loss": 0.7514, "step": 1933 }, { "epoch": 0.9601588680650366, "grad_norm": 0.13979863885496932, "learning_rate": 4.332186439621649e-06, "loss": 0.7499, "step": 1934 }, { "epoch": 0.960655330768276, "grad_norm": 0.1364911188988977, "learning_rate": 4.331521346351415e-06, "loss": 0.7733, "step": 1935 }, { "epoch": 0.9611517934715155, "grad_norm": 0.13731108508080173, "learning_rate": 4.330855973162509e-06, "loss": 0.7811, "step": 1936 }, { "epoch": 0.9616482561747549, "grad_norm": 0.13586218904438216, "learning_rate": 4.330190320156623e-06, "loss": 0.7125, "step": 1937 }, { "epoch": 0.9621447188779942, "grad_norm": 0.14068018909196964, "learning_rate": 4.329524387435493e-06, "loss": 0.7356, "step": 1938 }, { "epoch": 0.9626411815812337, "grad_norm": 0.13818425959230285, "learning_rate": 4.328858175100893e-06, "loss": 0.803, "step": 1939 }, { "epoch": 0.9631376442844731, "grad_norm": 0.13682184061425626, "learning_rate": 4.328191683254646e-06, "loss": 0.7409, "step": 1940 }, { "epoch": 0.9636341069877126, "grad_norm": 0.13139663700635784, "learning_rate": 4.327524911998611e-06, "loss": 0.7198, "step": 1941 }, { "epoch": 0.964130569690952, "grad_norm": 0.13247474416573432, "learning_rate": 4.326857861434697e-06, "loss": 0.7251, "step": 1942 }, { "epoch": 0.9646270323941913, "grad_norm": 0.13908703323534935, "learning_rate": 4.326190531664849e-06, "loss": 0.7511, "step": 1943 }, { "epoch": 0.9651234950974308, "grad_norm": 0.13675826493632884, "learning_rate": 4.32552292279106e-06, "loss": 0.7485, "step": 1944 }, { "epoch": 0.9656199578006702, "grad_norm": 0.13134376978813925, "learning_rate": 4.3248550349153614e-06, "loss": 0.6937, "step": 1945 }, { "epoch": 0.9661164205039097, "grad_norm": 0.13386771511134407, "learning_rate": 4.324186868139831e-06, "loss": 0.7163, "step": 1946 }, { "epoch": 0.9666128832071491, "grad_norm": 0.14332833450437155, "learning_rate": 4.323518422566586e-06, "loss": 0.7213, "step": 1947 }, { "epoch": 0.9671093459103884, "grad_norm": 0.1420284438393322, "learning_rate": 4.322849698297787e-06, "loss": 0.7476, "step": 1948 }, { "epoch": 0.9676058086136279, "grad_norm": 0.13709550107289373, "learning_rate": 4.322180695435641e-06, "loss": 0.7763, "step": 1949 }, { "epoch": 0.9681022713168673, "grad_norm": 0.14649727566446094, "learning_rate": 4.32151141408239e-06, "loss": 0.7886, "step": 1950 }, { "epoch": 0.9685987340201068, "grad_norm": 0.13158072122538214, "learning_rate": 4.320841854340327e-06, "loss": 0.7063, "step": 1951 }, { "epoch": 0.9690951967233462, "grad_norm": 0.13262016397878115, "learning_rate": 4.32017201631178e-06, "loss": 0.6991, "step": 1952 }, { "epoch": 0.9695916594265855, "grad_norm": 0.14641301035110524, "learning_rate": 4.319501900099125e-06, "loss": 0.7397, "step": 1953 }, { "epoch": 0.970088122129825, "grad_norm": 0.1372723043098086, "learning_rate": 4.318831505804778e-06, "loss": 0.7251, "step": 1954 }, { "epoch": 0.9705845848330644, "grad_norm": 0.13338014846503463, "learning_rate": 4.318160833531199e-06, "loss": 0.6907, "step": 1955 }, { "epoch": 0.9710810475363039, "grad_norm": 0.15322411141967948, "learning_rate": 4.317489883380887e-06, "loss": 0.7292, "step": 1956 }, { "epoch": 0.9715775102395433, "grad_norm": 0.14101937816845833, "learning_rate": 4.3168186554563885e-06, "loss": 0.7558, "step": 1957 }, { "epoch": 0.9720739729427826, "grad_norm": 0.14044388792373283, "learning_rate": 4.316147149860289e-06, "loss": 0.7488, "step": 1958 }, { "epoch": 0.9725704356460221, "grad_norm": 0.13087099733521357, "learning_rate": 4.315475366695217e-06, "loss": 0.7315, "step": 1959 }, { "epoch": 0.9730668983492615, "grad_norm": 0.13481714845223355, "learning_rate": 4.314803306063845e-06, "loss": 0.7712, "step": 1960 }, { "epoch": 0.973563361052501, "grad_norm": 0.14067827771924268, "learning_rate": 4.3141309680688845e-06, "loss": 0.768, "step": 1961 }, { "epoch": 0.9740598237557403, "grad_norm": 0.15976691205185806, "learning_rate": 4.313458352813093e-06, "loss": 0.7287, "step": 1962 }, { "epoch": 0.9745562864589797, "grad_norm": 0.15233463929685187, "learning_rate": 4.31278546039927e-06, "loss": 0.7577, "step": 1963 }, { "epoch": 0.9750527491622192, "grad_norm": 0.13881182089614136, "learning_rate": 4.312112290930255e-06, "loss": 0.7525, "step": 1964 }, { "epoch": 0.9755492118654586, "grad_norm": 0.13613919026600058, "learning_rate": 4.31143884450893e-06, "loss": 0.7338, "step": 1965 }, { "epoch": 0.9760456745686981, "grad_norm": 0.14009747972178313, "learning_rate": 4.310765121238223e-06, "loss": 0.7685, "step": 1966 }, { "epoch": 0.9765421372719374, "grad_norm": 0.1398735823592921, "learning_rate": 4.310091121221101e-06, "loss": 0.7142, "step": 1967 }, { "epoch": 0.9770385999751768, "grad_norm": 0.15370727403582576, "learning_rate": 4.3094168445605735e-06, "loss": 0.7518, "step": 1968 }, { "epoch": 0.9775350626784163, "grad_norm": 0.13221826861513192, "learning_rate": 4.308742291359692e-06, "loss": 0.7045, "step": 1969 }, { "epoch": 0.9780315253816557, "grad_norm": 0.13298617913810548, "learning_rate": 4.308067461721553e-06, "loss": 0.7197, "step": 1970 }, { "epoch": 0.9785279880848952, "grad_norm": 0.13310197028851603, "learning_rate": 4.307392355749293e-06, "loss": 0.7276, "step": 1971 }, { "epoch": 0.9790244507881345, "grad_norm": 0.13441131799419243, "learning_rate": 4.30671697354609e-06, "loss": 0.7461, "step": 1972 }, { "epoch": 0.9795209134913739, "grad_norm": 0.13598707003248428, "learning_rate": 4.306041315215167e-06, "loss": 0.751, "step": 1973 }, { "epoch": 0.9800173761946134, "grad_norm": 0.13020590326743284, "learning_rate": 4.305365380859786e-06, "loss": 0.7471, "step": 1974 }, { "epoch": 0.9805138388978528, "grad_norm": 0.13548633327801685, "learning_rate": 4.304689170583254e-06, "loss": 0.7304, "step": 1975 }, { "epoch": 0.9810103016010923, "grad_norm": 0.14147295011534075, "learning_rate": 4.304012684488917e-06, "loss": 0.7926, "step": 1976 }, { "epoch": 0.9815067643043316, "grad_norm": 0.14704693808375974, "learning_rate": 4.303335922680167e-06, "loss": 0.7629, "step": 1977 }, { "epoch": 0.982003227007571, "grad_norm": 0.135220427389001, "learning_rate": 4.302658885260436e-06, "loss": 0.7305, "step": 1978 }, { "epoch": 0.9824996897108105, "grad_norm": 0.13439053824847158, "learning_rate": 4.301981572333197e-06, "loss": 0.7752, "step": 1979 }, { "epoch": 0.9829961524140499, "grad_norm": 0.13771634609127179, "learning_rate": 4.3013039840019675e-06, "loss": 0.7251, "step": 1980 }, { "epoch": 0.9834926151172894, "grad_norm": 0.1326406234325998, "learning_rate": 4.300626120370306e-06, "loss": 0.7595, "step": 1981 }, { "epoch": 0.9839890778205287, "grad_norm": 0.14246037641515524, "learning_rate": 4.2999479815418135e-06, "loss": 0.7618, "step": 1982 }, { "epoch": 0.9844855405237681, "grad_norm": 0.14003774333863, "learning_rate": 4.299269567620131e-06, "loss": 0.7133, "step": 1983 }, { "epoch": 0.9849820032270076, "grad_norm": 0.13443584919833715, "learning_rate": 4.2985908787089445e-06, "loss": 0.7722, "step": 1984 }, { "epoch": 0.985478465930247, "grad_norm": 0.13668575653481171, "learning_rate": 4.29791191491198e-06, "loss": 0.7583, "step": 1985 }, { "epoch": 0.9859749286334865, "grad_norm": 0.14279910896798906, "learning_rate": 4.297232676333007e-06, "loss": 0.7981, "step": 1986 }, { "epoch": 0.9864713913367258, "grad_norm": 0.14161222593397266, "learning_rate": 4.296553163075836e-06, "loss": 0.7594, "step": 1987 }, { "epoch": 0.9869678540399652, "grad_norm": 0.13831563618388676, "learning_rate": 4.295873375244319e-06, "loss": 0.6993, "step": 1988 }, { "epoch": 0.9874643167432047, "grad_norm": 0.13787046565146302, "learning_rate": 4.2951933129423515e-06, "loss": 0.7358, "step": 1989 }, { "epoch": 0.9879607794464441, "grad_norm": 0.13779555768214863, "learning_rate": 4.29451297627387e-06, "loss": 0.7178, "step": 1990 }, { "epoch": 0.9884572421496836, "grad_norm": 0.13217653339807073, "learning_rate": 4.293832365342853e-06, "loss": 0.7439, "step": 1991 }, { "epoch": 0.9889537048529229, "grad_norm": 0.15651907445083008, "learning_rate": 4.293151480253321e-06, "loss": 0.7852, "step": 1992 }, { "epoch": 0.9894501675561623, "grad_norm": 0.14035753112824711, "learning_rate": 4.292470321109336e-06, "loss": 0.8147, "step": 1993 }, { "epoch": 0.9899466302594018, "grad_norm": 0.1344265290214628, "learning_rate": 4.291788888015002e-06, "loss": 0.7576, "step": 1994 }, { "epoch": 0.9904430929626412, "grad_norm": 0.13379707071517158, "learning_rate": 4.291107181074466e-06, "loss": 0.7163, "step": 1995 }, { "epoch": 0.9909395556658807, "grad_norm": 0.1399492515733686, "learning_rate": 4.290425200391917e-06, "loss": 0.7922, "step": 1996 }, { "epoch": 0.99143601836912, "grad_norm": 0.14344554049611216, "learning_rate": 4.289742946071581e-06, "loss": 0.7665, "step": 1997 }, { "epoch": 0.9919324810723594, "grad_norm": 0.1382864134304432, "learning_rate": 4.2890604182177336e-06, "loss": 0.7557, "step": 1998 }, { "epoch": 0.9924289437755989, "grad_norm": 0.14309914226914516, "learning_rate": 4.288377616934686e-06, "loss": 0.7213, "step": 1999 }, { "epoch": 0.9929254064788383, "grad_norm": 0.13717535026025166, "learning_rate": 4.287694542326795e-06, "loss": 0.7113, "step": 2000 }, { "epoch": 0.9934218691820776, "grad_norm": 0.13746827797324848, "learning_rate": 4.287011194498456e-06, "loss": 0.7891, "step": 2001 }, { "epoch": 0.9939183318853171, "grad_norm": 0.1421486270249576, "learning_rate": 4.2863275735541085e-06, "loss": 0.737, "step": 2002 }, { "epoch": 0.9944147945885565, "grad_norm": 0.13682547516347832, "learning_rate": 4.285643679598233e-06, "loss": 0.7373, "step": 2003 }, { "epoch": 0.994911257291796, "grad_norm": 0.1266137711385144, "learning_rate": 4.284959512735352e-06, "loss": 0.6983, "step": 2004 }, { "epoch": 0.9954077199950354, "grad_norm": 0.14377205813881055, "learning_rate": 4.284275073070028e-06, "loss": 0.7426, "step": 2005 }, { "epoch": 0.9959041826982747, "grad_norm": 0.1350794657949948, "learning_rate": 4.283590360706868e-06, "loss": 0.7668, "step": 2006 }, { "epoch": 0.9964006454015142, "grad_norm": 0.14250458148212633, "learning_rate": 4.28290537575052e-06, "loss": 0.8081, "step": 2007 }, { "epoch": 0.9968971081047536, "grad_norm": 0.13575371037098183, "learning_rate": 4.282220118305672e-06, "loss": 0.7055, "step": 2008 }, { "epoch": 0.9973935708079931, "grad_norm": 0.13942020771733923, "learning_rate": 4.281534588477054e-06, "loss": 0.7798, "step": 2009 }, { "epoch": 0.9978900335112325, "grad_norm": 0.13808731679650627, "learning_rate": 4.280848786369439e-06, "loss": 0.7241, "step": 2010 }, { "epoch": 0.9983864962144718, "grad_norm": 0.14200950616113964, "learning_rate": 4.280162712087641e-06, "loss": 0.7211, "step": 2011 }, { "epoch": 0.9988829589177113, "grad_norm": 0.13227471958984183, "learning_rate": 4.2794763657365155e-06, "loss": 0.7434, "step": 2012 }, { "epoch": 0.9993794216209507, "grad_norm": 0.1506748510159815, "learning_rate": 4.278789747420959e-06, "loss": 0.772, "step": 2013 }, { "epoch": 0.9998758843241902, "grad_norm": 0.1692117125114903, "learning_rate": 4.27810285724591e-06, "loss": 0.8515, "step": 2014 }, { "epoch": 1.0, "grad_norm": 0.1692117125114903, "learning_rate": 4.27741569531635e-06, "loss": 0.1869, "step": 2015 }, { "epoch": 1.0003723470274295, "grad_norm": 0.13641143847300066, "learning_rate": 4.276728261737298e-06, "loss": 0.5693, "step": 2016 }, { "epoch": 1.0003723470274295, "eval_loss": 0.7493109107017517, "eval_runtime": 135.5139, "eval_samples_per_second": 223.984, "eval_steps_per_second": 28.005, "step": 2016 }, { "epoch": 1.0004964627032393, "grad_norm": 0.14993340674591726, "learning_rate": 4.27604055661382e-06, "loss": 0.741, "step": 2017 }, { "epoch": 1.0009929254064789, "grad_norm": 0.15136956647535096, "learning_rate": 4.275352580051019e-06, "loss": 0.7305, "step": 2018 }, { "epoch": 1.0014893881097182, "grad_norm": 0.13556367994060173, "learning_rate": 4.274664332154042e-06, "loss": 0.6991, "step": 2019 }, { "epoch": 1.0019858508129578, "grad_norm": 0.13474503555709075, "learning_rate": 4.273975813028076e-06, "loss": 0.7061, "step": 2020 }, { "epoch": 1.002482313516197, "grad_norm": 0.13926546524904157, "learning_rate": 4.273287022778351e-06, "loss": 0.7501, "step": 2021 }, { "epoch": 1.0029787762194364, "grad_norm": 0.1417176043167176, "learning_rate": 4.272597961510137e-06, "loss": 0.7204, "step": 2022 }, { "epoch": 1.003475238922676, "grad_norm": 0.1419815927815163, "learning_rate": 4.271908629328747e-06, "loss": 0.7269, "step": 2023 }, { "epoch": 1.0039717016259153, "grad_norm": 0.14837462280399044, "learning_rate": 4.2712190263395315e-06, "loss": 0.7593, "step": 2024 }, { "epoch": 1.0044681643291549, "grad_norm": 0.1424958004520062, "learning_rate": 4.270529152647889e-06, "loss": 0.7987, "step": 2025 }, { "epoch": 1.0049646270323942, "grad_norm": 0.13557327739425565, "learning_rate": 4.269839008359252e-06, "loss": 0.7095, "step": 2026 }, { "epoch": 1.0054610897356335, "grad_norm": 0.1350892189012179, "learning_rate": 4.269148593579101e-06, "loss": 0.7297, "step": 2027 }, { "epoch": 1.005957552438873, "grad_norm": 0.1431149142071869, "learning_rate": 4.268457908412953e-06, "loss": 0.7074, "step": 2028 }, { "epoch": 1.0064540151421124, "grad_norm": 0.13418266947538876, "learning_rate": 4.267766952966369e-06, "loss": 0.7235, "step": 2029 }, { "epoch": 1.006950477845352, "grad_norm": 0.131551107709248, "learning_rate": 4.267075727344951e-06, "loss": 0.7286, "step": 2030 }, { "epoch": 1.0074469405485913, "grad_norm": 0.13666318482495718, "learning_rate": 4.266384231654339e-06, "loss": 0.7225, "step": 2031 }, { "epoch": 1.0079434032518306, "grad_norm": 0.14957531887872869, "learning_rate": 4.265692466000221e-06, "loss": 0.7452, "step": 2032 }, { "epoch": 1.0084398659550702, "grad_norm": 0.1352458457312148, "learning_rate": 4.2650004304883195e-06, "loss": 0.7488, "step": 2033 }, { "epoch": 1.0089363286583095, "grad_norm": 0.13431771924046446, "learning_rate": 4.2643081252244024e-06, "loss": 0.7408, "step": 2034 }, { "epoch": 1.009432791361549, "grad_norm": 0.1404702073381516, "learning_rate": 4.263615550314276e-06, "loss": 0.7136, "step": 2035 }, { "epoch": 1.0099292540647884, "grad_norm": 0.14333737075703962, "learning_rate": 4.262922705863791e-06, "loss": 0.7315, "step": 2036 }, { "epoch": 1.0104257167680277, "grad_norm": 0.13931858496722177, "learning_rate": 4.262229591978836e-06, "loss": 0.7515, "step": 2037 }, { "epoch": 1.0109221794712673, "grad_norm": 0.14111176709272, "learning_rate": 4.261536208765343e-06, "loss": 0.685, "step": 2038 }, { "epoch": 1.0114186421745066, "grad_norm": 0.13873104450186644, "learning_rate": 4.260842556329285e-06, "loss": 0.7419, "step": 2039 }, { "epoch": 1.0119151048777462, "grad_norm": 0.14733619687470917, "learning_rate": 4.260148634776675e-06, "loss": 0.745, "step": 2040 }, { "epoch": 1.0124115675809855, "grad_norm": 0.13985004223191186, "learning_rate": 4.259454444213568e-06, "loss": 0.7246, "step": 2041 }, { "epoch": 1.0129080302842248, "grad_norm": 0.13995194992924537, "learning_rate": 4.25875998474606e-06, "loss": 0.7642, "step": 2042 }, { "epoch": 1.0134044929874644, "grad_norm": 0.1362055125185364, "learning_rate": 4.258065256480288e-06, "loss": 0.7341, "step": 2043 }, { "epoch": 1.0139009556907037, "grad_norm": 0.1422465441519774, "learning_rate": 4.25737025952243e-06, "loss": 0.7266, "step": 2044 }, { "epoch": 1.0143974183939433, "grad_norm": 0.13512192671393297, "learning_rate": 4.2566749939787056e-06, "loss": 0.7098, "step": 2045 }, { "epoch": 1.0148938810971826, "grad_norm": 0.13453521708159794, "learning_rate": 4.255979459955374e-06, "loss": 0.6598, "step": 2046 }, { "epoch": 1.015390343800422, "grad_norm": 0.1458759220859774, "learning_rate": 4.255283657558736e-06, "loss": 0.7255, "step": 2047 }, { "epoch": 1.0158868065036615, "grad_norm": 0.13826399594709918, "learning_rate": 4.2545875868951355e-06, "loss": 0.6747, "step": 2048 }, { "epoch": 1.0163832692069008, "grad_norm": 0.15055189765595503, "learning_rate": 4.253891248070956e-06, "loss": 0.7719, "step": 2049 }, { "epoch": 1.0168797319101404, "grad_norm": 0.13416248781345586, "learning_rate": 4.253194641192621e-06, "loss": 0.6815, "step": 2050 }, { "epoch": 1.0173761946133797, "grad_norm": 0.13559489599334676, "learning_rate": 4.252497766366593e-06, "loss": 0.7037, "step": 2051 }, { "epoch": 1.017872657316619, "grad_norm": 0.172228917141983, "learning_rate": 4.251800623699382e-06, "loss": 0.7434, "step": 2052 }, { "epoch": 1.0183691200198586, "grad_norm": 0.1370557981462678, "learning_rate": 4.251103213297534e-06, "loss": 0.724, "step": 2053 }, { "epoch": 1.018865582723098, "grad_norm": 0.13629385144601702, "learning_rate": 4.250405535267636e-06, "loss": 0.753, "step": 2054 }, { "epoch": 1.0193620454263372, "grad_norm": 0.13309228573779064, "learning_rate": 4.249707589716318e-06, "loss": 0.7319, "step": 2055 }, { "epoch": 1.0198585081295768, "grad_norm": 0.13715514685240124, "learning_rate": 4.24900937675025e-06, "loss": 0.7897, "step": 2056 }, { "epoch": 1.0203549708328161, "grad_norm": 0.1357669090684218, "learning_rate": 4.24831089647614e-06, "loss": 0.7267, "step": 2057 }, { "epoch": 1.0208514335360557, "grad_norm": 0.1498727514671048, "learning_rate": 4.247612149000743e-06, "loss": 0.7297, "step": 2058 }, { "epoch": 1.021347896239295, "grad_norm": 0.14049047303313542, "learning_rate": 4.24691313443085e-06, "loss": 0.7304, "step": 2059 }, { "epoch": 1.0218443589425343, "grad_norm": 0.13909453659414045, "learning_rate": 4.2462138528732935e-06, "loss": 0.7048, "step": 2060 }, { "epoch": 1.022340821645774, "grad_norm": 0.13412019436547604, "learning_rate": 4.245514304434948e-06, "loss": 0.6914, "step": 2061 }, { "epoch": 1.0228372843490132, "grad_norm": 0.1391340532915626, "learning_rate": 4.244814489222728e-06, "loss": 0.7017, "step": 2062 }, { "epoch": 1.0233337470522528, "grad_norm": 0.14295513021033426, "learning_rate": 4.244114407343589e-06, "loss": 0.7457, "step": 2063 }, { "epoch": 1.023830209755492, "grad_norm": 0.13900941541232267, "learning_rate": 4.2434140589045286e-06, "loss": 0.7401, "step": 2064 }, { "epoch": 1.0243266724587314, "grad_norm": 0.13944892230939118, "learning_rate": 4.242713444012583e-06, "loss": 0.7874, "step": 2065 }, { "epoch": 1.024823135161971, "grad_norm": 0.13593208488424774, "learning_rate": 4.242012562774829e-06, "loss": 0.7239, "step": 2066 }, { "epoch": 1.0253195978652103, "grad_norm": 0.13931000983919048, "learning_rate": 4.241311415298386e-06, "loss": 0.7454, "step": 2067 }, { "epoch": 1.0258160605684499, "grad_norm": 0.1330003858692133, "learning_rate": 4.240610001690413e-06, "loss": 0.734, "step": 2068 }, { "epoch": 1.0263125232716892, "grad_norm": 0.14108224227175165, "learning_rate": 4.239908322058109e-06, "loss": 0.6962, "step": 2069 }, { "epoch": 1.0268089859749285, "grad_norm": 0.1391861450047936, "learning_rate": 4.239206376508716e-06, "loss": 0.7008, "step": 2070 }, { "epoch": 1.027305448678168, "grad_norm": 0.1370473901313178, "learning_rate": 4.238504165149515e-06, "loss": 0.7544, "step": 2071 }, { "epoch": 1.0278019113814074, "grad_norm": 0.13536037419786456, "learning_rate": 4.237801688087827e-06, "loss": 0.7369, "step": 2072 }, { "epoch": 1.028298374084647, "grad_norm": 0.14978518639680113, "learning_rate": 4.237098945431014e-06, "loss": 0.7679, "step": 2073 }, { "epoch": 1.0287948367878863, "grad_norm": 0.13912767164252948, "learning_rate": 4.236395937286479e-06, "loss": 0.7166, "step": 2074 }, { "epoch": 1.0292912994911256, "grad_norm": 0.1386818465495855, "learning_rate": 4.2356926637616665e-06, "loss": 0.7385, "step": 2075 }, { "epoch": 1.0297877621943652, "grad_norm": 0.1411775085091878, "learning_rate": 4.234989124964061e-06, "loss": 0.7256, "step": 2076 }, { "epoch": 1.0302842248976045, "grad_norm": 0.14419739467783685, "learning_rate": 4.234285321001185e-06, "loss": 0.7371, "step": 2077 }, { "epoch": 1.030780687600844, "grad_norm": 0.13803664666766124, "learning_rate": 4.233581251980604e-06, "loss": 0.7746, "step": 2078 }, { "epoch": 1.0312771503040834, "grad_norm": 0.14388287736016128, "learning_rate": 4.2328769180099265e-06, "loss": 0.7547, "step": 2079 }, { "epoch": 1.0317736130073227, "grad_norm": 0.14306598998666248, "learning_rate": 4.232172319196795e-06, "loss": 0.7117, "step": 2080 }, { "epoch": 1.0322700757105623, "grad_norm": 0.14932389713396255, "learning_rate": 4.231467455648899e-06, "loss": 0.7507, "step": 2081 }, { "epoch": 1.0327665384138016, "grad_norm": 0.14803637284334528, "learning_rate": 4.230762327473964e-06, "loss": 0.7168, "step": 2082 }, { "epoch": 1.0332630011170412, "grad_norm": 0.14111484516048733, "learning_rate": 4.2300569347797584e-06, "loss": 0.73, "step": 2083 }, { "epoch": 1.0337594638202805, "grad_norm": 0.13730617408120754, "learning_rate": 4.229351277674088e-06, "loss": 0.7295, "step": 2084 }, { "epoch": 1.0342559265235198, "grad_norm": 0.14834889202516877, "learning_rate": 4.228645356264805e-06, "loss": 0.8029, "step": 2085 }, { "epoch": 1.0347523892267594, "grad_norm": 0.14599972266177927, "learning_rate": 4.227939170659795e-06, "loss": 0.7629, "step": 2086 }, { "epoch": 1.0352488519299987, "grad_norm": 0.14092123214072694, "learning_rate": 4.227232720966988e-06, "loss": 0.7107, "step": 2087 }, { "epoch": 1.0357453146332383, "grad_norm": 0.14122940427509462, "learning_rate": 4.226526007294353e-06, "loss": 0.7213, "step": 2088 }, { "epoch": 1.0362417773364776, "grad_norm": 0.13842815838317993, "learning_rate": 4.225819029749902e-06, "loss": 0.7574, "step": 2089 }, { "epoch": 1.036738240039717, "grad_norm": 0.13969707819774496, "learning_rate": 4.225111788441682e-06, "loss": 0.7901, "step": 2090 }, { "epoch": 1.0372347027429565, "grad_norm": 0.14454482563554347, "learning_rate": 4.224404283477788e-06, "loss": 0.7426, "step": 2091 }, { "epoch": 1.0377311654461958, "grad_norm": 0.1589968114041004, "learning_rate": 4.223696514966346e-06, "loss": 0.757, "step": 2092 }, { "epoch": 1.0382276281494354, "grad_norm": 0.14630132012052294, "learning_rate": 4.2229884830155285e-06, "loss": 0.7052, "step": 2093 }, { "epoch": 1.0387240908526747, "grad_norm": 0.13974708317715842, "learning_rate": 4.222280187733549e-06, "loss": 0.7687, "step": 2094 }, { "epoch": 1.039220553555914, "grad_norm": 0.1383464362903483, "learning_rate": 4.2215716292286555e-06, "loss": 0.7048, "step": 2095 }, { "epoch": 1.0397170162591536, "grad_norm": 0.14009397553084701, "learning_rate": 4.220862807609144e-06, "loss": 0.6834, "step": 2096 }, { "epoch": 1.040213478962393, "grad_norm": 0.14079336307045506, "learning_rate": 4.220153722983342e-06, "loss": 0.7392, "step": 2097 }, { "epoch": 1.0407099416656325, "grad_norm": 0.13787553354880905, "learning_rate": 4.219444375459626e-06, "loss": 0.7496, "step": 2098 }, { "epoch": 1.0412064043688718, "grad_norm": 0.1400054541103246, "learning_rate": 4.218734765146405e-06, "loss": 0.7155, "step": 2099 }, { "epoch": 1.0417028670721111, "grad_norm": 0.13722004560528184, "learning_rate": 4.218024892152134e-06, "loss": 0.736, "step": 2100 }, { "epoch": 1.0421993297753507, "grad_norm": 0.13237548806271895, "learning_rate": 4.217314756585305e-06, "loss": 0.6985, "step": 2101 }, { "epoch": 1.04269579247859, "grad_norm": 0.13814683970889546, "learning_rate": 4.2166043585544495e-06, "loss": 0.742, "step": 2102 }, { "epoch": 1.0431922551818296, "grad_norm": 0.1519709587964635, "learning_rate": 4.2158936981681415e-06, "loss": 0.7286, "step": 2103 }, { "epoch": 1.043688717885069, "grad_norm": 0.13343117788795786, "learning_rate": 4.215182775534994e-06, "loss": 0.7769, "step": 2104 }, { "epoch": 1.0441851805883082, "grad_norm": 0.14132688675642163, "learning_rate": 4.21447159076366e-06, "loss": 0.7645, "step": 2105 }, { "epoch": 1.0446816432915478, "grad_norm": 0.142210221906363, "learning_rate": 4.213760143962834e-06, "loss": 0.7481, "step": 2106 }, { "epoch": 1.045178105994787, "grad_norm": 0.1374760586531246, "learning_rate": 4.2130484352412475e-06, "loss": 0.6909, "step": 2107 }, { "epoch": 1.0456745686980267, "grad_norm": 0.1309015889916558, "learning_rate": 4.212336464707674e-06, "loss": 0.7001, "step": 2108 }, { "epoch": 1.046171031401266, "grad_norm": 0.13676033591829806, "learning_rate": 4.211624232470927e-06, "loss": 0.7778, "step": 2109 }, { "epoch": 1.0466674941045053, "grad_norm": 0.13760414687569777, "learning_rate": 4.2109117386398595e-06, "loss": 0.7199, "step": 2110 }, { "epoch": 1.0471639568077449, "grad_norm": 0.14075911332828087, "learning_rate": 4.210198983323366e-06, "loss": 0.7741, "step": 2111 }, { "epoch": 1.0476604195109842, "grad_norm": 0.13356672723592797, "learning_rate": 4.209485966630377e-06, "loss": 0.7253, "step": 2112 }, { "epoch": 1.0481568822142238, "grad_norm": 0.14150649799028853, "learning_rate": 4.2087726886698695e-06, "loss": 0.7779, "step": 2113 }, { "epoch": 1.048653344917463, "grad_norm": 0.13823807245765327, "learning_rate": 4.208059149550855e-06, "loss": 0.741, "step": 2114 }, { "epoch": 1.0491498076207024, "grad_norm": 0.14366242195045892, "learning_rate": 4.207345349382385e-06, "loss": 0.7521, "step": 2115 }, { "epoch": 1.049646270323942, "grad_norm": 0.13980640222600257, "learning_rate": 4.206631288273554e-06, "loss": 0.7132, "step": 2116 }, { "epoch": 1.0501427330271813, "grad_norm": 0.14845761168836935, "learning_rate": 4.205916966333494e-06, "loss": 0.7072, "step": 2117 }, { "epoch": 1.0506391957304206, "grad_norm": 0.14139479483348766, "learning_rate": 4.2052023836713785e-06, "loss": 0.7411, "step": 2118 }, { "epoch": 1.0511356584336602, "grad_norm": 0.14059968450149515, "learning_rate": 4.204487540396419e-06, "loss": 0.691, "step": 2119 }, { "epoch": 1.0516321211368995, "grad_norm": 0.1362156286798585, "learning_rate": 4.203772436617868e-06, "loss": 0.7444, "step": 2120 }, { "epoch": 1.052128583840139, "grad_norm": 0.13831235259558317, "learning_rate": 4.203057072445019e-06, "loss": 0.7317, "step": 2121 }, { "epoch": 1.0526250465433784, "grad_norm": 0.14207834501053565, "learning_rate": 4.202341447987202e-06, "loss": 0.7359, "step": 2122 }, { "epoch": 1.0531215092466177, "grad_norm": 0.13778286488933011, "learning_rate": 4.201625563353791e-06, "loss": 0.7756, "step": 2123 }, { "epoch": 1.0536179719498573, "grad_norm": 0.13980048084090257, "learning_rate": 4.200909418654194e-06, "loss": 0.7331, "step": 2124 }, { "epoch": 1.0541144346530966, "grad_norm": 0.13997426427628695, "learning_rate": 4.200193013997866e-06, "loss": 0.6935, "step": 2125 }, { "epoch": 1.0546108973563362, "grad_norm": 0.1459212095265043, "learning_rate": 4.199476349494296e-06, "loss": 0.7204, "step": 2126 }, { "epoch": 1.0551073600595755, "grad_norm": 0.14329178565231715, "learning_rate": 4.198759425253015e-06, "loss": 0.7464, "step": 2127 }, { "epoch": 1.0556038227628148, "grad_norm": 0.1422075716131861, "learning_rate": 4.198042241383593e-06, "loss": 0.7486, "step": 2128 }, { "epoch": 1.0561002854660544, "grad_norm": 0.14092996648947848, "learning_rate": 4.19732479799564e-06, "loss": 0.7074, "step": 2129 }, { "epoch": 1.0565967481692937, "grad_norm": 0.14086274951921257, "learning_rate": 4.196607095198808e-06, "loss": 0.7577, "step": 2130 }, { "epoch": 1.0570932108725333, "grad_norm": 0.13776776723760562, "learning_rate": 4.1958891331027826e-06, "loss": 0.7324, "step": 2131 }, { "epoch": 1.0575896735757726, "grad_norm": 0.13566064448922902, "learning_rate": 4.1951709118172954e-06, "loss": 0.7626, "step": 2132 }, { "epoch": 1.058086136279012, "grad_norm": 0.13763795010324914, "learning_rate": 4.194452431452115e-06, "loss": 0.7537, "step": 2133 }, { "epoch": 1.0585825989822515, "grad_norm": 0.1459065764808873, "learning_rate": 4.193733692117048e-06, "loss": 0.7355, "step": 2134 }, { "epoch": 1.0590790616854908, "grad_norm": 0.13884201088984038, "learning_rate": 4.193014693921944e-06, "loss": 0.7535, "step": 2135 }, { "epoch": 1.0595755243887304, "grad_norm": 0.14001522032769478, "learning_rate": 4.192295436976688e-06, "loss": 0.7447, "step": 2136 }, { "epoch": 1.0600719870919697, "grad_norm": 0.15172505034245246, "learning_rate": 4.19157592139121e-06, "loss": 0.7522, "step": 2137 }, { "epoch": 1.060568449795209, "grad_norm": 0.13794840697062816, "learning_rate": 4.190856147275474e-06, "loss": 0.6883, "step": 2138 }, { "epoch": 1.0610649124984486, "grad_norm": 0.14217691316180742, "learning_rate": 4.190136114739487e-06, "loss": 0.727, "step": 2139 }, { "epoch": 1.061561375201688, "grad_norm": 0.13583861770554806, "learning_rate": 4.189415823893293e-06, "loss": 0.704, "step": 2140 }, { "epoch": 1.0620578379049275, "grad_norm": 0.1363645606259115, "learning_rate": 4.188695274846979e-06, "loss": 0.7381, "step": 2141 }, { "epoch": 1.0625543006081668, "grad_norm": 0.14814722988133758, "learning_rate": 4.1879744677106685e-06, "loss": 0.7346, "step": 2142 }, { "epoch": 1.0630507633114061, "grad_norm": 0.16059324736898764, "learning_rate": 4.1872534025945255e-06, "loss": 0.7899, "step": 2143 }, { "epoch": 1.0635472260146457, "grad_norm": 0.14014168016673342, "learning_rate": 4.186532079608753e-06, "loss": 0.7599, "step": 2144 }, { "epoch": 1.064043688717885, "grad_norm": 0.14674448113708913, "learning_rate": 4.185810498863592e-06, "loss": 0.7054, "step": 2145 }, { "epoch": 1.0645401514211246, "grad_norm": 0.13552371087730036, "learning_rate": 4.185088660469328e-06, "loss": 0.7602, "step": 2146 }, { "epoch": 1.065036614124364, "grad_norm": 0.1336555213075988, "learning_rate": 4.18436656453628e-06, "loss": 0.7583, "step": 2147 }, { "epoch": 1.0655330768276032, "grad_norm": 0.13559730592832767, "learning_rate": 4.1836442111748086e-06, "loss": 0.7456, "step": 2148 }, { "epoch": 1.0660295395308428, "grad_norm": 0.14992961750694342, "learning_rate": 4.182921600495316e-06, "loss": 0.7379, "step": 2149 }, { "epoch": 1.0665260022340821, "grad_norm": 0.13935556972523674, "learning_rate": 4.1821987326082396e-06, "loss": 0.696, "step": 2150 }, { "epoch": 1.0670224649373217, "grad_norm": 0.14083946307481723, "learning_rate": 4.181475607624059e-06, "loss": 0.7851, "step": 2151 }, { "epoch": 1.067518927640561, "grad_norm": 0.1421578052366813, "learning_rate": 4.1807522256532925e-06, "loss": 0.7259, "step": 2152 }, { "epoch": 1.0680153903438003, "grad_norm": 0.13548843386534212, "learning_rate": 4.1800285868064964e-06, "loss": 0.7304, "step": 2153 }, { "epoch": 1.0685118530470399, "grad_norm": 0.13702431747439558, "learning_rate": 4.179304691194269e-06, "loss": 0.758, "step": 2154 }, { "epoch": 1.0690083157502792, "grad_norm": 0.13318149795074, "learning_rate": 4.178580538927245e-06, "loss": 0.6844, "step": 2155 }, { "epoch": 1.0695047784535188, "grad_norm": 0.1407081020046659, "learning_rate": 4.177856130116099e-06, "loss": 0.7639, "step": 2156 }, { "epoch": 1.070001241156758, "grad_norm": 0.1438833118655734, "learning_rate": 4.177131464871545e-06, "loss": 0.7006, "step": 2157 }, { "epoch": 1.0704977038599974, "grad_norm": 0.1461487574061599, "learning_rate": 4.176406543304339e-06, "loss": 0.738, "step": 2158 }, { "epoch": 1.070994166563237, "grad_norm": 0.1398230912718051, "learning_rate": 4.175681365525271e-06, "loss": 0.6985, "step": 2159 }, { "epoch": 1.0714906292664763, "grad_norm": 0.13817741460521907, "learning_rate": 4.174955931645175e-06, "loss": 0.7285, "step": 2160 }, { "epoch": 1.0719870919697159, "grad_norm": 0.14224093463734003, "learning_rate": 4.1742302417749205e-06, "loss": 0.7586, "step": 2161 }, { "epoch": 1.0724835546729552, "grad_norm": 0.13835662156431366, "learning_rate": 4.173504296025417e-06, "loss": 0.7539, "step": 2162 }, { "epoch": 1.0729800173761945, "grad_norm": 0.13861535686149404, "learning_rate": 4.1727780945076155e-06, "loss": 0.654, "step": 2163 }, { "epoch": 1.073476480079434, "grad_norm": 0.13428993721206128, "learning_rate": 4.172051637332501e-06, "loss": 0.7285, "step": 2164 }, { "epoch": 1.0739729427826734, "grad_norm": 0.13538857395884993, "learning_rate": 4.171324924611105e-06, "loss": 0.7218, "step": 2165 }, { "epoch": 1.074469405485913, "grad_norm": 0.1452550536947396, "learning_rate": 4.170597956454492e-06, "loss": 0.8168, "step": 2166 }, { "epoch": 1.0749658681891523, "grad_norm": 0.13624518253158813, "learning_rate": 4.169870732973767e-06, "loss": 0.7141, "step": 2167 }, { "epoch": 1.0754623308923916, "grad_norm": 0.14029339027722842, "learning_rate": 4.169143254280074e-06, "loss": 0.7285, "step": 2168 }, { "epoch": 1.0759587935956312, "grad_norm": 0.14109579501178382, "learning_rate": 4.168415520484598e-06, "loss": 0.7896, "step": 2169 }, { "epoch": 1.0764552562988705, "grad_norm": 0.144954703610336, "learning_rate": 4.167687531698561e-06, "loss": 0.7255, "step": 2170 }, { "epoch": 1.07695171900211, "grad_norm": 0.13685269108041861, "learning_rate": 4.166959288033223e-06, "loss": 0.7257, "step": 2171 }, { "epoch": 1.0774481817053494, "grad_norm": 0.14271875588892513, "learning_rate": 4.166230789599886e-06, "loss": 0.7159, "step": 2172 }, { "epoch": 1.0779446444085887, "grad_norm": 0.13783479927505968, "learning_rate": 4.16550203650989e-06, "loss": 0.7129, "step": 2173 }, { "epoch": 1.0784411071118283, "grad_norm": 0.14283091206105697, "learning_rate": 4.16477302887461e-06, "loss": 0.6965, "step": 2174 }, { "epoch": 1.0789375698150676, "grad_norm": 0.14010606156996824, "learning_rate": 4.1640437668054665e-06, "loss": 0.7179, "step": 2175 }, { "epoch": 1.0794340325183072, "grad_norm": 0.14730576733153716, "learning_rate": 4.163314250413913e-06, "loss": 0.7082, "step": 2176 }, { "epoch": 1.0799304952215465, "grad_norm": 0.1335227779819202, "learning_rate": 4.1625844798114464e-06, "loss": 0.7183, "step": 2177 }, { "epoch": 1.0804269579247858, "grad_norm": 0.13601772614825755, "learning_rate": 4.1618544551096005e-06, "loss": 0.6847, "step": 2178 }, { "epoch": 1.0809234206280254, "grad_norm": 0.13698194445852674, "learning_rate": 4.1611241764199465e-06, "loss": 0.7435, "step": 2179 }, { "epoch": 1.0814198833312647, "grad_norm": 0.14062893743840316, "learning_rate": 4.160393643854095e-06, "loss": 0.7363, "step": 2180 }, { "epoch": 1.0819163460345043, "grad_norm": 0.13799273854449803, "learning_rate": 4.159662857523697e-06, "loss": 0.7243, "step": 2181 }, { "epoch": 1.0824128087377436, "grad_norm": 0.13716660868793465, "learning_rate": 4.158931817540443e-06, "loss": 0.7577, "step": 2182 }, { "epoch": 1.082909271440983, "grad_norm": 0.1466433823136944, "learning_rate": 4.158200524016061e-06, "loss": 0.7054, "step": 2183 }, { "epoch": 1.0834057341442225, "grad_norm": 0.1382394309059659, "learning_rate": 4.1574689770623145e-06, "loss": 0.7566, "step": 2184 }, { "epoch": 1.0839021968474618, "grad_norm": 0.13654874611387244, "learning_rate": 4.15673717679101e-06, "loss": 0.7251, "step": 2185 }, { "epoch": 1.0843986595507014, "grad_norm": 0.1367869405798336, "learning_rate": 4.156005123313993e-06, "loss": 0.719, "step": 2186 }, { "epoch": 1.0848951222539407, "grad_norm": 0.1317568061970169, "learning_rate": 4.155272816743145e-06, "loss": 0.6876, "step": 2187 }, { "epoch": 1.08539158495718, "grad_norm": 0.13762978347128463, "learning_rate": 4.1545402571903855e-06, "loss": 0.7022, "step": 2188 }, { "epoch": 1.0858880476604196, "grad_norm": 0.1362143217786678, "learning_rate": 4.153807444767677e-06, "loss": 0.7125, "step": 2189 }, { "epoch": 1.086384510363659, "grad_norm": 0.1516046995027248, "learning_rate": 4.153074379587018e-06, "loss": 0.809, "step": 2190 }, { "epoch": 1.0868809730668985, "grad_norm": 0.13562138385068911, "learning_rate": 4.152341061760445e-06, "loss": 0.7172, "step": 2191 }, { "epoch": 1.0873774357701378, "grad_norm": 0.14008342460158815, "learning_rate": 4.151607491400034e-06, "loss": 0.7781, "step": 2192 }, { "epoch": 1.0878738984733771, "grad_norm": 0.13999792205054443, "learning_rate": 4.150873668617899e-06, "loss": 0.6984, "step": 2193 }, { "epoch": 1.0883703611766167, "grad_norm": 0.12918569307833325, "learning_rate": 4.150139593526193e-06, "loss": 0.707, "step": 2194 }, { "epoch": 1.088866823879856, "grad_norm": 0.1373176298653365, "learning_rate": 4.149405266237109e-06, "loss": 0.7073, "step": 2195 }, { "epoch": 1.0893632865830956, "grad_norm": 0.12770538449262095, "learning_rate": 4.148670686862877e-06, "loss": 0.6754, "step": 2196 }, { "epoch": 1.089859749286335, "grad_norm": 0.14027683877629007, "learning_rate": 4.147935855515763e-06, "loss": 0.7618, "step": 2197 }, { "epoch": 1.0903562119895742, "grad_norm": 0.13474412755523313, "learning_rate": 4.1472007723080774e-06, "loss": 0.7486, "step": 2198 }, { "epoch": 1.0908526746928138, "grad_norm": 0.13436431752038755, "learning_rate": 4.146465437352164e-06, "loss": 0.7237, "step": 2199 }, { "epoch": 1.091349137396053, "grad_norm": 0.13682091282171036, "learning_rate": 4.145729850760408e-06, "loss": 0.7377, "step": 2200 }, { "epoch": 1.0918456000992927, "grad_norm": 0.13924706240826093, "learning_rate": 4.144994012645232e-06, "loss": 0.7402, "step": 2201 }, { "epoch": 1.092342062802532, "grad_norm": 0.14103770773255148, "learning_rate": 4.1442579231190964e-06, "loss": 0.69, "step": 2202 }, { "epoch": 1.0928385255057713, "grad_norm": 0.1394654587729206, "learning_rate": 4.143521582294501e-06, "loss": 0.7393, "step": 2203 }, { "epoch": 1.0933349882090109, "grad_norm": 0.13644960570918493, "learning_rate": 4.1427849902839826e-06, "loss": 0.7141, "step": 2204 }, { "epoch": 1.0938314509122502, "grad_norm": 0.13692258907444782, "learning_rate": 4.142048147200119e-06, "loss": 0.7393, "step": 2205 }, { "epoch": 1.0943279136154895, "grad_norm": 0.1373142702334565, "learning_rate": 4.141311053155524e-06, "loss": 0.755, "step": 2206 }, { "epoch": 1.094824376318729, "grad_norm": 0.13763207554959367, "learning_rate": 4.140573708262852e-06, "loss": 0.7546, "step": 2207 }, { "epoch": 1.0953208390219684, "grad_norm": 0.14478103422831126, "learning_rate": 4.139836112634792e-06, "loss": 0.7679, "step": 2208 }, { "epoch": 1.095817301725208, "grad_norm": 0.1390259770870048, "learning_rate": 4.139098266384076e-06, "loss": 0.743, "step": 2209 }, { "epoch": 1.0963137644284473, "grad_norm": 0.14390923525858076, "learning_rate": 4.138360169623471e-06, "loss": 0.7453, "step": 2210 }, { "epoch": 1.0968102271316866, "grad_norm": 0.14517278952992427, "learning_rate": 4.137621822465782e-06, "loss": 0.7426, "step": 2211 }, { "epoch": 1.0973066898349262, "grad_norm": 0.15525845893147422, "learning_rate": 4.1368832250238564e-06, "loss": 0.7313, "step": 2212 }, { "epoch": 1.0978031525381655, "grad_norm": 0.14066801820933797, "learning_rate": 4.136144377410574e-06, "loss": 0.7877, "step": 2213 }, { "epoch": 1.098299615241405, "grad_norm": 0.1424752647162409, "learning_rate": 4.135405279738858e-06, "loss": 0.7418, "step": 2214 }, { "epoch": 1.0987960779446444, "grad_norm": 0.15190627011179594, "learning_rate": 4.134665932121665e-06, "loss": 0.7654, "step": 2215 }, { "epoch": 1.0992925406478837, "grad_norm": 0.1412407950996624, "learning_rate": 4.133926334671996e-06, "loss": 0.6707, "step": 2216 }, { "epoch": 1.0997890033511233, "grad_norm": 0.14456861330667964, "learning_rate": 4.133186487502884e-06, "loss": 0.7841, "step": 2217 }, { "epoch": 1.1002854660543626, "grad_norm": 0.15407876229352044, "learning_rate": 4.1324463907274035e-06, "loss": 0.7367, "step": 2218 }, { "epoch": 1.1007819287576022, "grad_norm": 0.14216692584554558, "learning_rate": 4.131706044458667e-06, "loss": 0.7149, "step": 2219 }, { "epoch": 1.1012783914608415, "grad_norm": 0.1324859332684942, "learning_rate": 4.130965448809824e-06, "loss": 0.6563, "step": 2220 }, { "epoch": 1.1017748541640808, "grad_norm": 0.13983336523306109, "learning_rate": 4.130224603894062e-06, "loss": 0.7078, "step": 2221 }, { "epoch": 1.1022713168673204, "grad_norm": 0.14405406563289364, "learning_rate": 4.129483509824608e-06, "loss": 0.7529, "step": 2222 }, { "epoch": 1.1027677795705597, "grad_norm": 0.14158523055382277, "learning_rate": 4.128742166714726e-06, "loss": 0.7113, "step": 2223 }, { "epoch": 1.1032642422737993, "grad_norm": 0.13671925424224868, "learning_rate": 4.128000574677719e-06, "loss": 0.746, "step": 2224 }, { "epoch": 1.1037607049770386, "grad_norm": 0.13505116983586582, "learning_rate": 4.127258733826929e-06, "loss": 0.7181, "step": 2225 }, { "epoch": 1.104257167680278, "grad_norm": 0.1374278474736183, "learning_rate": 4.126516644275731e-06, "loss": 0.7529, "step": 2226 }, { "epoch": 1.1047536303835175, "grad_norm": 0.14165368952037294, "learning_rate": 4.125774306137543e-06, "loss": 0.7476, "step": 2227 }, { "epoch": 1.1052500930867568, "grad_norm": 0.15143713359026406, "learning_rate": 4.12503171952582e-06, "loss": 0.7326, "step": 2228 }, { "epoch": 1.1057465557899964, "grad_norm": 0.137643913086052, "learning_rate": 4.124288884554053e-06, "loss": 0.7351, "step": 2229 }, { "epoch": 1.1062430184932357, "grad_norm": 0.13550077923391268, "learning_rate": 4.123545801335776e-06, "loss": 0.7181, "step": 2230 }, { "epoch": 1.106739481196475, "grad_norm": 0.13907847760418357, "learning_rate": 4.122802469984552e-06, "loss": 0.7113, "step": 2231 }, { "epoch": 1.1072359438997146, "grad_norm": 0.1409638418684744, "learning_rate": 4.122058890613991e-06, "loss": 0.7532, "step": 2232 }, { "epoch": 1.107732406602954, "grad_norm": 0.13839521717196338, "learning_rate": 4.121315063337737e-06, "loss": 0.7593, "step": 2233 }, { "epoch": 1.1082288693061935, "grad_norm": 0.14464848496201313, "learning_rate": 4.120570988269472e-06, "loss": 0.798, "step": 2234 }, { "epoch": 1.1087253320094328, "grad_norm": 0.14170615709306067, "learning_rate": 4.119826665522914e-06, "loss": 0.7214, "step": 2235 }, { "epoch": 1.1092217947126721, "grad_norm": 0.13750992725040465, "learning_rate": 4.119082095211823e-06, "loss": 0.7025, "step": 2236 }, { "epoch": 1.1097182574159117, "grad_norm": 0.13524887733070357, "learning_rate": 4.118337277449993e-06, "loss": 0.7299, "step": 2237 }, { "epoch": 1.110214720119151, "grad_norm": 0.13742212811395596, "learning_rate": 4.117592212351258e-06, "loss": 0.7152, "step": 2238 }, { "epoch": 1.1107111828223906, "grad_norm": 0.13549859577744783, "learning_rate": 4.11684690002949e-06, "loss": 0.7939, "step": 2239 }, { "epoch": 1.11120764552563, "grad_norm": 0.13072436304815396, "learning_rate": 4.116101340598597e-06, "loss": 0.6907, "step": 2240 }, { "epoch": 1.1117041082288692, "grad_norm": 0.13708530876777447, "learning_rate": 4.115355534172527e-06, "loss": 0.7668, "step": 2241 }, { "epoch": 1.1122005709321088, "grad_norm": 0.1331260842138928, "learning_rate": 4.114609480865264e-06, "loss": 0.7315, "step": 2242 }, { "epoch": 1.112697033635348, "grad_norm": 0.1345957058229882, "learning_rate": 4.113863180790829e-06, "loss": 0.7249, "step": 2243 }, { "epoch": 1.1131934963385877, "grad_norm": 0.13896142397489988, "learning_rate": 4.113116634063285e-06, "loss": 0.7174, "step": 2244 }, { "epoch": 1.113689959041827, "grad_norm": 0.13616762035864557, "learning_rate": 4.1123698407967265e-06, "loss": 0.7206, "step": 2245 }, { "epoch": 1.1141864217450663, "grad_norm": 0.1428487363329108, "learning_rate": 4.11162280110529e-06, "loss": 0.7447, "step": 2246 }, { "epoch": 1.1146828844483059, "grad_norm": 0.1381247261812151, "learning_rate": 4.110875515103148e-06, "loss": 0.7262, "step": 2247 }, { "epoch": 1.1151793471515452, "grad_norm": 0.13985941699766852, "learning_rate": 4.110127982904513e-06, "loss": 0.737, "step": 2248 }, { "epoch": 1.1156758098547848, "grad_norm": 0.1371407690195463, "learning_rate": 4.109380204623634e-06, "loss": 0.7367, "step": 2249 }, { "epoch": 1.116172272558024, "grad_norm": 0.1410923264807246, "learning_rate": 4.108632180374794e-06, "loss": 0.685, "step": 2250 }, { "epoch": 1.1166687352612634, "grad_norm": 0.13607561024834763, "learning_rate": 4.107883910272316e-06, "loss": 0.7416, "step": 2251 }, { "epoch": 1.117165197964503, "grad_norm": 0.14401553966823152, "learning_rate": 4.107135394430565e-06, "loss": 0.7741, "step": 2252 }, { "epoch": 1.1176616606677423, "grad_norm": 0.1347979188420261, "learning_rate": 4.106386632963935e-06, "loss": 0.7289, "step": 2253 }, { "epoch": 1.1181581233709816, "grad_norm": 0.13563125984475435, "learning_rate": 4.105637625986867e-06, "loss": 0.7241, "step": 2254 }, { "epoch": 1.1186545860742212, "grad_norm": 0.13554783899608014, "learning_rate": 4.104888373613832e-06, "loss": 0.7051, "step": 2255 }, { "epoch": 1.1191510487774605, "grad_norm": 0.13902713235820974, "learning_rate": 4.10413887595934e-06, "loss": 0.7442, "step": 2256 }, { "epoch": 1.1196475114807, "grad_norm": 0.1416398097741949, "learning_rate": 4.1033891331379425e-06, "loss": 0.7234, "step": 2257 }, { "epoch": 1.1201439741839394, "grad_norm": 0.13151231872027344, "learning_rate": 4.102639145264223e-06, "loss": 0.6901, "step": 2258 }, { "epoch": 1.1206404368871787, "grad_norm": 0.1310560747375517, "learning_rate": 4.101888912452809e-06, "loss": 0.7034, "step": 2259 }, { "epoch": 1.1211368995904183, "grad_norm": 0.13368674747757894, "learning_rate": 4.1011384348183565e-06, "loss": 0.7369, "step": 2260 }, { "epoch": 1.1216333622936576, "grad_norm": 0.14030562550210587, "learning_rate": 4.100387712475568e-06, "loss": 0.7473, "step": 2261 }, { "epoch": 1.1221298249968972, "grad_norm": 0.13783403570651975, "learning_rate": 4.0996367455391774e-06, "loss": 0.6827, "step": 2262 }, { "epoch": 1.1226262877001365, "grad_norm": 0.13794119866048046, "learning_rate": 4.098885534123958e-06, "loss": 0.7144, "step": 2263 }, { "epoch": 1.1231227504033758, "grad_norm": 0.1403803932613888, "learning_rate": 4.098134078344722e-06, "loss": 0.7752, "step": 2264 }, { "epoch": 1.1236192131066154, "grad_norm": 0.1393102973681631, "learning_rate": 4.097382378316316e-06, "loss": 0.7241, "step": 2265 }, { "epoch": 1.1241156758098547, "grad_norm": 0.13730972644736142, "learning_rate": 4.0966304341536255e-06, "loss": 0.7399, "step": 2266 }, { "epoch": 1.1246121385130943, "grad_norm": 0.14795637490908276, "learning_rate": 4.095878245971573e-06, "loss": 0.7947, "step": 2267 }, { "epoch": 1.1251086012163336, "grad_norm": 0.13800836804454775, "learning_rate": 4.095125813885118e-06, "loss": 0.7454, "step": 2268 }, { "epoch": 1.125605063919573, "grad_norm": 0.13510837243563636, "learning_rate": 4.094373138009259e-06, "loss": 0.7126, "step": 2269 }, { "epoch": 1.1261015266228125, "grad_norm": 0.14183611387295958, "learning_rate": 4.093620218459029e-06, "loss": 0.7305, "step": 2270 }, { "epoch": 1.1265979893260518, "grad_norm": 0.14547694629965133, "learning_rate": 4.092867055349501e-06, "loss": 0.7342, "step": 2271 }, { "epoch": 1.1270944520292914, "grad_norm": 0.13272000346114335, "learning_rate": 4.092113648795784e-06, "loss": 0.7244, "step": 2272 }, { "epoch": 1.1275909147325307, "grad_norm": 0.13445748304111405, "learning_rate": 4.091359998913021e-06, "loss": 0.7135, "step": 2273 }, { "epoch": 1.12808737743577, "grad_norm": 0.13823114531273367, "learning_rate": 4.0906061058164e-06, "loss": 0.7296, "step": 2274 }, { "epoch": 1.1285838401390096, "grad_norm": 0.1331655131810508, "learning_rate": 4.089851969621138e-06, "loss": 0.7039, "step": 2275 }, { "epoch": 1.129080302842249, "grad_norm": 0.13502020601948467, "learning_rate": 4.0890975904424946e-06, "loss": 0.7227, "step": 2276 }, { "epoch": 1.1295767655454885, "grad_norm": 0.13554787632722626, "learning_rate": 4.088342968395763e-06, "loss": 0.7166, "step": 2277 }, { "epoch": 1.1300732282487278, "grad_norm": 0.15351257577564006, "learning_rate": 4.087588103596276e-06, "loss": 0.7515, "step": 2278 }, { "epoch": 1.1305696909519671, "grad_norm": 0.13285248996749063, "learning_rate": 4.0868329961594025e-06, "loss": 0.7398, "step": 2279 }, { "epoch": 1.1310661536552067, "grad_norm": 0.14474300166445284, "learning_rate": 4.086077646200548e-06, "loss": 0.765, "step": 2280 }, { "epoch": 1.131562616358446, "grad_norm": 0.136914338868158, "learning_rate": 4.085322053835157e-06, "loss": 0.7387, "step": 2281 }, { "epoch": 1.1320590790616856, "grad_norm": 0.13701247994557483, "learning_rate": 4.084566219178708e-06, "loss": 0.7484, "step": 2282 }, { "epoch": 1.132555541764925, "grad_norm": 0.14213309578391173, "learning_rate": 4.08381014234672e-06, "loss": 0.734, "step": 2283 }, { "epoch": 1.1330520044681642, "grad_norm": 0.1413403485403749, "learning_rate": 4.0830538234547455e-06, "loss": 0.739, "step": 2284 }, { "epoch": 1.1335484671714038, "grad_norm": 0.14178436767190955, "learning_rate": 4.082297262618376e-06, "loss": 0.7338, "step": 2285 }, { "epoch": 1.1340449298746431, "grad_norm": 0.1384665628085734, "learning_rate": 4.081540459953241e-06, "loss": 0.7293, "step": 2286 }, { "epoch": 1.1345413925778827, "grad_norm": 0.14117953096422015, "learning_rate": 4.080783415575004e-06, "loss": 0.7241, "step": 2287 }, { "epoch": 1.135037855281122, "grad_norm": 0.13841249272824596, "learning_rate": 4.080026129599368e-06, "loss": 0.7129, "step": 2288 }, { "epoch": 1.1355343179843613, "grad_norm": 0.1389416393419498, "learning_rate": 4.079268602142072e-06, "loss": 0.7196, "step": 2289 }, { "epoch": 1.1360307806876009, "grad_norm": 0.13525972832589087, "learning_rate": 4.078510833318892e-06, "loss": 0.6799, "step": 2290 }, { "epoch": 1.1365272433908402, "grad_norm": 0.14116131185065883, "learning_rate": 4.077752823245641e-06, "loss": 0.7207, "step": 2291 }, { "epoch": 1.1370237060940798, "grad_norm": 0.13868074387182236, "learning_rate": 4.076994572038168e-06, "loss": 0.7414, "step": 2292 }, { "epoch": 1.137520168797319, "grad_norm": 0.13837495741874617, "learning_rate": 4.076236079812359e-06, "loss": 0.7518, "step": 2293 }, { "epoch": 1.1380166315005584, "grad_norm": 0.13909330761790814, "learning_rate": 4.075477346684139e-06, "loss": 0.7353, "step": 2294 }, { "epoch": 1.138513094203798, "grad_norm": 0.15340107106559084, "learning_rate": 4.074718372769467e-06, "loss": 0.7625, "step": 2295 }, { "epoch": 1.1390095569070373, "grad_norm": 0.14086941509084788, "learning_rate": 4.07395915818434e-06, "loss": 0.7586, "step": 2296 }, { "epoch": 1.1395060196102769, "grad_norm": 0.13446534552526093, "learning_rate": 4.073199703044793e-06, "loss": 0.7621, "step": 2297 }, { "epoch": 1.1400024823135162, "grad_norm": 0.1373097144881244, "learning_rate": 4.072440007466896e-06, "loss": 0.6873, "step": 2298 }, { "epoch": 1.1404989450167555, "grad_norm": 0.13928210663269355, "learning_rate": 4.071680071566756e-06, "loss": 0.7514, "step": 2299 }, { "epoch": 1.140995407719995, "grad_norm": 0.1421392811034866, "learning_rate": 4.070919895460517e-06, "loss": 0.7295, "step": 2300 }, { "epoch": 1.1414918704232344, "grad_norm": 0.13241332535161587, "learning_rate": 4.070159479264359e-06, "loss": 0.7118, "step": 2301 }, { "epoch": 1.141988333126474, "grad_norm": 0.1302518945245061, "learning_rate": 4.0693988230945e-06, "loss": 0.7399, "step": 2302 }, { "epoch": 1.1424847958297133, "grad_norm": 0.14324912590708974, "learning_rate": 4.0686379270671955e-06, "loss": 0.768, "step": 2303 }, { "epoch": 1.1429812585329526, "grad_norm": 0.14029305482378046, "learning_rate": 4.067876791298734e-06, "loss": 0.7787, "step": 2304 }, { "epoch": 1.1434777212361922, "grad_norm": 0.15155995127547886, "learning_rate": 4.067115415905445e-06, "loss": 0.7777, "step": 2305 }, { "epoch": 1.1439741839394315, "grad_norm": 0.13840654029220392, "learning_rate": 4.066353801003691e-06, "loss": 0.7449, "step": 2306 }, { "epoch": 1.144470646642671, "grad_norm": 0.1379028297317787, "learning_rate": 4.065591946709873e-06, "loss": 0.7468, "step": 2307 }, { "epoch": 1.1449671093459104, "grad_norm": 0.13315214722190571, "learning_rate": 4.064829853140428e-06, "loss": 0.7142, "step": 2308 }, { "epoch": 1.1454635720491497, "grad_norm": 0.13949231615359325, "learning_rate": 4.0640675204118305e-06, "loss": 0.6793, "step": 2309 }, { "epoch": 1.1459600347523893, "grad_norm": 0.14382388332090082, "learning_rate": 4.06330494864059e-06, "loss": 0.7437, "step": 2310 }, { "epoch": 1.1464564974556286, "grad_norm": 0.14590984361270626, "learning_rate": 4.062542137943254e-06, "loss": 0.8103, "step": 2311 }, { "epoch": 1.1469529601588682, "grad_norm": 0.1609583000360408, "learning_rate": 4.061779088436406e-06, "loss": 0.7311, "step": 2312 }, { "epoch": 1.1474494228621075, "grad_norm": 0.13805487885883164, "learning_rate": 4.061015800236665e-06, "loss": 0.7186, "step": 2313 }, { "epoch": 1.1479458855653468, "grad_norm": 0.13893797112646772, "learning_rate": 4.060252273460688e-06, "loss": 0.7381, "step": 2314 }, { "epoch": 1.1484423482685864, "grad_norm": 0.13923921175817758, "learning_rate": 4.0594885082251685e-06, "loss": 0.7233, "step": 2315 }, { "epoch": 1.1489388109718257, "grad_norm": 0.13902288626720571, "learning_rate": 4.058724504646834e-06, "loss": 0.7134, "step": 2316 }, { "epoch": 1.1494352736750653, "grad_norm": 0.13827739981540818, "learning_rate": 4.057960262842452e-06, "loss": 0.7263, "step": 2317 }, { "epoch": 1.1499317363783046, "grad_norm": 0.14118983269415886, "learning_rate": 4.057195782928823e-06, "loss": 0.7381, "step": 2318 }, { "epoch": 1.150428199081544, "grad_norm": 0.1322019940248954, "learning_rate": 4.056431065022787e-06, "loss": 0.7327, "step": 2319 }, { "epoch": 1.1509246617847835, "grad_norm": 0.1313464348071736, "learning_rate": 4.055666109241218e-06, "loss": 0.6943, "step": 2320 }, { "epoch": 1.1514211244880228, "grad_norm": 0.13797475553246952, "learning_rate": 4.054900915701028e-06, "loss": 0.7245, "step": 2321 }, { "epoch": 1.1519175871912624, "grad_norm": 0.15720508745478762, "learning_rate": 4.054135484519163e-06, "loss": 0.6975, "step": 2322 }, { "epoch": 1.1524140498945017, "grad_norm": 0.14150123947563276, "learning_rate": 4.053369815812608e-06, "loss": 0.7432, "step": 2323 }, { "epoch": 1.152910512597741, "grad_norm": 0.13546580390502527, "learning_rate": 4.052603909698383e-06, "loss": 0.6945, "step": 2324 }, { "epoch": 1.1534069753009806, "grad_norm": 0.13797133959426647, "learning_rate": 4.051837766293545e-06, "loss": 0.7659, "step": 2325 }, { "epoch": 1.15390343800422, "grad_norm": 0.13678662933843044, "learning_rate": 4.051071385715186e-06, "loss": 0.7662, "step": 2326 }, { "epoch": 1.1543999007074595, "grad_norm": 0.1456527753758712, "learning_rate": 4.0503047680804354e-06, "loss": 0.7451, "step": 2327 }, { "epoch": 1.1548963634106988, "grad_norm": 0.13498395892098344, "learning_rate": 4.0495379135064585e-06, "loss": 0.7201, "step": 2328 }, { "epoch": 1.1553928261139381, "grad_norm": 0.13613972964983925, "learning_rate": 4.048770822110456e-06, "loss": 0.692, "step": 2329 }, { "epoch": 1.1558892888171777, "grad_norm": 0.13981046419867807, "learning_rate": 4.048003494009666e-06, "loss": 0.7372, "step": 2330 }, { "epoch": 1.156385751520417, "grad_norm": 0.13473584929200108, "learning_rate": 4.047235929321363e-06, "loss": 0.7036, "step": 2331 }, { "epoch": 1.1568822142236566, "grad_norm": 0.13805910028899931, "learning_rate": 4.0464681281628545e-06, "loss": 0.6842, "step": 2332 }, { "epoch": 1.1573786769268959, "grad_norm": 0.139073362785338, "learning_rate": 4.04570009065149e-06, "loss": 0.7541, "step": 2333 }, { "epoch": 1.1578751396301352, "grad_norm": 0.13908629112696577, "learning_rate": 4.044931816904649e-06, "loss": 0.6851, "step": 2334 }, { "epoch": 1.1583716023333748, "grad_norm": 0.13330790586530183, "learning_rate": 4.044163307039751e-06, "loss": 0.7023, "step": 2335 }, { "epoch": 1.158868065036614, "grad_norm": 0.1516948361883498, "learning_rate": 4.043394561174252e-06, "loss": 0.7247, "step": 2336 }, { "epoch": 1.1593645277398537, "grad_norm": 0.13561857587166704, "learning_rate": 4.042625579425639e-06, "loss": 0.6929, "step": 2337 }, { "epoch": 1.159860990443093, "grad_norm": 0.1381505731871968, "learning_rate": 4.04185636191144e-06, "loss": 0.7877, "step": 2338 }, { "epoch": 1.1603574531463323, "grad_norm": 0.14465367702750925, "learning_rate": 4.04108690874922e-06, "loss": 0.7335, "step": 2339 }, { "epoch": 1.1608539158495719, "grad_norm": 0.13856535592329788, "learning_rate": 4.040317220056574e-06, "loss": 0.7261, "step": 2340 }, { "epoch": 1.1613503785528112, "grad_norm": 0.1359117640924008, "learning_rate": 4.039547295951138e-06, "loss": 0.7154, "step": 2341 }, { "epoch": 1.1618468412560508, "grad_norm": 0.14408892510554583, "learning_rate": 4.038777136550583e-06, "loss": 0.7449, "step": 2342 }, { "epoch": 1.16234330395929, "grad_norm": 0.1392596970150696, "learning_rate": 4.038006741972614e-06, "loss": 0.7677, "step": 2343 }, { "epoch": 1.1628397666625294, "grad_norm": 0.13340381888036884, "learning_rate": 4.037236112334976e-06, "loss": 0.6938, "step": 2344 }, { "epoch": 1.163336229365769, "grad_norm": 0.138524100141458, "learning_rate": 4.036465247755444e-06, "loss": 0.678, "step": 2345 }, { "epoch": 1.1638326920690083, "grad_norm": 0.13602968738361657, "learning_rate": 4.035694148351836e-06, "loss": 0.7189, "step": 2346 }, { "epoch": 1.1643291547722479, "grad_norm": 0.13592578354190615, "learning_rate": 4.034922814242001e-06, "loss": 0.6993, "step": 2347 }, { "epoch": 1.1648256174754872, "grad_norm": 0.13795965217373635, "learning_rate": 4.034151245543823e-06, "loss": 0.7135, "step": 2348 }, { "epoch": 1.1653220801787265, "grad_norm": 0.13946922199269032, "learning_rate": 4.033379442375225e-06, "loss": 0.7757, "step": 2349 }, { "epoch": 1.165818542881966, "grad_norm": 0.14045917203188657, "learning_rate": 4.032607404854166e-06, "loss": 0.7284, "step": 2350 }, { "epoch": 1.1663150055852054, "grad_norm": 0.1440756518140952, "learning_rate": 4.031835133098639e-06, "loss": 0.7247, "step": 2351 }, { "epoch": 1.166811468288445, "grad_norm": 0.13724789791500103, "learning_rate": 4.031062627226671e-06, "loss": 0.6963, "step": 2352 }, { "epoch": 1.1673079309916843, "grad_norm": 0.13553830052897844, "learning_rate": 4.030289887356332e-06, "loss": 0.6995, "step": 2353 }, { "epoch": 1.1678043936949236, "grad_norm": 0.13522250185478063, "learning_rate": 4.029516913605719e-06, "loss": 0.7359, "step": 2354 }, { "epoch": 1.1683008563981632, "grad_norm": 0.1386403051237985, "learning_rate": 4.028743706092969e-06, "loss": 0.7783, "step": 2355 }, { "epoch": 1.1687973191014025, "grad_norm": 0.14135795928242792, "learning_rate": 4.027970264936256e-06, "loss": 0.7393, "step": 2356 }, { "epoch": 1.1692937818046418, "grad_norm": 0.15298427203204537, "learning_rate": 4.027196590253786e-06, "loss": 0.7416, "step": 2357 }, { "epoch": 1.1697902445078814, "grad_norm": 0.13356310495808743, "learning_rate": 4.026422682163804e-06, "loss": 0.7174, "step": 2358 }, { "epoch": 1.1702867072111207, "grad_norm": 0.14258979851062023, "learning_rate": 4.02564854078459e-06, "loss": 0.7025, "step": 2359 }, { "epoch": 1.1707831699143603, "grad_norm": 0.1394231832343289, "learning_rate": 4.024874166234459e-06, "loss": 0.7084, "step": 2360 }, { "epoch": 1.1712796326175996, "grad_norm": 0.1311008710936396, "learning_rate": 4.02409955863176e-06, "loss": 0.7364, "step": 2361 }, { "epoch": 1.171776095320839, "grad_norm": 0.13638386404046124, "learning_rate": 4.023324718094881e-06, "loss": 0.7396, "step": 2362 }, { "epoch": 1.1722725580240785, "grad_norm": 0.14537907916628248, "learning_rate": 4.022549644742244e-06, "loss": 0.6914, "step": 2363 }, { "epoch": 1.1727690207273178, "grad_norm": 0.14040298917938324, "learning_rate": 4.0217743386923055e-06, "loss": 0.7228, "step": 2364 }, { "epoch": 1.1732654834305574, "grad_norm": 0.14446607932799382, "learning_rate": 4.020998800063559e-06, "loss": 0.7441, "step": 2365 }, { "epoch": 1.1737619461337967, "grad_norm": 0.13665124704375234, "learning_rate": 4.020223028974534e-06, "loss": 0.7516, "step": 2366 }, { "epoch": 1.174258408837036, "grad_norm": 0.13399912735468633, "learning_rate": 4.019447025543793e-06, "loss": 0.7183, "step": 2367 }, { "epoch": 1.1747548715402756, "grad_norm": 0.1464865537624882, "learning_rate": 4.018670789889938e-06, "loss": 0.7937, "step": 2368 }, { "epoch": 1.175251334243515, "grad_norm": 0.1420811329454974, "learning_rate": 4.0178943221316014e-06, "loss": 0.728, "step": 2369 }, { "epoch": 1.1757477969467545, "grad_norm": 0.1432300356176437, "learning_rate": 4.0171176223874555e-06, "loss": 0.657, "step": 2370 }, { "epoch": 1.1762442596499938, "grad_norm": 0.1432930911399873, "learning_rate": 4.0163406907762074e-06, "loss": 0.76, "step": 2371 }, { "epoch": 1.1767407223532331, "grad_norm": 0.14260952105955038, "learning_rate": 4.015563527416596e-06, "loss": 0.75, "step": 2372 }, { "epoch": 1.1772371850564727, "grad_norm": 0.14070002945171584, "learning_rate": 4.0147861324273994e-06, "loss": 0.7111, "step": 2373 }, { "epoch": 1.177733647759712, "grad_norm": 0.13824362597359197, "learning_rate": 4.014008505927431e-06, "loss": 0.7146, "step": 2374 }, { "epoch": 1.1782301104629516, "grad_norm": 0.13615039910533921, "learning_rate": 4.0132306480355374e-06, "loss": 0.6898, "step": 2375 }, { "epoch": 1.178726573166191, "grad_norm": 0.13640593448421545, "learning_rate": 4.012452558870602e-06, "loss": 0.7966, "step": 2376 }, { "epoch": 1.1792230358694302, "grad_norm": 0.13629910190444364, "learning_rate": 4.011674238551544e-06, "loss": 0.751, "step": 2377 }, { "epoch": 1.1797194985726698, "grad_norm": 0.13855859156200617, "learning_rate": 4.010895687197316e-06, "loss": 0.7481, "step": 2378 }, { "epoch": 1.180215961275909, "grad_norm": 0.14195565031842594, "learning_rate": 4.010116904926908e-06, "loss": 0.7214, "step": 2379 }, { "epoch": 1.1807124239791487, "grad_norm": 0.14168331391237854, "learning_rate": 4.009337891859343e-06, "loss": 0.7386, "step": 2380 }, { "epoch": 1.181208886682388, "grad_norm": 0.13835896324890185, "learning_rate": 4.008558648113682e-06, "loss": 0.7219, "step": 2381 }, { "epoch": 1.1817053493856273, "grad_norm": 0.14190577391365433, "learning_rate": 4.0077791738090195e-06, "loss": 0.7628, "step": 2382 }, { "epoch": 1.1822018120888669, "grad_norm": 0.1341911874683844, "learning_rate": 4.006999469064487e-06, "loss": 0.6969, "step": 2383 }, { "epoch": 1.1826982747921062, "grad_norm": 0.13969845313519358, "learning_rate": 4.006219533999247e-06, "loss": 0.7244, "step": 2384 }, { "epoch": 1.1831947374953455, "grad_norm": 0.13979366267677537, "learning_rate": 4.005439368732502e-06, "loss": 0.7645, "step": 2385 }, { "epoch": 1.183691200198585, "grad_norm": 0.1335214336393622, "learning_rate": 4.0046589733834875e-06, "loss": 0.688, "step": 2386 }, { "epoch": 1.1841876629018244, "grad_norm": 0.14780650963457725, "learning_rate": 4.003878348071474e-06, "loss": 0.7289, "step": 2387 }, { "epoch": 1.184684125605064, "grad_norm": 0.1340780920943342, "learning_rate": 4.0030974929157685e-06, "loss": 0.6878, "step": 2388 }, { "epoch": 1.1851805883083033, "grad_norm": 0.1359899436643017, "learning_rate": 4.002316408035711e-06, "loss": 0.7322, "step": 2389 }, { "epoch": 1.1856770510115426, "grad_norm": 0.13282726421917596, "learning_rate": 4.001535093550678e-06, "loss": 0.7032, "step": 2390 }, { "epoch": 1.1861735137147822, "grad_norm": 0.13903709744169873, "learning_rate": 4.000753549580082e-06, "loss": 0.6988, "step": 2391 }, { "epoch": 1.1866699764180215, "grad_norm": 0.1375603444876677, "learning_rate": 3.999971776243369e-06, "loss": 0.7292, "step": 2392 }, { "epoch": 1.187166439121261, "grad_norm": 0.13837931077084456, "learning_rate": 3.999189773660019e-06, "loss": 0.7046, "step": 2393 }, { "epoch": 1.1876629018245004, "grad_norm": 0.13869342809783666, "learning_rate": 3.998407541949551e-06, "loss": 0.6902, "step": 2394 }, { "epoch": 1.1881593645277397, "grad_norm": 0.13979740548264746, "learning_rate": 3.997625081231514e-06, "loss": 0.7353, "step": 2395 }, { "epoch": 1.1886558272309793, "grad_norm": 0.13527705164090786, "learning_rate": 3.996842391625497e-06, "loss": 0.726, "step": 2396 }, { "epoch": 1.1891522899342186, "grad_norm": 0.14174483460790632, "learning_rate": 3.99605947325112e-06, "loss": 0.7005, "step": 2397 }, { "epoch": 1.1896487526374582, "grad_norm": 0.13548057476768477, "learning_rate": 3.99527632622804e-06, "loss": 0.7066, "step": 2398 }, { "epoch": 1.1901452153406975, "grad_norm": 0.13722530722455226, "learning_rate": 3.994492950675949e-06, "loss": 0.7293, "step": 2399 }, { "epoch": 1.1906416780439368, "grad_norm": 0.14238980858246564, "learning_rate": 3.9937093467145725e-06, "loss": 0.7513, "step": 2400 }, { "epoch": 1.1911381407471764, "grad_norm": 0.14077263746379465, "learning_rate": 3.992925514463672e-06, "loss": 0.6797, "step": 2401 }, { "epoch": 1.1916346034504157, "grad_norm": 0.13906663141907152, "learning_rate": 3.992141454043045e-06, "loss": 0.705, "step": 2402 }, { "epoch": 1.1921310661536553, "grad_norm": 0.14643513675700848, "learning_rate": 3.99135716557252e-06, "loss": 0.7516, "step": 2403 }, { "epoch": 1.1926275288568946, "grad_norm": 0.1356991566400479, "learning_rate": 3.990572649171964e-06, "loss": 0.7296, "step": 2404 }, { "epoch": 1.193123991560134, "grad_norm": 0.13520242104672095, "learning_rate": 3.9897879049612795e-06, "loss": 0.7248, "step": 2405 }, { "epoch": 1.1936204542633735, "grad_norm": 0.14080671561882466, "learning_rate": 3.9890029330604e-06, "loss": 0.7011, "step": 2406 }, { "epoch": 1.1941169169666128, "grad_norm": 0.14134376264347104, "learning_rate": 3.988217733589296e-06, "loss": 0.7477, "step": 2407 }, { "epoch": 1.1946133796698524, "grad_norm": 0.13870900196918226, "learning_rate": 3.987432306667975e-06, "loss": 0.7606, "step": 2408 }, { "epoch": 1.1951098423730917, "grad_norm": 0.1498135326898326, "learning_rate": 3.986646652416473e-06, "loss": 0.7166, "step": 2409 }, { "epoch": 1.195606305076331, "grad_norm": 0.13666015376761742, "learning_rate": 3.985860770954867e-06, "loss": 0.6994, "step": 2410 }, { "epoch": 1.1961027677795706, "grad_norm": 0.14270391448274597, "learning_rate": 3.985074662403267e-06, "loss": 0.7084, "step": 2411 }, { "epoch": 1.19659923048281, "grad_norm": 0.13549585466845787, "learning_rate": 3.984288326881817e-06, "loss": 0.7065, "step": 2412 }, { "epoch": 1.1970956931860495, "grad_norm": 0.13648442428428606, "learning_rate": 3.983501764510693e-06, "loss": 0.7372, "step": 2413 }, { "epoch": 1.1975921558892888, "grad_norm": 0.14086948426447465, "learning_rate": 3.982714975410111e-06, "loss": 0.7205, "step": 2414 }, { "epoch": 1.1980886185925281, "grad_norm": 0.1325423947366576, "learning_rate": 3.9819279597003195e-06, "loss": 0.6983, "step": 2415 }, { "epoch": 1.1985850812957677, "grad_norm": 0.13634160260964182, "learning_rate": 3.9811407175015995e-06, "loss": 0.6579, "step": 2416 }, { "epoch": 1.199081543999007, "grad_norm": 0.13874737933942757, "learning_rate": 3.980353248934269e-06, "loss": 0.7378, "step": 2417 }, { "epoch": 1.1995780067022466, "grad_norm": 0.13788946383443576, "learning_rate": 3.9795655541186805e-06, "loss": 0.7417, "step": 2418 }, { "epoch": 1.200074469405486, "grad_norm": 0.13647093705243393, "learning_rate": 3.97877763317522e-06, "loss": 0.7199, "step": 2419 }, { "epoch": 1.2005709321087252, "grad_norm": 0.14052612906793216, "learning_rate": 3.977989486224309e-06, "loss": 0.7305, "step": 2420 }, { "epoch": 1.2010673948119648, "grad_norm": 0.14559651220194383, "learning_rate": 3.977201113386402e-06, "loss": 0.7112, "step": 2421 }, { "epoch": 1.2015638575152041, "grad_norm": 0.14825330957397817, "learning_rate": 3.97641251478199e-06, "loss": 0.798, "step": 2422 }, { "epoch": 1.2020603202184437, "grad_norm": 0.1411793849816043, "learning_rate": 3.975623690531598e-06, "loss": 0.775, "step": 2423 }, { "epoch": 1.202556782921683, "grad_norm": 0.13329544412504718, "learning_rate": 3.9748346407557845e-06, "loss": 0.7021, "step": 2424 }, { "epoch": 1.2030532456249223, "grad_norm": 0.1322912568134602, "learning_rate": 3.9740453655751435e-06, "loss": 0.7112, "step": 2425 }, { "epoch": 1.2035497083281619, "grad_norm": 0.1387834596545233, "learning_rate": 3.973255865110302e-06, "loss": 0.698, "step": 2426 }, { "epoch": 1.2040461710314012, "grad_norm": 0.14956785941966386, "learning_rate": 3.972466139481925e-06, "loss": 0.7206, "step": 2427 }, { "epoch": 1.2045426337346408, "grad_norm": 0.1322262204706779, "learning_rate": 3.971676188810707e-06, "loss": 0.7204, "step": 2428 }, { "epoch": 1.20503909643788, "grad_norm": 0.12896570477009364, "learning_rate": 3.9708860132173795e-06, "loss": 0.695, "step": 2429 }, { "epoch": 1.2055355591411194, "grad_norm": 0.13536258893402728, "learning_rate": 3.97009561282271e-06, "loss": 0.7283, "step": 2430 }, { "epoch": 1.206032021844359, "grad_norm": 0.13492371176463214, "learning_rate": 3.969304987747496e-06, "loss": 0.7073, "step": 2431 }, { "epoch": 1.2065284845475983, "grad_norm": 0.13134753297863244, "learning_rate": 3.9685141381125745e-06, "loss": 0.7089, "step": 2432 }, { "epoch": 1.2070249472508379, "grad_norm": 0.13554849626488005, "learning_rate": 3.967723064038812e-06, "loss": 0.735, "step": 2433 }, { "epoch": 1.2075214099540772, "grad_norm": 0.14349704386244447, "learning_rate": 3.966931765647112e-06, "loss": 0.7232, "step": 2434 }, { "epoch": 1.2080178726573165, "grad_norm": 0.1385163792731118, "learning_rate": 3.966140243058413e-06, "loss": 0.7218, "step": 2435 }, { "epoch": 1.208514335360556, "grad_norm": 0.14279978031188895, "learning_rate": 3.965348496393685e-06, "loss": 0.8045, "step": 2436 }, { "epoch": 1.2090107980637954, "grad_norm": 0.13858682489268342, "learning_rate": 3.964556525773935e-06, "loss": 0.7069, "step": 2437 }, { "epoch": 1.209507260767035, "grad_norm": 0.1275555575643711, "learning_rate": 3.963764331320201e-06, "loss": 0.7232, "step": 2438 }, { "epoch": 1.2100037234702743, "grad_norm": 0.13319214619249178, "learning_rate": 3.9629719131535595e-06, "loss": 0.7551, "step": 2439 }, { "epoch": 1.2105001861735136, "grad_norm": 0.1387567942220467, "learning_rate": 3.962179271395118e-06, "loss": 0.7367, "step": 2440 }, { "epoch": 1.2109966488767532, "grad_norm": 0.1373104804870825, "learning_rate": 3.961386406166019e-06, "loss": 0.7157, "step": 2441 }, { "epoch": 1.2114931115799925, "grad_norm": 0.13779972795149475, "learning_rate": 3.96059331758744e-06, "loss": 0.7441, "step": 2442 }, { "epoch": 1.211989574283232, "grad_norm": 0.13902570136362163, "learning_rate": 3.9598000057805905e-06, "loss": 0.7297, "step": 2443 }, { "epoch": 1.2124860369864714, "grad_norm": 0.14200167374497805, "learning_rate": 3.959006470866717e-06, "loss": 0.7664, "step": 2444 }, { "epoch": 1.2129824996897107, "grad_norm": 0.13182479218295906, "learning_rate": 3.958212712967097e-06, "loss": 0.7333, "step": 2445 }, { "epoch": 1.2134789623929503, "grad_norm": 0.13500957253720283, "learning_rate": 3.957418732203045e-06, "loss": 0.7128, "step": 2446 }, { "epoch": 1.2139754250961896, "grad_norm": 0.1364082757123999, "learning_rate": 3.9566245286959074e-06, "loss": 0.7516, "step": 2447 }, { "epoch": 1.2144718877994292, "grad_norm": 0.15584714093121874, "learning_rate": 3.955830102567066e-06, "loss": 0.7542, "step": 2448 }, { "epoch": 1.2149683505026685, "grad_norm": 0.13446548054487187, "learning_rate": 3.955035453937936e-06, "loss": 0.7358, "step": 2449 }, { "epoch": 1.2154648132059078, "grad_norm": 0.13145486309075377, "learning_rate": 3.954240582929965e-06, "loss": 0.7025, "step": 2450 }, { "epoch": 1.2159612759091474, "grad_norm": 0.13574735830422388, "learning_rate": 3.953445489664641e-06, "loss": 0.7401, "step": 2451 }, { "epoch": 1.2164577386123867, "grad_norm": 0.1366039565699845, "learning_rate": 3.952650174263476e-06, "loss": 0.7242, "step": 2452 }, { "epoch": 1.2169542013156263, "grad_norm": 0.13329175729167014, "learning_rate": 3.9518546368480235e-06, "loss": 0.701, "step": 2453 }, { "epoch": 1.2174506640188656, "grad_norm": 0.13635014090129313, "learning_rate": 3.951058877539869e-06, "loss": 0.7085, "step": 2454 }, { "epoch": 1.217947126722105, "grad_norm": 0.14550135826438018, "learning_rate": 3.95026289646063e-06, "loss": 0.7563, "step": 2455 }, { "epoch": 1.2184435894253445, "grad_norm": 0.13708956139933903, "learning_rate": 3.949466693731962e-06, "loss": 0.7396, "step": 2456 }, { "epoch": 1.2189400521285838, "grad_norm": 0.13788736852218864, "learning_rate": 3.948670269475549e-06, "loss": 0.7099, "step": 2457 }, { "epoch": 1.2194365148318234, "grad_norm": 0.1472672823984794, "learning_rate": 3.947873623813114e-06, "loss": 0.7551, "step": 2458 }, { "epoch": 1.2199329775350627, "grad_norm": 0.13567982397574574, "learning_rate": 3.94707675686641e-06, "loss": 0.7215, "step": 2459 }, { "epoch": 1.220429440238302, "grad_norm": 0.12785953951320136, "learning_rate": 3.946279668757226e-06, "loss": 0.6893, "step": 2460 }, { "epoch": 1.2209259029415416, "grad_norm": 0.139042104896544, "learning_rate": 3.945482359607383e-06, "loss": 0.7116, "step": 2461 }, { "epoch": 1.221422365644781, "grad_norm": 0.1310146478620868, "learning_rate": 3.94468482953874e-06, "loss": 0.6821, "step": 2462 }, { "epoch": 1.2219188283480205, "grad_norm": 0.14080053665645972, "learning_rate": 3.943887078673182e-06, "loss": 0.72, "step": 2463 }, { "epoch": 1.2224152910512598, "grad_norm": 0.1374193970967945, "learning_rate": 3.943089107132637e-06, "loss": 0.7277, "step": 2464 }, { "epoch": 1.2229117537544991, "grad_norm": 0.13142687505073025, "learning_rate": 3.942290915039059e-06, "loss": 0.7756, "step": 2465 }, { "epoch": 1.2234082164577387, "grad_norm": 0.1383208661807758, "learning_rate": 3.94149250251444e-06, "loss": 0.7427, "step": 2466 }, { "epoch": 1.223904679160978, "grad_norm": 0.13946709784881417, "learning_rate": 3.940693869680805e-06, "loss": 0.7091, "step": 2467 }, { "epoch": 1.2244011418642176, "grad_norm": 0.13594769238575358, "learning_rate": 3.939895016660212e-06, "loss": 0.7248, "step": 2468 }, { "epoch": 1.2248976045674569, "grad_norm": 0.13618933752020215, "learning_rate": 3.939095943574752e-06, "loss": 0.7535, "step": 2469 }, { "epoch": 1.2253940672706962, "grad_norm": 0.14274958557643477, "learning_rate": 3.938296650546552e-06, "loss": 0.7819, "step": 2470 }, { "epoch": 1.2258905299739358, "grad_norm": 0.13553582376705678, "learning_rate": 3.93749713769777e-06, "loss": 0.7561, "step": 2471 }, { "epoch": 1.226386992677175, "grad_norm": 0.14247983844696746, "learning_rate": 3.936697405150599e-06, "loss": 0.7169, "step": 2472 }, { "epoch": 1.2268834553804147, "grad_norm": 0.13401472437678552, "learning_rate": 3.935897453027265e-06, "loss": 0.7331, "step": 2473 }, { "epoch": 1.227379918083654, "grad_norm": 0.13467273748712041, "learning_rate": 3.935097281450027e-06, "loss": 0.7224, "step": 2474 }, { "epoch": 1.2278763807868933, "grad_norm": 0.13626379999904872, "learning_rate": 3.934296890541182e-06, "loss": 0.7443, "step": 2475 }, { "epoch": 1.2283728434901329, "grad_norm": 0.13003861644898487, "learning_rate": 3.933496280423054e-06, "loss": 0.6546, "step": 2476 }, { "epoch": 1.2288693061933722, "grad_norm": 0.1425981221162396, "learning_rate": 3.932695451218003e-06, "loss": 0.7141, "step": 2477 }, { "epoch": 1.2293657688966118, "grad_norm": 0.13560701136791695, "learning_rate": 3.931894403048424e-06, "loss": 0.7208, "step": 2478 }, { "epoch": 1.229862231599851, "grad_norm": 0.1401873943379598, "learning_rate": 3.931093136036744e-06, "loss": 0.7266, "step": 2479 }, { "epoch": 1.2303586943030904, "grad_norm": 0.13728694609128989, "learning_rate": 3.930291650305424e-06, "loss": 0.7134, "step": 2480 }, { "epoch": 1.23085515700633, "grad_norm": 0.13218011780198854, "learning_rate": 3.929489945976959e-06, "loss": 0.7239, "step": 2481 }, { "epoch": 1.2313516197095693, "grad_norm": 0.1410943689944911, "learning_rate": 3.928688023173875e-06, "loss": 0.7284, "step": 2482 }, { "epoch": 1.2318480824128089, "grad_norm": 0.1361830396710614, "learning_rate": 3.927885882018735e-06, "loss": 0.7211, "step": 2483 }, { "epoch": 1.2323445451160482, "grad_norm": 0.1351899227297423, "learning_rate": 3.927083522634132e-06, "loss": 0.7422, "step": 2484 }, { "epoch": 1.2328410078192875, "grad_norm": 0.1406973288937524, "learning_rate": 3.926280945142693e-06, "loss": 0.7229, "step": 2485 }, { "epoch": 1.233337470522527, "grad_norm": 0.13754991570942163, "learning_rate": 3.925478149667081e-06, "loss": 0.7677, "step": 2486 }, { "epoch": 1.2338339332257664, "grad_norm": 0.1384634788260933, "learning_rate": 3.92467513632999e-06, "loss": 0.7408, "step": 2487 }, { "epoch": 1.234330395929006, "grad_norm": 0.14392311337089933, "learning_rate": 3.923871905254146e-06, "loss": 0.7471, "step": 2488 }, { "epoch": 1.2348268586322453, "grad_norm": 0.1335035566492651, "learning_rate": 3.9230684565623135e-06, "loss": 0.6966, "step": 2489 }, { "epoch": 1.2353233213354846, "grad_norm": 0.132634223213086, "learning_rate": 3.922264790377283e-06, "loss": 0.6934, "step": 2490 }, { "epoch": 1.2358197840387242, "grad_norm": 0.13827031271642745, "learning_rate": 3.921460906821884e-06, "loss": 0.7098, "step": 2491 }, { "epoch": 1.2363162467419635, "grad_norm": 0.14451954689789367, "learning_rate": 3.920656806018977e-06, "loss": 0.788, "step": 2492 }, { "epoch": 1.236812709445203, "grad_norm": 0.13561380554064417, "learning_rate": 3.919852488091455e-06, "loss": 0.7527, "step": 2493 }, { "epoch": 1.2373091721484424, "grad_norm": 0.13501364970911942, "learning_rate": 3.9190479531622465e-06, "loss": 0.7612, "step": 2494 }, { "epoch": 1.2378056348516817, "grad_norm": 0.12818687643314722, "learning_rate": 3.91824320135431e-06, "loss": 0.6963, "step": 2495 }, { "epoch": 1.2383020975549213, "grad_norm": 0.13312040030918668, "learning_rate": 3.9174382327906415e-06, "loss": 0.7064, "step": 2496 }, { "epoch": 1.2387985602581606, "grad_norm": 0.13300761423052482, "learning_rate": 3.916633047594265e-06, "loss": 0.7077, "step": 2497 }, { "epoch": 1.2392950229614, "grad_norm": 0.13620840402675596, "learning_rate": 3.915827645888242e-06, "loss": 0.7187, "step": 2498 }, { "epoch": 1.2397914856646395, "grad_norm": 0.13426305401154814, "learning_rate": 3.915022027795663e-06, "loss": 0.69, "step": 2499 }, { "epoch": 1.2402879483678788, "grad_norm": 0.1464897445532207, "learning_rate": 3.914216193439657e-06, "loss": 0.8251, "step": 2500 }, { "epoch": 1.2407844110711184, "grad_norm": 0.13743582313648897, "learning_rate": 3.91341014294338e-06, "loss": 0.7444, "step": 2501 }, { "epoch": 1.2412808737743577, "grad_norm": 0.13874526596758197, "learning_rate": 3.912603876430025e-06, "loss": 0.708, "step": 2502 }, { "epoch": 1.241777336477597, "grad_norm": 0.13008996722051902, "learning_rate": 3.9117973940228166e-06, "loss": 0.7709, "step": 2503 }, { "epoch": 1.2422737991808366, "grad_norm": 0.1355480166714516, "learning_rate": 3.910990695845013e-06, "loss": 0.6943, "step": 2504 }, { "epoch": 1.242770261884076, "grad_norm": 0.14539960501531302, "learning_rate": 3.910183782019905e-06, "loss": 0.7631, "step": 2505 }, { "epoch": 1.2432667245873155, "grad_norm": 0.14083590125770834, "learning_rate": 3.909376652670818e-06, "loss": 0.7338, "step": 2506 }, { "epoch": 1.2437631872905548, "grad_norm": 0.1431658480547782, "learning_rate": 3.908569307921106e-06, "loss": 0.8085, "step": 2507 }, { "epoch": 1.2442596499937941, "grad_norm": 0.14578702074830277, "learning_rate": 3.90776174789416e-06, "loss": 0.7466, "step": 2508 }, { "epoch": 1.2447561126970337, "grad_norm": 0.13316331073208876, "learning_rate": 3.906953972713403e-06, "loss": 0.7069, "step": 2509 }, { "epoch": 1.245252575400273, "grad_norm": 0.13867830074059864, "learning_rate": 3.90614598250229e-06, "loss": 0.7267, "step": 2510 }, { "epoch": 1.2457490381035126, "grad_norm": 0.13076874574140407, "learning_rate": 3.905337777384308e-06, "loss": 0.698, "step": 2511 }, { "epoch": 1.246245500806752, "grad_norm": 0.13962139881766297, "learning_rate": 3.904529357482981e-06, "loss": 0.7147, "step": 2512 }, { "epoch": 1.2467419635099912, "grad_norm": 0.1462052190977223, "learning_rate": 3.9037207229218615e-06, "loss": 0.728, "step": 2513 }, { "epoch": 1.2472384262132308, "grad_norm": 0.1418049627962229, "learning_rate": 3.902911873824536e-06, "loss": 0.7354, "step": 2514 }, { "epoch": 1.24773488891647, "grad_norm": 0.14139015990729262, "learning_rate": 3.902102810314625e-06, "loss": 0.736, "step": 2515 }, { "epoch": 1.2482313516197097, "grad_norm": 0.13974548545259946, "learning_rate": 3.9012935325157805e-06, "loss": 0.735, "step": 2516 }, { "epoch": 1.248727814322949, "grad_norm": 0.14137472030628268, "learning_rate": 3.900484040551688e-06, "loss": 0.7538, "step": 2517 }, { "epoch": 1.2492242770261883, "grad_norm": 0.13631373740449088, "learning_rate": 3.899674334546064e-06, "loss": 0.6965, "step": 2518 }, { "epoch": 1.2497207397294279, "grad_norm": 0.13581041061864152, "learning_rate": 3.898864414622661e-06, "loss": 0.7229, "step": 2519 }, { "epoch": 1.2502172024326672, "grad_norm": 0.1351856956359364, "learning_rate": 3.89805428090526e-06, "loss": 0.6548, "step": 2520 }, { "epoch": 1.2502172024326672, "eval_loss": 0.742116391658783, "eval_runtime": 135.9502, "eval_samples_per_second": 223.266, "eval_steps_per_second": 27.915, "step": 2520 }, { "epoch": 1.2507136651359065, "grad_norm": 0.1384586104535219, "learning_rate": 3.897243933517679e-06, "loss": 0.7376, "step": 2521 }, { "epoch": 1.251210127839146, "grad_norm": 0.16315907780591418, "learning_rate": 3.896433372583766e-06, "loss": 0.7179, "step": 2522 }, { "epoch": 1.2517065905423854, "grad_norm": 0.13165553758104281, "learning_rate": 3.895622598227402e-06, "loss": 0.6968, "step": 2523 }, { "epoch": 1.252203053245625, "grad_norm": 0.1468700776665726, "learning_rate": 3.894811610572501e-06, "loss": 0.7961, "step": 2524 }, { "epoch": 1.2526995159488643, "grad_norm": 0.13484457344159942, "learning_rate": 3.894000409743009e-06, "loss": 0.6972, "step": 2525 }, { "epoch": 1.2531959786521036, "grad_norm": 0.14298803461222828, "learning_rate": 3.893188995862907e-06, "loss": 0.7434, "step": 2526 }, { "epoch": 1.2536924413553432, "grad_norm": 0.13247384088857173, "learning_rate": 3.892377369056203e-06, "loss": 0.697, "step": 2527 }, { "epoch": 1.2541889040585825, "grad_norm": 0.12855401841158223, "learning_rate": 3.8915655294469445e-06, "loss": 0.7368, "step": 2528 }, { "epoch": 1.254685366761822, "grad_norm": 0.1399262509376029, "learning_rate": 3.890753477159206e-06, "loss": 0.7499, "step": 2529 }, { "epoch": 1.2551818294650614, "grad_norm": 0.1532990862064815, "learning_rate": 3.8899412123170984e-06, "loss": 0.7841, "step": 2530 }, { "epoch": 1.2556782921683007, "grad_norm": 0.13912282728279968, "learning_rate": 3.889128735044762e-06, "loss": 0.7458, "step": 2531 }, { "epoch": 1.2561747548715403, "grad_norm": 0.1369771636022506, "learning_rate": 3.888316045466372e-06, "loss": 0.7154, "step": 2532 }, { "epoch": 1.2566712175747796, "grad_norm": 0.13198426469797134, "learning_rate": 3.887503143706134e-06, "loss": 0.674, "step": 2533 }, { "epoch": 1.2571676802780192, "grad_norm": 0.14611135628302505, "learning_rate": 3.886690029888287e-06, "loss": 0.7117, "step": 2534 }, { "epoch": 1.2576641429812585, "grad_norm": 0.1385484671417284, "learning_rate": 3.885876704137104e-06, "loss": 0.7399, "step": 2535 }, { "epoch": 1.2581606056844978, "grad_norm": 0.13166836823073533, "learning_rate": 3.885063166576886e-06, "loss": 0.7609, "step": 2536 }, { "epoch": 1.2586570683877374, "grad_norm": 0.1387003911974032, "learning_rate": 3.8842494173319726e-06, "loss": 0.725, "step": 2537 }, { "epoch": 1.2591535310909767, "grad_norm": 0.13305768328582457, "learning_rate": 3.883435456526728e-06, "loss": 0.7375, "step": 2538 }, { "epoch": 1.2596499937942163, "grad_norm": 0.13978466433671136, "learning_rate": 3.882621284285558e-06, "loss": 0.7691, "step": 2539 }, { "epoch": 1.2601464564974556, "grad_norm": 0.13757446247537838, "learning_rate": 3.881806900732893e-06, "loss": 0.7069, "step": 2540 }, { "epoch": 1.260642919200695, "grad_norm": 0.14032437189289912, "learning_rate": 3.880992305993198e-06, "loss": 0.7281, "step": 2541 }, { "epoch": 1.2611393819039345, "grad_norm": 0.1377868179608081, "learning_rate": 3.880177500190971e-06, "loss": 0.7587, "step": 2542 }, { "epoch": 1.2616358446071738, "grad_norm": 0.14861646208837478, "learning_rate": 3.8793624834507435e-06, "loss": 0.7251, "step": 2543 }, { "epoch": 1.2621323073104134, "grad_norm": 0.13517554466674267, "learning_rate": 3.8785472558970776e-06, "loss": 0.6974, "step": 2544 }, { "epoch": 1.2626287700136527, "grad_norm": 0.1309887066083378, "learning_rate": 3.877731817654566e-06, "loss": 0.7402, "step": 2545 }, { "epoch": 1.263125232716892, "grad_norm": 0.1396787950926557, "learning_rate": 3.876916168847836e-06, "loss": 0.7621, "step": 2546 }, { "epoch": 1.2636216954201316, "grad_norm": 0.14129831083302125, "learning_rate": 3.876100309601547e-06, "loss": 0.8199, "step": 2547 }, { "epoch": 1.264118158123371, "grad_norm": 0.14102270939055808, "learning_rate": 3.87528424004039e-06, "loss": 0.7137, "step": 2548 }, { "epoch": 1.2646146208266105, "grad_norm": 0.13270563152474793, "learning_rate": 3.874467960289088e-06, "loss": 0.6974, "step": 2549 }, { "epoch": 1.2651110835298498, "grad_norm": 0.14077175671585102, "learning_rate": 3.8736514704723956e-06, "loss": 0.7488, "step": 2550 }, { "epoch": 1.2656075462330891, "grad_norm": 0.14294951439526327, "learning_rate": 3.872834770715102e-06, "loss": 0.7407, "step": 2551 }, { "epoch": 1.2661040089363287, "grad_norm": 0.14108160173034903, "learning_rate": 3.872017861142024e-06, "loss": 0.7285, "step": 2552 }, { "epoch": 1.266600471639568, "grad_norm": 0.14172910962233856, "learning_rate": 3.871200741878015e-06, "loss": 0.7434, "step": 2553 }, { "epoch": 1.2670969343428076, "grad_norm": 0.1418472981583555, "learning_rate": 3.870383413047959e-06, "loss": 0.693, "step": 2554 }, { "epoch": 1.267593397046047, "grad_norm": 0.14824626937177632, "learning_rate": 3.86956587477677e-06, "loss": 0.7304, "step": 2555 }, { "epoch": 1.2680898597492862, "grad_norm": 0.14417792095901208, "learning_rate": 3.868748127189397e-06, "loss": 0.689, "step": 2556 }, { "epoch": 1.2685863224525258, "grad_norm": 0.13628181036537815, "learning_rate": 3.8679301704108176e-06, "loss": 0.7269, "step": 2557 }, { "epoch": 1.2690827851557651, "grad_norm": 0.1432671252492769, "learning_rate": 3.8671120045660456e-06, "loss": 0.7273, "step": 2558 }, { "epoch": 1.2695792478590047, "grad_norm": 0.13729463835110303, "learning_rate": 3.8662936297801235e-06, "loss": 0.7118, "step": 2559 }, { "epoch": 1.270075710562244, "grad_norm": 0.1458195417043056, "learning_rate": 3.865475046178127e-06, "loss": 0.7171, "step": 2560 }, { "epoch": 1.2705721732654833, "grad_norm": 0.13880022855949167, "learning_rate": 3.864656253885163e-06, "loss": 0.6906, "step": 2561 }, { "epoch": 1.2710686359687229, "grad_norm": 0.1349359231670376, "learning_rate": 3.863837253026372e-06, "loss": 0.7689, "step": 2562 }, { "epoch": 1.2715650986719622, "grad_norm": 0.14164412974060797, "learning_rate": 3.863018043726924e-06, "loss": 0.6878, "step": 2563 }, { "epoch": 1.2720615613752018, "grad_norm": 0.14323827765065936, "learning_rate": 3.862198626112023e-06, "loss": 0.7606, "step": 2564 }, { "epoch": 1.272558024078441, "grad_norm": 0.1473424236959722, "learning_rate": 3.861379000306902e-06, "loss": 0.7605, "step": 2565 }, { "epoch": 1.2730544867816804, "grad_norm": 0.1292191998705604, "learning_rate": 3.8605591664368295e-06, "loss": 0.7022, "step": 2566 }, { "epoch": 1.27355094948492, "grad_norm": 0.13465698019970399, "learning_rate": 3.859739124627103e-06, "loss": 0.6971, "step": 2567 }, { "epoch": 1.2740474121881593, "grad_norm": 0.13654253074049172, "learning_rate": 3.858918875003053e-06, "loss": 0.6928, "step": 2568 }, { "epoch": 1.2745438748913989, "grad_norm": 0.1379719697081152, "learning_rate": 3.858098417690042e-06, "loss": 0.717, "step": 2569 }, { "epoch": 1.2750403375946382, "grad_norm": 0.14175199937004399, "learning_rate": 3.857277752813463e-06, "loss": 0.7616, "step": 2570 }, { "epoch": 1.2755368002978775, "grad_norm": 0.13443150257007153, "learning_rate": 3.856456880498742e-06, "loss": 0.6973, "step": 2571 }, { "epoch": 1.276033263001117, "grad_norm": 0.14593077908065585, "learning_rate": 3.855635800871335e-06, "loss": 0.7569, "step": 2572 }, { "epoch": 1.2765297257043564, "grad_norm": 0.15100942741983306, "learning_rate": 3.854814514056734e-06, "loss": 0.7642, "step": 2573 }, { "epoch": 1.277026188407596, "grad_norm": 0.13642851129916278, "learning_rate": 3.853993020180456e-06, "loss": 0.7406, "step": 2574 }, { "epoch": 1.2775226511108353, "grad_norm": 0.1400663992803376, "learning_rate": 3.853171319368054e-06, "loss": 0.7454, "step": 2575 }, { "epoch": 1.2780191138140746, "grad_norm": 0.1319083373021114, "learning_rate": 3.852349411745113e-06, "loss": 0.74, "step": 2576 }, { "epoch": 1.2785155765173142, "grad_norm": 0.14724655880988557, "learning_rate": 3.851527297437247e-06, "loss": 0.7528, "step": 2577 }, { "epoch": 1.2790120392205535, "grad_norm": 0.13674211565816105, "learning_rate": 3.8507049765701045e-06, "loss": 0.6716, "step": 2578 }, { "epoch": 1.279508501923793, "grad_norm": 0.1382868822761622, "learning_rate": 3.849882449269363e-06, "loss": 0.746, "step": 2579 }, { "epoch": 1.2800049646270324, "grad_norm": 0.1390259790054548, "learning_rate": 3.849059715660732e-06, "loss": 0.7088, "step": 2580 }, { "epoch": 1.2805014273302717, "grad_norm": 0.1369367923225582, "learning_rate": 3.848236775869955e-06, "loss": 0.7699, "step": 2581 }, { "epoch": 1.2809978900335113, "grad_norm": 0.15971172103749032, "learning_rate": 3.847413630022804e-06, "loss": 0.746, "step": 2582 }, { "epoch": 1.2814943527367506, "grad_norm": 0.14277522047325522, "learning_rate": 3.846590278245083e-06, "loss": 0.7567, "step": 2583 }, { "epoch": 1.2819908154399902, "grad_norm": 0.14699081523439286, "learning_rate": 3.8457667206626306e-06, "loss": 0.7873, "step": 2584 }, { "epoch": 1.2824872781432295, "grad_norm": 0.14366187562768565, "learning_rate": 3.844942957401311e-06, "loss": 0.6949, "step": 2585 }, { "epoch": 1.2829837408464688, "grad_norm": 0.14181909585351757, "learning_rate": 3.844118988587025e-06, "loss": 0.7135, "step": 2586 }, { "epoch": 1.2834802035497084, "grad_norm": 0.13109014026406526, "learning_rate": 3.843294814345705e-06, "loss": 0.6893, "step": 2587 }, { "epoch": 1.2839766662529477, "grad_norm": 0.13263413654791315, "learning_rate": 3.8424704348033084e-06, "loss": 0.6963, "step": 2588 }, { "epoch": 1.2844731289561873, "grad_norm": 0.13552375577647977, "learning_rate": 3.841645850085831e-06, "loss": 0.718, "step": 2589 }, { "epoch": 1.2849695916594266, "grad_norm": 0.138899051483369, "learning_rate": 3.840821060319298e-06, "loss": 0.754, "step": 2590 }, { "epoch": 1.285466054362666, "grad_norm": 0.14600374894252893, "learning_rate": 3.839996065629764e-06, "loss": 0.7421, "step": 2591 }, { "epoch": 1.2859625170659055, "grad_norm": 0.14495501404251815, "learning_rate": 3.839170866143317e-06, "loss": 0.6984, "step": 2592 }, { "epoch": 1.2864589797691448, "grad_norm": 0.13341183715508106, "learning_rate": 3.838345461986074e-06, "loss": 0.7205, "step": 2593 }, { "epoch": 1.2869554424723844, "grad_norm": 0.13169443537805922, "learning_rate": 3.837519853284186e-06, "loss": 0.691, "step": 2594 }, { "epoch": 1.2874519051756237, "grad_norm": 0.13790138352749554, "learning_rate": 3.836694040163834e-06, "loss": 0.6926, "step": 2595 }, { "epoch": 1.287948367878863, "grad_norm": 0.1437529575160023, "learning_rate": 3.835868022751231e-06, "loss": 0.7934, "step": 2596 }, { "epoch": 1.2884448305821026, "grad_norm": 0.13306894590543736, "learning_rate": 3.835041801172619e-06, "loss": 0.6854, "step": 2597 }, { "epoch": 1.288941293285342, "grad_norm": 0.14395437206135459, "learning_rate": 3.834215375554275e-06, "loss": 0.7379, "step": 2598 }, { "epoch": 1.2894377559885815, "grad_norm": 0.1378707356978414, "learning_rate": 3.8333887460225015e-06, "loss": 0.7178, "step": 2599 }, { "epoch": 1.2899342186918208, "grad_norm": 0.13148423330705314, "learning_rate": 3.832561912703638e-06, "loss": 0.6924, "step": 2600 }, { "epoch": 1.2904306813950601, "grad_norm": 0.13730143809759457, "learning_rate": 3.831734875724052e-06, "loss": 0.7503, "step": 2601 }, { "epoch": 1.2909271440982997, "grad_norm": 0.13498579164231725, "learning_rate": 3.830907635210143e-06, "loss": 0.738, "step": 2602 }, { "epoch": 1.291423606801539, "grad_norm": 0.13053407240787798, "learning_rate": 3.830080191288342e-06, "loss": 0.7039, "step": 2603 }, { "epoch": 1.2919200695047786, "grad_norm": 0.13619974831779605, "learning_rate": 3.82925254408511e-06, "loss": 0.7462, "step": 2604 }, { "epoch": 1.2924165322080179, "grad_norm": 0.1415670740958942, "learning_rate": 3.828424693726939e-06, "loss": 0.7302, "step": 2605 }, { "epoch": 1.2929129949112572, "grad_norm": 0.14173779929791688, "learning_rate": 3.827596640340353e-06, "loss": 0.7732, "step": 2606 }, { "epoch": 1.2934094576144968, "grad_norm": 0.1359458995344357, "learning_rate": 3.826768384051907e-06, "loss": 0.7372, "step": 2607 }, { "epoch": 1.293905920317736, "grad_norm": 0.13416195193113473, "learning_rate": 3.825939924988187e-06, "loss": 0.7289, "step": 2608 }, { "epoch": 1.2944023830209757, "grad_norm": 0.1337826798946644, "learning_rate": 3.825111263275809e-06, "loss": 0.6944, "step": 2609 }, { "epoch": 1.294898845724215, "grad_norm": 0.13167566643050344, "learning_rate": 3.824282399041421e-06, "loss": 0.677, "step": 2610 }, { "epoch": 1.2953953084274543, "grad_norm": 0.1432765949213179, "learning_rate": 3.823453332411702e-06, "loss": 0.7226, "step": 2611 }, { "epoch": 1.2958917711306939, "grad_norm": 0.13409930276110446, "learning_rate": 3.8226240635133615e-06, "loss": 0.7633, "step": 2612 }, { "epoch": 1.2963882338339332, "grad_norm": 0.14108249887977412, "learning_rate": 3.8217945924731385e-06, "loss": 0.7139, "step": 2613 }, { "epoch": 1.2968846965371728, "grad_norm": 0.13605604217842482, "learning_rate": 3.8209649194178065e-06, "loss": 0.7486, "step": 2614 }, { "epoch": 1.297381159240412, "grad_norm": 0.13562858110835124, "learning_rate": 3.820135044474166e-06, "loss": 0.7403, "step": 2615 }, { "epoch": 1.2978776219436514, "grad_norm": 0.13982122188113263, "learning_rate": 3.81930496776905e-06, "loss": 0.7424, "step": 2616 }, { "epoch": 1.298374084646891, "grad_norm": 0.13772514732208074, "learning_rate": 3.818474689429324e-06, "loss": 0.7531, "step": 2617 }, { "epoch": 1.2988705473501303, "grad_norm": 0.13901119411689639, "learning_rate": 3.817644209581881e-06, "loss": 0.7395, "step": 2618 }, { "epoch": 1.2993670100533699, "grad_norm": 0.1344248019109414, "learning_rate": 3.8168135283536485e-06, "loss": 0.732, "step": 2619 }, { "epoch": 1.2998634727566092, "grad_norm": 0.13540832289022223, "learning_rate": 3.815982645871582e-06, "loss": 0.7063, "step": 2620 }, { "epoch": 1.3003599354598485, "grad_norm": 0.13397167892750497, "learning_rate": 3.815151562262666e-06, "loss": 0.7302, "step": 2621 }, { "epoch": 1.300856398163088, "grad_norm": 0.13514380521337327, "learning_rate": 3.8143202776539224e-06, "loss": 0.7267, "step": 2622 }, { "epoch": 1.3013528608663274, "grad_norm": 0.1334082478010419, "learning_rate": 3.8134887921723975e-06, "loss": 0.7107, "step": 2623 }, { "epoch": 1.301849323569567, "grad_norm": 0.1359740311554934, "learning_rate": 3.812657105945171e-06, "loss": 0.6509, "step": 2624 }, { "epoch": 1.3023457862728063, "grad_norm": 0.1445356509888893, "learning_rate": 3.8118252190993533e-06, "loss": 0.7013, "step": 2625 }, { "epoch": 1.3028422489760456, "grad_norm": 0.13788799234754298, "learning_rate": 3.810993131762083e-06, "loss": 0.7414, "step": 2626 }, { "epoch": 1.3033387116792852, "grad_norm": 0.13253611567442633, "learning_rate": 3.810160844060533e-06, "loss": 0.7793, "step": 2627 }, { "epoch": 1.3038351743825245, "grad_norm": 0.133375725717547, "learning_rate": 3.8093283561219063e-06, "loss": 0.7065, "step": 2628 }, { "epoch": 1.304331637085764, "grad_norm": 0.14059616928874114, "learning_rate": 3.808495668073432e-06, "loss": 0.7164, "step": 2629 }, { "epoch": 1.3048280997890034, "grad_norm": 0.1307516991478391, "learning_rate": 3.807662780042376e-06, "loss": 0.6775, "step": 2630 }, { "epoch": 1.3053245624922427, "grad_norm": 0.14152325120856557, "learning_rate": 3.806829692156031e-06, "loss": 0.7759, "step": 2631 }, { "epoch": 1.3058210251954823, "grad_norm": 0.12924559643345673, "learning_rate": 3.8059964045417196e-06, "loss": 0.6981, "step": 2632 }, { "epoch": 1.3063174878987216, "grad_norm": 0.1370915470558916, "learning_rate": 3.805162917326799e-06, "loss": 0.7678, "step": 2633 }, { "epoch": 1.3068139506019611, "grad_norm": 0.14767292411211091, "learning_rate": 3.8043292306386528e-06, "loss": 0.6807, "step": 2634 }, { "epoch": 1.3073104133052005, "grad_norm": 0.13677942779385033, "learning_rate": 3.8034953446046974e-06, "loss": 0.7533, "step": 2635 }, { "epoch": 1.3078068760084398, "grad_norm": 0.13632882033348248, "learning_rate": 3.8026612593523795e-06, "loss": 0.7246, "step": 2636 }, { "epoch": 1.3083033387116794, "grad_norm": 0.13657887532594618, "learning_rate": 3.801826975009173e-06, "loss": 0.7122, "step": 2637 }, { "epoch": 1.3087998014149187, "grad_norm": 0.13144574952305246, "learning_rate": 3.8009924917025864e-06, "loss": 0.727, "step": 2638 }, { "epoch": 1.3092962641181582, "grad_norm": 0.13554336516404342, "learning_rate": 3.8001578095601583e-06, "loss": 0.6958, "step": 2639 }, { "epoch": 1.3097927268213976, "grad_norm": 0.13859859321388554, "learning_rate": 3.799322928709455e-06, "loss": 0.7051, "step": 2640 }, { "epoch": 1.310289189524637, "grad_norm": 0.1389305915074386, "learning_rate": 3.7984878492780754e-06, "loss": 0.7113, "step": 2641 }, { "epoch": 1.3107856522278765, "grad_norm": 0.13940935351453346, "learning_rate": 3.797652571393647e-06, "loss": 0.7732, "step": 2642 }, { "epoch": 1.3112821149311158, "grad_norm": 0.13336829577684228, "learning_rate": 3.79681709518383e-06, "loss": 0.7421, "step": 2643 }, { "epoch": 1.3117785776343553, "grad_norm": 0.1338637968337137, "learning_rate": 3.7959814207763134e-06, "loss": 0.7025, "step": 2644 }, { "epoch": 1.3122750403375947, "grad_norm": 0.1322361693764335, "learning_rate": 3.7951455482988154e-06, "loss": 0.6567, "step": 2645 }, { "epoch": 1.312771503040834, "grad_norm": 0.13690317157186413, "learning_rate": 3.7943094778790866e-06, "loss": 0.7347, "step": 2646 }, { "epoch": 1.3132679657440733, "grad_norm": 0.13886094412827465, "learning_rate": 3.7934732096449066e-06, "loss": 0.7281, "step": 2647 }, { "epoch": 1.313764428447313, "grad_norm": 0.1433387530701494, "learning_rate": 3.792636743724085e-06, "loss": 0.7451, "step": 2648 }, { "epoch": 1.3142608911505524, "grad_norm": 0.13856198601421107, "learning_rate": 3.7918000802444644e-06, "loss": 0.7467, "step": 2649 }, { "epoch": 1.3147573538537918, "grad_norm": 0.13420789080751208, "learning_rate": 3.790963219333913e-06, "loss": 0.7276, "step": 2650 }, { "epoch": 1.315253816557031, "grad_norm": 0.1291136871280018, "learning_rate": 3.790126161120333e-06, "loss": 0.6971, "step": 2651 }, { "epoch": 1.3157502792602704, "grad_norm": 0.1290712942967362, "learning_rate": 3.789288905731655e-06, "loss": 0.7456, "step": 2652 }, { "epoch": 1.31624674196351, "grad_norm": 0.13062887909116458, "learning_rate": 3.78845145329584e-06, "loss": 0.7069, "step": 2653 }, { "epoch": 1.3167432046667495, "grad_norm": 0.13036053551686685, "learning_rate": 3.7876138039408784e-06, "loss": 0.7227, "step": 2654 }, { "epoch": 1.3172396673699889, "grad_norm": 0.13222431704901577, "learning_rate": 3.786775957794793e-06, "loss": 0.7432, "step": 2655 }, { "epoch": 1.3177361300732282, "grad_norm": 0.13104899861223418, "learning_rate": 3.7859379149856335e-06, "loss": 0.7501, "step": 2656 }, { "epoch": 1.3182325927764675, "grad_norm": 0.13397270135551584, "learning_rate": 3.7850996756414832e-06, "loss": 0.7326, "step": 2657 }, { "epoch": 1.318729055479707, "grad_norm": 0.13613150488476347, "learning_rate": 3.7842612398904515e-06, "loss": 0.7147, "step": 2658 }, { "epoch": 1.3192255181829466, "grad_norm": 0.14339490468621394, "learning_rate": 3.783422607860681e-06, "loss": 0.7324, "step": 2659 }, { "epoch": 1.319721980886186, "grad_norm": 0.13378917893428685, "learning_rate": 3.7825837796803438e-06, "loss": 0.7268, "step": 2660 }, { "epoch": 1.3202184435894253, "grad_norm": 0.13360282147099303, "learning_rate": 3.7817447554776397e-06, "loss": 0.7304, "step": 2661 }, { "epoch": 1.3207149062926646, "grad_norm": 0.13904280037330674, "learning_rate": 3.780905535380801e-06, "loss": 0.7572, "step": 2662 }, { "epoch": 1.3212113689959042, "grad_norm": 0.13011808148888576, "learning_rate": 3.78006611951809e-06, "loss": 0.6836, "step": 2663 }, { "epoch": 1.3217078316991435, "grad_norm": 0.13510584692748015, "learning_rate": 3.779226508017796e-06, "loss": 0.7061, "step": 2664 }, { "epoch": 1.322204294402383, "grad_norm": 0.1371054013890361, "learning_rate": 3.778386701008241e-06, "loss": 0.751, "step": 2665 }, { "epoch": 1.3227007571056224, "grad_norm": 0.13439056933714283, "learning_rate": 3.7775466986177763e-06, "loss": 0.7382, "step": 2666 }, { "epoch": 1.3231972198088617, "grad_norm": 0.13676259742243488, "learning_rate": 3.776706500974783e-06, "loss": 0.7439, "step": 2667 }, { "epoch": 1.3236936825121013, "grad_norm": 0.1384039615266504, "learning_rate": 3.775866108207671e-06, "loss": 0.7215, "step": 2668 }, { "epoch": 1.3241901452153406, "grad_norm": 0.14031483696025457, "learning_rate": 3.7750255204448817e-06, "loss": 0.7596, "step": 2669 }, { "epoch": 1.3246866079185802, "grad_norm": 0.13734515606529832, "learning_rate": 3.7741847378148845e-06, "loss": 0.7511, "step": 2670 }, { "epoch": 1.3251830706218195, "grad_norm": 0.130610747808502, "learning_rate": 3.7733437604461804e-06, "loss": 0.675, "step": 2671 }, { "epoch": 1.3256795333250588, "grad_norm": 0.13229432283223813, "learning_rate": 3.7725025884672987e-06, "loss": 0.7204, "step": 2672 }, { "epoch": 1.3261759960282984, "grad_norm": 0.13459703835228942, "learning_rate": 3.7716612220068004e-06, "loss": 0.6899, "step": 2673 }, { "epoch": 1.3266724587315377, "grad_norm": 0.13416054723437704, "learning_rate": 3.770819661193273e-06, "loss": 0.6853, "step": 2674 }, { "epoch": 1.3271689214347773, "grad_norm": 0.1361667362155641, "learning_rate": 3.7699779061553365e-06, "loss": 0.7009, "step": 2675 }, { "epoch": 1.3276653841380166, "grad_norm": 0.13873053615880002, "learning_rate": 3.7691359570216404e-06, "loss": 0.6775, "step": 2676 }, { "epoch": 1.328161846841256, "grad_norm": 0.13977475275303883, "learning_rate": 3.7682938139208615e-06, "loss": 0.7638, "step": 2677 }, { "epoch": 1.3286583095444955, "grad_norm": 0.13414664404632431, "learning_rate": 3.7674514769817083e-06, "loss": 0.7539, "step": 2678 }, { "epoch": 1.3291547722477348, "grad_norm": 0.14091362715741365, "learning_rate": 3.7666089463329196e-06, "loss": 0.746, "step": 2679 }, { "epoch": 1.3296512349509744, "grad_norm": 0.1470455702766219, "learning_rate": 3.765766222103262e-06, "loss": 0.7381, "step": 2680 }, { "epoch": 1.3301476976542137, "grad_norm": 0.13534175218735514, "learning_rate": 3.7649233044215314e-06, "loss": 0.7117, "step": 2681 }, { "epoch": 1.330644160357453, "grad_norm": 0.13478773220973986, "learning_rate": 3.764080193416556e-06, "loss": 0.7127, "step": 2682 }, { "epoch": 1.3311406230606926, "grad_norm": 0.14594060460245392, "learning_rate": 3.7632368892171916e-06, "loss": 0.7218, "step": 2683 }, { "epoch": 1.331637085763932, "grad_norm": 0.13227694228798864, "learning_rate": 3.7623933919523226e-06, "loss": 0.6883, "step": 2684 }, { "epoch": 1.3321335484671715, "grad_norm": 0.1331372361004289, "learning_rate": 3.761549701750865e-06, "loss": 0.7251, "step": 2685 }, { "epoch": 1.3326300111704108, "grad_norm": 0.13633213773374722, "learning_rate": 3.7607058187417624e-06, "loss": 0.6906, "step": 2686 }, { "epoch": 1.3331264738736501, "grad_norm": 0.14075233741360202, "learning_rate": 3.7598617430539886e-06, "loss": 0.7518, "step": 2687 }, { "epoch": 1.3336229365768897, "grad_norm": 0.14126613421524764, "learning_rate": 3.7590174748165487e-06, "loss": 0.7323, "step": 2688 }, { "epoch": 1.334119399280129, "grad_norm": 0.13817784028859825, "learning_rate": 3.758173014158475e-06, "loss": 0.7186, "step": 2689 }, { "epoch": 1.3346158619833686, "grad_norm": 0.14776049205061048, "learning_rate": 3.757328361208828e-06, "loss": 0.7606, "step": 2690 }, { "epoch": 1.335112324686608, "grad_norm": 0.1335821510798814, "learning_rate": 3.756483516096702e-06, "loss": 0.7053, "step": 2691 }, { "epoch": 1.3356087873898472, "grad_norm": 0.1383320848994581, "learning_rate": 3.7556384789512156e-06, "loss": 0.7501, "step": 2692 }, { "epoch": 1.3361052500930868, "grad_norm": 0.14093892215142187, "learning_rate": 3.754793249901521e-06, "loss": 0.7381, "step": 2693 }, { "epoch": 1.3366017127963261, "grad_norm": 0.13669736220023862, "learning_rate": 3.753947829076797e-06, "loss": 0.6916, "step": 2694 }, { "epoch": 1.3370981754995657, "grad_norm": 0.13362283897482466, "learning_rate": 3.7531022166062538e-06, "loss": 0.7439, "step": 2695 }, { "epoch": 1.337594638202805, "grad_norm": 0.13205258206921125, "learning_rate": 3.7522564126191276e-06, "loss": 0.6928, "step": 2696 }, { "epoch": 1.3380911009060443, "grad_norm": 0.13796484578752577, "learning_rate": 3.751410417244687e-06, "loss": 0.8034, "step": 2697 }, { "epoch": 1.3385875636092839, "grad_norm": 0.13414239398576636, "learning_rate": 3.7505642306122293e-06, "loss": 0.7181, "step": 2698 }, { "epoch": 1.3390840263125232, "grad_norm": 0.14145403843670953, "learning_rate": 3.7497178528510803e-06, "loss": 0.691, "step": 2699 }, { "epoch": 1.3395804890157628, "grad_norm": 0.1379866463002123, "learning_rate": 3.7488712840905955e-06, "loss": 0.7399, "step": 2700 }, { "epoch": 1.340076951719002, "grad_norm": 0.13204289710931297, "learning_rate": 3.7480245244601587e-06, "loss": 0.7253, "step": 2701 }, { "epoch": 1.3405734144222414, "grad_norm": 0.1397985609990718, "learning_rate": 3.747177574089184e-06, "loss": 0.7438, "step": 2702 }, { "epoch": 1.341069877125481, "grad_norm": 0.14161488684867624, "learning_rate": 3.746330433107114e-06, "loss": 0.7537, "step": 2703 }, { "epoch": 1.3415663398287203, "grad_norm": 0.14437747804639947, "learning_rate": 3.7454831016434206e-06, "loss": 0.7241, "step": 2704 }, { "epoch": 1.3420628025319599, "grad_norm": 0.13861952176789427, "learning_rate": 3.744635579827606e-06, "loss": 0.7398, "step": 2705 }, { "epoch": 1.3425592652351992, "grad_norm": 0.1399777307376627, "learning_rate": 3.7437878677891977e-06, "loss": 0.7055, "step": 2706 }, { "epoch": 1.3430557279384385, "grad_norm": 0.1370426735312947, "learning_rate": 3.742939965657757e-06, "loss": 0.7354, "step": 2707 }, { "epoch": 1.343552190641678, "grad_norm": 0.1457622692943864, "learning_rate": 3.7420918735628714e-06, "loss": 0.7592, "step": 2708 }, { "epoch": 1.3440486533449174, "grad_norm": 0.1301536298141118, "learning_rate": 3.741243591634159e-06, "loss": 0.7435, "step": 2709 }, { "epoch": 1.344545116048157, "grad_norm": 0.13359769838918073, "learning_rate": 3.7403951200012645e-06, "loss": 0.7271, "step": 2710 }, { "epoch": 1.3450415787513963, "grad_norm": 0.1361918708494249, "learning_rate": 3.7395464587938652e-06, "loss": 0.7421, "step": 2711 }, { "epoch": 1.3455380414546356, "grad_norm": 0.13334017361026135, "learning_rate": 3.738697608141664e-06, "loss": 0.6886, "step": 2712 }, { "epoch": 1.3460345041578752, "grad_norm": 0.1387171887689253, "learning_rate": 3.7378485681743935e-06, "loss": 0.6873, "step": 2713 }, { "epoch": 1.3465309668611145, "grad_norm": 0.14983174029057572, "learning_rate": 3.7369993390218172e-06, "loss": 0.7513, "step": 2714 }, { "epoch": 1.347027429564354, "grad_norm": 0.1424875756746005, "learning_rate": 3.736149920813726e-06, "loss": 0.6993, "step": 2715 }, { "epoch": 1.3475238922675934, "grad_norm": 0.13814327332815962, "learning_rate": 3.7353003136799394e-06, "loss": 0.7383, "step": 2716 }, { "epoch": 1.3480203549708327, "grad_norm": 0.13291744742349632, "learning_rate": 3.7344505177503064e-06, "loss": 0.6902, "step": 2717 }, { "epoch": 1.3485168176740723, "grad_norm": 0.1294966956204128, "learning_rate": 3.733600533154705e-06, "loss": 0.6753, "step": 2718 }, { "epoch": 1.3490132803773116, "grad_norm": 0.13384401858376338, "learning_rate": 3.732750360023041e-06, "loss": 0.7194, "step": 2719 }, { "epoch": 1.3495097430805512, "grad_norm": 0.1510726665603985, "learning_rate": 3.73189999848525e-06, "loss": 0.7286, "step": 2720 }, { "epoch": 1.3500062057837905, "grad_norm": 0.17389508520864652, "learning_rate": 3.7310494486712966e-06, "loss": 0.7334, "step": 2721 }, { "epoch": 1.3505026684870298, "grad_norm": 0.14457583333245747, "learning_rate": 3.730198710711173e-06, "loss": 0.7759, "step": 2722 }, { "epoch": 1.3509991311902694, "grad_norm": 0.1304606464207145, "learning_rate": 3.729347784734901e-06, "loss": 0.7004, "step": 2723 }, { "epoch": 1.3514955938935087, "grad_norm": 0.13664237040256322, "learning_rate": 3.7284966708725316e-06, "loss": 0.7148, "step": 2724 }, { "epoch": 1.3519920565967483, "grad_norm": 0.1319353306336256, "learning_rate": 3.727645369254144e-06, "loss": 0.7099, "step": 2725 }, { "epoch": 1.3524885192999876, "grad_norm": 0.13747258866291462, "learning_rate": 3.7267938800098454e-06, "loss": 0.7371, "step": 2726 }, { "epoch": 1.352984982003227, "grad_norm": 0.13461034808749692, "learning_rate": 3.7259422032697724e-06, "loss": 0.7092, "step": 2727 }, { "epoch": 1.3534814447064665, "grad_norm": 0.13212884182845225, "learning_rate": 3.7250903391640893e-06, "loss": 0.69, "step": 2728 }, { "epoch": 1.3539779074097058, "grad_norm": 0.14143560310569692, "learning_rate": 3.724238287822991e-06, "loss": 0.7601, "step": 2729 }, { "epoch": 1.3544743701129454, "grad_norm": 0.13608277738602498, "learning_rate": 3.723386049376699e-06, "loss": 0.7386, "step": 2730 }, { "epoch": 1.3549708328161847, "grad_norm": 0.13308627959339595, "learning_rate": 3.7225336239554655e-06, "loss": 0.6834, "step": 2731 }, { "epoch": 1.355467295519424, "grad_norm": 0.13859104882727302, "learning_rate": 3.72168101168957e-06, "loss": 0.7224, "step": 2732 }, { "epoch": 1.3559637582226636, "grad_norm": 0.13567729073653412, "learning_rate": 3.7208282127093197e-06, "loss": 0.6887, "step": 2733 }, { "epoch": 1.356460220925903, "grad_norm": 0.13365628178016073, "learning_rate": 3.7199752271450514e-06, "loss": 0.7077, "step": 2734 }, { "epoch": 1.3569566836291425, "grad_norm": 0.13570402215298025, "learning_rate": 3.71912205512713e-06, "loss": 0.709, "step": 2735 }, { "epoch": 1.3574531463323818, "grad_norm": 0.13867332477147382, "learning_rate": 3.71826869678595e-06, "loss": 0.7279, "step": 2736 }, { "epoch": 1.3579496090356211, "grad_norm": 0.14117778740626497, "learning_rate": 3.717415152251933e-06, "loss": 0.7077, "step": 2737 }, { "epoch": 1.3584460717388607, "grad_norm": 0.13314975615492863, "learning_rate": 3.71656142165553e-06, "loss": 0.7517, "step": 2738 }, { "epoch": 1.3589425344421, "grad_norm": 0.13579612596374233, "learning_rate": 3.7157075051272196e-06, "loss": 0.7127, "step": 2739 }, { "epoch": 1.3594389971453396, "grad_norm": 0.13878702089854905, "learning_rate": 3.71485340279751e-06, "loss": 0.7418, "step": 2740 }, { "epoch": 1.3599354598485789, "grad_norm": 0.14239282194169628, "learning_rate": 3.7139991147969363e-06, "loss": 0.7374, "step": 2741 }, { "epoch": 1.3604319225518182, "grad_norm": 0.14056420662523464, "learning_rate": 3.7131446412560624e-06, "loss": 0.7458, "step": 2742 }, { "epoch": 1.3609283852550578, "grad_norm": 0.13228428795282451, "learning_rate": 3.7122899823054815e-06, "loss": 0.7236, "step": 2743 }, { "epoch": 1.361424847958297, "grad_norm": 0.13281946270383405, "learning_rate": 3.7114351380758145e-06, "loss": 0.6975, "step": 2744 }, { "epoch": 1.3619213106615367, "grad_norm": 0.13670672603836637, "learning_rate": 3.71058010869771e-06, "loss": 0.7332, "step": 2745 }, { "epoch": 1.362417773364776, "grad_norm": 0.14071995024167427, "learning_rate": 3.7097248943018467e-06, "loss": 0.7082, "step": 2746 }, { "epoch": 1.3629142360680153, "grad_norm": 0.13414480406046128, "learning_rate": 3.7088694950189297e-06, "loss": 0.7126, "step": 2747 }, { "epoch": 1.3634106987712549, "grad_norm": 0.14516674208748181, "learning_rate": 3.7080139109796933e-06, "loss": 0.7322, "step": 2748 }, { "epoch": 1.3639071614744942, "grad_norm": 0.13482497758832893, "learning_rate": 3.7071581423148996e-06, "loss": 0.7323, "step": 2749 }, { "epoch": 1.3644036241777338, "grad_norm": 0.13236613491037902, "learning_rate": 3.7063021891553384e-06, "loss": 0.7301, "step": 2750 }, { "epoch": 1.364900086880973, "grad_norm": 0.13863145388334375, "learning_rate": 3.7054460516318302e-06, "loss": 0.6891, "step": 2751 }, { "epoch": 1.3653965495842124, "grad_norm": 0.1305802200578501, "learning_rate": 3.7045897298752196e-06, "loss": 0.754, "step": 2752 }, { "epoch": 1.365893012287452, "grad_norm": 0.1320898504801369, "learning_rate": 3.703733224016384e-06, "loss": 0.7232, "step": 2753 }, { "epoch": 1.3663894749906913, "grad_norm": 0.1382071992088785, "learning_rate": 3.7028765341862256e-06, "loss": 0.6633, "step": 2754 }, { "epoch": 1.3668859376939309, "grad_norm": 0.13239411486453712, "learning_rate": 3.702019660515675e-06, "loss": 0.6814, "step": 2755 }, { "epoch": 1.3673824003971702, "grad_norm": 0.13222359579600135, "learning_rate": 3.7011626031356924e-06, "loss": 0.7755, "step": 2756 }, { "epoch": 1.3678788631004095, "grad_norm": 0.15096634032752865, "learning_rate": 3.7003053621772655e-06, "loss": 0.7288, "step": 2757 }, { "epoch": 1.368375325803649, "grad_norm": 0.13367062633330973, "learning_rate": 3.699447937771409e-06, "loss": 0.7281, "step": 2758 }, { "epoch": 1.3688717885068884, "grad_norm": 0.13930251553734874, "learning_rate": 3.698590330049167e-06, "loss": 0.7536, "step": 2759 }, { "epoch": 1.369368251210128, "grad_norm": 0.1333947524333695, "learning_rate": 3.697732539141611e-06, "loss": 0.7267, "step": 2760 }, { "epoch": 1.3698647139133673, "grad_norm": 0.13373105854598036, "learning_rate": 3.6968745651798404e-06, "loss": 0.7416, "step": 2761 }, { "epoch": 1.3703611766166066, "grad_norm": 0.15126475875952416, "learning_rate": 3.6960164082949827e-06, "loss": 0.755, "step": 2762 }, { "epoch": 1.3708576393198462, "grad_norm": 0.32186904582209985, "learning_rate": 3.6951580686181944e-06, "loss": 0.7316, "step": 2763 }, { "epoch": 1.3713541020230855, "grad_norm": 0.1458070721124448, "learning_rate": 3.6942995462806574e-06, "loss": 0.7678, "step": 2764 }, { "epoch": 1.371850564726325, "grad_norm": 0.13289186710198236, "learning_rate": 3.693440841413585e-06, "loss": 0.7255, "step": 2765 }, { "epoch": 1.3723470274295644, "grad_norm": 0.14607683472265695, "learning_rate": 3.6925819541482142e-06, "loss": 0.7241, "step": 2766 }, { "epoch": 1.3728434901328037, "grad_norm": 0.13565639692027404, "learning_rate": 3.691722884615814e-06, "loss": 0.7772, "step": 2767 }, { "epoch": 1.3733399528360433, "grad_norm": 0.1395165105118244, "learning_rate": 3.690863632947678e-06, "loss": 0.7336, "step": 2768 }, { "epoch": 1.3738364155392826, "grad_norm": 0.13691536096706877, "learning_rate": 3.69000419927513e-06, "loss": 0.7331, "step": 2769 }, { "epoch": 1.3743328782425221, "grad_norm": 0.13120428601535297, "learning_rate": 3.6891445837295215e-06, "loss": 0.7361, "step": 2770 }, { "epoch": 1.3748293409457615, "grad_norm": 0.13462822014791612, "learning_rate": 3.6882847864422287e-06, "loss": 0.7201, "step": 2771 }, { "epoch": 1.3753258036490008, "grad_norm": 0.15260067191313678, "learning_rate": 3.687424807544659e-06, "loss": 0.7355, "step": 2772 }, { "epoch": 1.3758222663522404, "grad_norm": 0.1356268994256984, "learning_rate": 3.686564647168247e-06, "loss": 0.6913, "step": 2773 }, { "epoch": 1.3763187290554797, "grad_norm": 0.1317137965843151, "learning_rate": 3.6857043054444534e-06, "loss": 0.6801, "step": 2774 }, { "epoch": 1.3768151917587192, "grad_norm": 0.13155836504147503, "learning_rate": 3.6848437825047678e-06, "loss": 0.701, "step": 2775 }, { "epoch": 1.3773116544619586, "grad_norm": 0.1271126456023588, "learning_rate": 3.6839830784807086e-06, "loss": 0.6839, "step": 2776 }, { "epoch": 1.377808117165198, "grad_norm": 0.14089310593605184, "learning_rate": 3.6831221935038185e-06, "loss": 0.7237, "step": 2777 }, { "epoch": 1.3783045798684375, "grad_norm": 0.13973014329965902, "learning_rate": 3.682261127705671e-06, "loss": 0.6941, "step": 2778 }, { "epoch": 1.3788010425716768, "grad_norm": 0.12910171833031905, "learning_rate": 3.6813998812178665e-06, "loss": 0.6799, "step": 2779 }, { "epoch": 1.3792975052749163, "grad_norm": 0.13356773763189392, "learning_rate": 3.680538454172033e-06, "loss": 0.6942, "step": 2780 }, { "epoch": 1.3797939679781557, "grad_norm": 0.14461559430138288, "learning_rate": 3.6796768466998256e-06, "loss": 0.728, "step": 2781 }, { "epoch": 1.380290430681395, "grad_norm": 0.14044495895998144, "learning_rate": 3.678815058932926e-06, "loss": 0.7567, "step": 2782 }, { "epoch": 1.3807868933846346, "grad_norm": 0.13731082360762506, "learning_rate": 3.6779530910030455e-06, "loss": 0.715, "step": 2783 }, { "epoch": 1.381283356087874, "grad_norm": 0.1445938769341219, "learning_rate": 3.6770909430419216e-06, "loss": 0.7439, "step": 2784 }, { "epoch": 1.3817798187911134, "grad_norm": 0.13607123879339897, "learning_rate": 3.6762286151813207e-06, "loss": 0.7089, "step": 2785 }, { "epoch": 1.3822762814943528, "grad_norm": 0.1425234259681637, "learning_rate": 3.6753661075530363e-06, "loss": 0.7555, "step": 2786 }, { "epoch": 1.382772744197592, "grad_norm": 0.13691972240341396, "learning_rate": 3.6745034202888868e-06, "loss": 0.6762, "step": 2787 }, { "epoch": 1.3832692069008314, "grad_norm": 0.1359578557968317, "learning_rate": 3.6736405535207215e-06, "loss": 0.6988, "step": 2788 }, { "epoch": 1.383765669604071, "grad_norm": 0.14251525701942244, "learning_rate": 3.672777507380416e-06, "loss": 0.8063, "step": 2789 }, { "epoch": 1.3842621323073105, "grad_norm": 0.1361191613459699, "learning_rate": 3.671914281999872e-06, "loss": 0.7197, "step": 2790 }, { "epoch": 1.3847585950105499, "grad_norm": 0.13112963174458994, "learning_rate": 3.6710508775110204e-06, "loss": 0.7006, "step": 2791 }, { "epoch": 1.3852550577137892, "grad_norm": 0.13997501686830185, "learning_rate": 3.670187294045819e-06, "loss": 0.7215, "step": 2792 }, { "epoch": 1.3857515204170285, "grad_norm": 0.13735338515895265, "learning_rate": 3.6693235317362513e-06, "loss": 0.6828, "step": 2793 }, { "epoch": 1.386247983120268, "grad_norm": 0.13804648308059947, "learning_rate": 3.6684595907143307e-06, "loss": 0.6705, "step": 2794 }, { "epoch": 1.3867444458235076, "grad_norm": 0.14859762749120925, "learning_rate": 3.6675954711120964e-06, "loss": 0.816, "step": 2795 }, { "epoch": 1.387240908526747, "grad_norm": 0.13538015684912, "learning_rate": 3.666731173061616e-06, "loss": 0.7397, "step": 2796 }, { "epoch": 1.3877373712299863, "grad_norm": 0.1310415765785277, "learning_rate": 3.6658666966949823e-06, "loss": 0.6973, "step": 2797 }, { "epoch": 1.3882338339332256, "grad_norm": 0.13540679229302185, "learning_rate": 3.665002042144318e-06, "loss": 0.7459, "step": 2798 }, { "epoch": 1.3887302966364652, "grad_norm": 0.14371652911821897, "learning_rate": 3.6641372095417703e-06, "loss": 0.7883, "step": 2799 }, { "epoch": 1.3892267593397047, "grad_norm": 0.13703857411739195, "learning_rate": 3.663272199019516e-06, "loss": 0.6883, "step": 2800 }, { "epoch": 1.389723222042944, "grad_norm": 0.13173661216385477, "learning_rate": 3.662407010709757e-06, "loss": 0.6969, "step": 2801 }, { "epoch": 1.3902196847461834, "grad_norm": 0.12981624688845955, "learning_rate": 3.661541644744725e-06, "loss": 0.6822, "step": 2802 }, { "epoch": 1.3907161474494227, "grad_norm": 0.13538269173616568, "learning_rate": 3.660676101256676e-06, "loss": 0.6703, "step": 2803 }, { "epoch": 1.3912126101526623, "grad_norm": 0.13513544679451667, "learning_rate": 3.659810380377895e-06, "loss": 0.7126, "step": 2804 }, { "epoch": 1.3917090728559016, "grad_norm": 0.13634117970671397, "learning_rate": 3.6589444822406938e-06, "loss": 0.7628, "step": 2805 }, { "epoch": 1.3922055355591412, "grad_norm": 0.13469088579151892, "learning_rate": 3.6580784069774104e-06, "loss": 0.6827, "step": 2806 }, { "epoch": 1.3927019982623805, "grad_norm": 0.14097471598925498, "learning_rate": 3.657212154720411e-06, "loss": 0.762, "step": 2807 }, { "epoch": 1.3931984609656198, "grad_norm": 0.13688984690687683, "learning_rate": 3.656345725602089e-06, "loss": 0.723, "step": 2808 }, { "epoch": 1.3936949236688594, "grad_norm": 0.13703297547170337, "learning_rate": 3.6554791197548624e-06, "loss": 0.7123, "step": 2809 }, { "epoch": 1.3941913863720987, "grad_norm": 0.13555651922489106, "learning_rate": 3.654612337311179e-06, "loss": 0.7277, "step": 2810 }, { "epoch": 1.3946878490753383, "grad_norm": 0.13430071149047185, "learning_rate": 3.6537453784035133e-06, "loss": 0.7369, "step": 2811 }, { "epoch": 1.3951843117785776, "grad_norm": 0.1342596803538824, "learning_rate": 3.6528782431643652e-06, "loss": 0.7032, "step": 2812 }, { "epoch": 1.395680774481817, "grad_norm": 0.14006820605549658, "learning_rate": 3.6520109317262624e-06, "loss": 0.711, "step": 2813 }, { "epoch": 1.3961772371850565, "grad_norm": 0.13345080058461264, "learning_rate": 3.65114344422176e-06, "loss": 0.7336, "step": 2814 }, { "epoch": 1.3966736998882958, "grad_norm": 0.13603896308716434, "learning_rate": 3.6502757807834392e-06, "loss": 0.7395, "step": 2815 }, { "epoch": 1.3971701625915354, "grad_norm": 0.15103712063012376, "learning_rate": 3.6494079415439087e-06, "loss": 0.7604, "step": 2816 }, { "epoch": 1.3976666252947747, "grad_norm": 0.14018161536599755, "learning_rate": 3.6485399266358033e-06, "loss": 0.748, "step": 2817 }, { "epoch": 1.398163087998014, "grad_norm": 0.13994970770354836, "learning_rate": 3.6476717361917867e-06, "loss": 0.7203, "step": 2818 }, { "epoch": 1.3986595507012536, "grad_norm": 0.1316527442596197, "learning_rate": 3.6468033703445456e-06, "loss": 0.7076, "step": 2819 }, { "epoch": 1.399156013404493, "grad_norm": 0.1369640741315148, "learning_rate": 3.645934829226797e-06, "loss": 0.7325, "step": 2820 }, { "epoch": 1.3996524761077325, "grad_norm": 0.12996187838882917, "learning_rate": 3.6450661129712837e-06, "loss": 0.7624, "step": 2821 }, { "epoch": 1.4001489388109718, "grad_norm": 0.1481187395661545, "learning_rate": 3.644197221710775e-06, "loss": 0.7542, "step": 2822 }, { "epoch": 1.4006454015142111, "grad_norm": 0.13864790575291003, "learning_rate": 3.6433281555780666e-06, "loss": 0.7592, "step": 2823 }, { "epoch": 1.4011418642174507, "grad_norm": 0.13723959229827493, "learning_rate": 3.6424589147059817e-06, "loss": 0.7052, "step": 2824 }, { "epoch": 1.40163832692069, "grad_norm": 0.14601293853858663, "learning_rate": 3.641589499227369e-06, "loss": 0.7408, "step": 2825 }, { "epoch": 1.4021347896239296, "grad_norm": 0.1381724745946492, "learning_rate": 3.6407199092751055e-06, "loss": 0.7931, "step": 2826 }, { "epoch": 1.402631252327169, "grad_norm": 0.14149071737598315, "learning_rate": 3.6398501449820937e-06, "loss": 0.6858, "step": 2827 }, { "epoch": 1.4031277150304082, "grad_norm": 0.13297864017352057, "learning_rate": 3.638980206481264e-06, "loss": 0.7066, "step": 2828 }, { "epoch": 1.4036241777336478, "grad_norm": 0.13269406137410125, "learning_rate": 3.638110093905572e-06, "loss": 0.7504, "step": 2829 }, { "epoch": 1.4041206404368871, "grad_norm": 0.13910245211978362, "learning_rate": 3.6372398073880006e-06, "loss": 0.7451, "step": 2830 }, { "epoch": 1.4046171031401267, "grad_norm": 0.13944456861859922, "learning_rate": 3.636369347061558e-06, "loss": 0.7568, "step": 2831 }, { "epoch": 1.405113565843366, "grad_norm": 0.13566144602099214, "learning_rate": 3.6354987130592814e-06, "loss": 0.7047, "step": 2832 }, { "epoch": 1.4056100285466053, "grad_norm": 0.13220561100847655, "learning_rate": 3.634627905514232e-06, "loss": 0.7132, "step": 2833 }, { "epoch": 1.4061064912498449, "grad_norm": 0.13332119323744976, "learning_rate": 3.6337569245595007e-06, "loss": 0.6862, "step": 2834 }, { "epoch": 1.4066029539530842, "grad_norm": 0.14000698566512068, "learning_rate": 3.632885770328202e-06, "loss": 0.7582, "step": 2835 }, { "epoch": 1.4070994166563238, "grad_norm": 0.1366951831127539, "learning_rate": 3.6320144429534764e-06, "loss": 0.7625, "step": 2836 }, { "epoch": 1.407595879359563, "grad_norm": 0.13797275616542223, "learning_rate": 3.631142942568495e-06, "loss": 0.715, "step": 2837 }, { "epoch": 1.4080923420628024, "grad_norm": 0.13373290920899406, "learning_rate": 3.630271269306451e-06, "loss": 0.6955, "step": 2838 }, { "epoch": 1.408588804766042, "grad_norm": 0.13580255325284915, "learning_rate": 3.629399423300566e-06, "loss": 0.7029, "step": 2839 }, { "epoch": 1.4090852674692813, "grad_norm": 0.13617036873069288, "learning_rate": 3.628527404684088e-06, "loss": 0.7225, "step": 2840 }, { "epoch": 1.4095817301725209, "grad_norm": 0.1303825802583835, "learning_rate": 3.6276552135902897e-06, "loss": 0.7193, "step": 2841 }, { "epoch": 1.4100781928757602, "grad_norm": 0.13392854436162474, "learning_rate": 3.626782850152473e-06, "loss": 0.6943, "step": 2842 }, { "epoch": 1.4105746555789995, "grad_norm": 0.13806544562439935, "learning_rate": 3.625910314503965e-06, "loss": 0.7319, "step": 2843 }, { "epoch": 1.411071118282239, "grad_norm": 0.14112019047589364, "learning_rate": 3.625037606778117e-06, "loss": 0.7376, "step": 2844 }, { "epoch": 1.4115675809854784, "grad_norm": 0.1293965423295043, "learning_rate": 3.62416472710831e-06, "loss": 0.6752, "step": 2845 }, { "epoch": 1.412064043688718, "grad_norm": 0.13458946982919687, "learning_rate": 3.6232916756279497e-06, "loss": 0.7362, "step": 2846 }, { "epoch": 1.4125605063919573, "grad_norm": 0.13524959671009265, "learning_rate": 3.6224184524704665e-06, "loss": 0.7239, "step": 2847 }, { "epoch": 1.4130569690951966, "grad_norm": 0.13614099418088066, "learning_rate": 3.6215450577693196e-06, "loss": 0.6924, "step": 2848 }, { "epoch": 1.4135534317984362, "grad_norm": 0.1392998440162914, "learning_rate": 3.6206714916579925e-06, "loss": 0.7753, "step": 2849 }, { "epoch": 1.4140498945016755, "grad_norm": 0.1391830902646412, "learning_rate": 3.6197977542699974e-06, "loss": 0.7125, "step": 2850 }, { "epoch": 1.414546357204915, "grad_norm": 0.13496515125842937, "learning_rate": 3.6189238457388704e-06, "loss": 0.7242, "step": 2851 }, { "epoch": 1.4150428199081544, "grad_norm": 0.13540843902758287, "learning_rate": 3.6180497661981733e-06, "loss": 0.7189, "step": 2852 }, { "epoch": 1.4155392826113937, "grad_norm": 0.1387877546863783, "learning_rate": 3.617175515781497e-06, "loss": 0.7011, "step": 2853 }, { "epoch": 1.4160357453146333, "grad_norm": 0.132229382194975, "learning_rate": 3.6163010946224552e-06, "loss": 0.7071, "step": 2854 }, { "epoch": 1.4165322080178726, "grad_norm": 0.13602711318145774, "learning_rate": 3.615426502854689e-06, "loss": 0.6891, "step": 2855 }, { "epoch": 1.4170286707211122, "grad_norm": 0.13600373841927585, "learning_rate": 3.6145517406118673e-06, "loss": 0.7299, "step": 2856 }, { "epoch": 1.4175251334243515, "grad_norm": 0.1324714432015326, "learning_rate": 3.613676808027682e-06, "loss": 0.715, "step": 2857 }, { "epoch": 1.4180215961275908, "grad_norm": 0.132452376428356, "learning_rate": 3.6128017052358535e-06, "loss": 0.7385, "step": 2858 }, { "epoch": 1.4185180588308304, "grad_norm": 0.13456443333348478, "learning_rate": 3.6119264323701257e-06, "loss": 0.7005, "step": 2859 }, { "epoch": 1.4190145215340697, "grad_norm": 0.13156032063720458, "learning_rate": 3.611050989564272e-06, "loss": 0.7155, "step": 2860 }, { "epoch": 1.4195109842373093, "grad_norm": 0.1342914875531962, "learning_rate": 3.6101753769520885e-06, "loss": 0.7072, "step": 2861 }, { "epoch": 1.4200074469405486, "grad_norm": 0.13305702646687323, "learning_rate": 3.6092995946673996e-06, "loss": 0.7399, "step": 2862 }, { "epoch": 1.420503909643788, "grad_norm": 0.1429132617790171, "learning_rate": 3.608423642844053e-06, "loss": 0.7405, "step": 2863 }, { "epoch": 1.4210003723470275, "grad_norm": 0.14321697351882565, "learning_rate": 3.607547521615926e-06, "loss": 0.7462, "step": 2864 }, { "epoch": 1.4214968350502668, "grad_norm": 0.13562736080521035, "learning_rate": 3.6066712311169173e-06, "loss": 0.7409, "step": 2865 }, { "epoch": 1.4219932977535064, "grad_norm": 0.1411773380972186, "learning_rate": 3.6057947714809555e-06, "loss": 0.7401, "step": 2866 }, { "epoch": 1.4224897604567457, "grad_norm": 0.1339410402597349, "learning_rate": 3.6049181428419935e-06, "loss": 0.7163, "step": 2867 }, { "epoch": 1.422986223159985, "grad_norm": 0.13416362073643048, "learning_rate": 3.6040413453340085e-06, "loss": 0.6948, "step": 2868 }, { "epoch": 1.4234826858632246, "grad_norm": 0.13676566176317306, "learning_rate": 3.6031643790910066e-06, "loss": 0.6875, "step": 2869 }, { "epoch": 1.423979148566464, "grad_norm": 0.1313168534405185, "learning_rate": 3.602287244247017e-06, "loss": 0.6881, "step": 2870 }, { "epoch": 1.4244756112697035, "grad_norm": 0.13677303706245855, "learning_rate": 3.6014099409360955e-06, "loss": 0.7393, "step": 2871 }, { "epoch": 1.4249720739729428, "grad_norm": 0.14114326056602117, "learning_rate": 3.6005324692923242e-06, "loss": 0.6949, "step": 2872 }, { "epoch": 1.4254685366761821, "grad_norm": 0.13688796725400607, "learning_rate": 3.5996548294498113e-06, "loss": 0.7323, "step": 2873 }, { "epoch": 1.4259649993794217, "grad_norm": 0.13491954912768386, "learning_rate": 3.598777021542689e-06, "loss": 0.7144, "step": 2874 }, { "epoch": 1.426461462082661, "grad_norm": 0.13401282593444483, "learning_rate": 3.5978990457051165e-06, "loss": 0.6912, "step": 2875 }, { "epoch": 1.4269579247859006, "grad_norm": 0.1402341000199709, "learning_rate": 3.597020902071278e-06, "loss": 0.7434, "step": 2876 }, { "epoch": 1.4274543874891399, "grad_norm": 0.14610852875970756, "learning_rate": 3.596142590775385e-06, "loss": 0.7442, "step": 2877 }, { "epoch": 1.4279508501923792, "grad_norm": 0.1395377838118118, "learning_rate": 3.5952641119516725e-06, "loss": 0.7266, "step": 2878 }, { "epoch": 1.4284473128956188, "grad_norm": 0.13617309441688394, "learning_rate": 3.594385465734401e-06, "loss": 0.7328, "step": 2879 }, { "epoch": 1.428943775598858, "grad_norm": 0.14331534212615055, "learning_rate": 3.5935066522578576e-06, "loss": 0.7336, "step": 2880 }, { "epoch": 1.4294402383020977, "grad_norm": 0.13574693488844447, "learning_rate": 3.592627671656356e-06, "loss": 0.6804, "step": 2881 }, { "epoch": 1.429936701005337, "grad_norm": 0.14742713291670548, "learning_rate": 3.5917485240642336e-06, "loss": 0.7212, "step": 2882 }, { "epoch": 1.4304331637085763, "grad_norm": 0.14811510498423192, "learning_rate": 3.590869209615854e-06, "loss": 0.7231, "step": 2883 }, { "epoch": 1.4309296264118159, "grad_norm": 0.13396810414200716, "learning_rate": 3.589989728445607e-06, "loss": 0.7543, "step": 2884 }, { "epoch": 1.4314260891150552, "grad_norm": 0.13821113307135227, "learning_rate": 3.589110080687907e-06, "loss": 0.7313, "step": 2885 }, { "epoch": 1.4319225518182948, "grad_norm": 0.1403140258389397, "learning_rate": 3.588230266477193e-06, "loss": 0.7028, "step": 2886 }, { "epoch": 1.432419014521534, "grad_norm": 0.13675009572376795, "learning_rate": 3.5873502859479316e-06, "loss": 0.7236, "step": 2887 }, { "epoch": 1.4329154772247734, "grad_norm": 0.129493586039917, "learning_rate": 3.5864701392346125e-06, "loss": 0.7098, "step": 2888 }, { "epoch": 1.433411939928013, "grad_norm": 0.13454419198703996, "learning_rate": 3.5855898264717535e-06, "loss": 0.6686, "step": 2889 }, { "epoch": 1.4339084026312523, "grad_norm": 0.1347116060515282, "learning_rate": 3.5847093477938955e-06, "loss": 0.7205, "step": 2890 }, { "epoch": 1.4344048653344919, "grad_norm": 0.13371886505328962, "learning_rate": 3.583828703335606e-06, "loss": 0.7308, "step": 2891 }, { "epoch": 1.4349013280377312, "grad_norm": 0.1312328405818565, "learning_rate": 3.5829478932314763e-06, "loss": 0.7005, "step": 2892 }, { "epoch": 1.4353977907409705, "grad_norm": 0.13178342567206527, "learning_rate": 3.582066917616126e-06, "loss": 0.7287, "step": 2893 }, { "epoch": 1.43589425344421, "grad_norm": 0.13275451628062787, "learning_rate": 3.5811857766241966e-06, "loss": 0.6984, "step": 2894 }, { "epoch": 1.4363907161474494, "grad_norm": 0.1341420790665092, "learning_rate": 3.5803044703903566e-06, "loss": 0.6854, "step": 2895 }, { "epoch": 1.436887178850689, "grad_norm": 0.13503560969815714, "learning_rate": 3.579422999049299e-06, "loss": 0.7089, "step": 2896 }, { "epoch": 1.4373836415539283, "grad_norm": 0.14368925529103932, "learning_rate": 3.578541362735744e-06, "loss": 0.7332, "step": 2897 }, { "epoch": 1.4378801042571676, "grad_norm": 0.13993715166267706, "learning_rate": 3.5776595615844343e-06, "loss": 0.7407, "step": 2898 }, { "epoch": 1.4383765669604072, "grad_norm": 0.13262872626301542, "learning_rate": 3.5767775957301402e-06, "loss": 0.7408, "step": 2899 }, { "epoch": 1.4388730296636465, "grad_norm": 0.12963084057116644, "learning_rate": 3.575895465307655e-06, "loss": 0.7321, "step": 2900 }, { "epoch": 1.439369492366886, "grad_norm": 0.13223114216083978, "learning_rate": 3.5750131704517987e-06, "loss": 0.7259, "step": 2901 }, { "epoch": 1.4398659550701254, "grad_norm": 0.13545769906603244, "learning_rate": 3.574130711297416e-06, "loss": 0.6985, "step": 2902 }, { "epoch": 1.4403624177733647, "grad_norm": 0.14269902147683536, "learning_rate": 3.5732480879793763e-06, "loss": 0.7081, "step": 2903 }, { "epoch": 1.4408588804766043, "grad_norm": 0.12939717891557706, "learning_rate": 3.572365300632574e-06, "loss": 0.7087, "step": 2904 }, { "epoch": 1.4413553431798436, "grad_norm": 0.13198711044968622, "learning_rate": 3.5714823493919305e-06, "loss": 0.6999, "step": 2905 }, { "epoch": 1.4418518058830831, "grad_norm": 0.14765136086907135, "learning_rate": 3.570599234392389e-06, "loss": 0.7115, "step": 2906 }, { "epoch": 1.4423482685863225, "grad_norm": 0.13571957777801233, "learning_rate": 3.569715955768921e-06, "loss": 0.6962, "step": 2907 }, { "epoch": 1.4428447312895618, "grad_norm": 0.13574730889325312, "learning_rate": 3.56883251365652e-06, "loss": 0.7506, "step": 2908 }, { "epoch": 1.4433411939928014, "grad_norm": 0.13592878384629192, "learning_rate": 3.5679489081902073e-06, "loss": 0.7426, "step": 2909 }, { "epoch": 1.4438376566960407, "grad_norm": 0.12847140272775823, "learning_rate": 3.5670651395050273e-06, "loss": 0.6982, "step": 2910 }, { "epoch": 1.4443341193992802, "grad_norm": 0.13102794138380155, "learning_rate": 3.5661812077360496e-06, "loss": 0.7805, "step": 2911 }, { "epoch": 1.4448305821025196, "grad_norm": 0.13544839042143206, "learning_rate": 3.5652971130183696e-06, "loss": 0.7568, "step": 2912 }, { "epoch": 1.445327044805759, "grad_norm": 0.12813227644476155, "learning_rate": 3.564412855487106e-06, "loss": 0.7082, "step": 2913 }, { "epoch": 1.4458235075089985, "grad_norm": 0.14071968051452866, "learning_rate": 3.5635284352774035e-06, "loss": 0.789, "step": 2914 }, { "epoch": 1.4463199702122378, "grad_norm": 0.13520452750443218, "learning_rate": 3.5626438525244335e-06, "loss": 0.7401, "step": 2915 }, { "epoch": 1.4468164329154773, "grad_norm": 0.142978764949799, "learning_rate": 3.5617591073633877e-06, "loss": 0.7518, "step": 2916 }, { "epoch": 1.4473128956187167, "grad_norm": 0.13847892621534574, "learning_rate": 3.560874199929487e-06, "loss": 0.7487, "step": 2917 }, { "epoch": 1.447809358321956, "grad_norm": 0.13593385078802972, "learning_rate": 3.5599891303579747e-06, "loss": 0.6684, "step": 2918 }, { "epoch": 1.4483058210251956, "grad_norm": 0.13672639592332414, "learning_rate": 3.559103898784119e-06, "loss": 0.7517, "step": 2919 }, { "epoch": 1.448802283728435, "grad_norm": 0.13336311824180638, "learning_rate": 3.5582185053432137e-06, "loss": 0.7068, "step": 2920 }, { "epoch": 1.4492987464316744, "grad_norm": 0.13281209912714664, "learning_rate": 3.5573329501705777e-06, "loss": 0.7424, "step": 2921 }, { "epoch": 1.4497952091349138, "grad_norm": 0.13043603194523082, "learning_rate": 3.556447233401553e-06, "loss": 0.7195, "step": 2922 }, { "epoch": 1.450291671838153, "grad_norm": 0.138625753926843, "learning_rate": 3.5555613551715072e-06, "loss": 0.7468, "step": 2923 }, { "epoch": 1.4507881345413927, "grad_norm": 0.14020220328471075, "learning_rate": 3.554675315615833e-06, "loss": 0.731, "step": 2924 }, { "epoch": 1.451284597244632, "grad_norm": 0.13627514864142962, "learning_rate": 3.5537891148699476e-06, "loss": 0.7456, "step": 2925 }, { "epoch": 1.4517810599478715, "grad_norm": 0.12926303570402706, "learning_rate": 3.552902753069293e-06, "loss": 0.7147, "step": 2926 }, { "epoch": 1.4522775226511109, "grad_norm": 0.14055188911284178, "learning_rate": 3.552016230349334e-06, "loss": 0.7487, "step": 2927 }, { "epoch": 1.4527739853543502, "grad_norm": 0.14061422002541768, "learning_rate": 3.551129546845561e-06, "loss": 0.7029, "step": 2928 }, { "epoch": 1.4532704480575895, "grad_norm": 0.13754407040631086, "learning_rate": 3.550242702693491e-06, "loss": 0.7129, "step": 2929 }, { "epoch": 1.453766910760829, "grad_norm": 0.1326488941997002, "learning_rate": 3.549355698028663e-06, "loss": 0.7334, "step": 2930 }, { "epoch": 1.4542633734640686, "grad_norm": 0.14133250168355752, "learning_rate": 3.5484685329866424e-06, "loss": 0.7082, "step": 2931 }, { "epoch": 1.454759836167308, "grad_norm": 0.13403821729170973, "learning_rate": 3.547581207703017e-06, "loss": 0.7646, "step": 2932 }, { "epoch": 1.4552562988705473, "grad_norm": 0.13637800149605267, "learning_rate": 3.5466937223134007e-06, "loss": 0.7302, "step": 2933 }, { "epoch": 1.4557527615737866, "grad_norm": 0.13875702141124502, "learning_rate": 3.5458060769534317e-06, "loss": 0.6984, "step": 2934 }, { "epoch": 1.4562492242770262, "grad_norm": 0.13654925956282576, "learning_rate": 3.5449182717587717e-06, "loss": 0.7609, "step": 2935 }, { "epoch": 1.4567456869802657, "grad_norm": 0.13693125562691924, "learning_rate": 3.5440303068651077e-06, "loss": 0.7749, "step": 2936 }, { "epoch": 1.457242149683505, "grad_norm": 0.13851101958423012, "learning_rate": 3.5431421824081512e-06, "loss": 0.7404, "step": 2937 }, { "epoch": 1.4577386123867444, "grad_norm": 0.1439809260358722, "learning_rate": 3.542253898523638e-06, "loss": 0.7923, "step": 2938 }, { "epoch": 1.4582350750899837, "grad_norm": 0.13805419183906129, "learning_rate": 3.5413654553473274e-06, "loss": 0.7017, "step": 2939 }, { "epoch": 1.4587315377932233, "grad_norm": 0.13835157639154178, "learning_rate": 3.5404768530150035e-06, "loss": 0.7551, "step": 2940 }, { "epoch": 1.4592280004964628, "grad_norm": 0.13287184038076372, "learning_rate": 3.539588091662476e-06, "loss": 0.712, "step": 2941 }, { "epoch": 1.4597244631997022, "grad_norm": 0.1452095077228309, "learning_rate": 3.5386991714255775e-06, "loss": 0.7842, "step": 2942 }, { "epoch": 1.4602209259029415, "grad_norm": 0.1314235216478171, "learning_rate": 3.537810092440165e-06, "loss": 0.6679, "step": 2943 }, { "epoch": 1.4607173886061808, "grad_norm": 0.13906999072318538, "learning_rate": 3.536920854842119e-06, "loss": 0.7524, "step": 2944 }, { "epoch": 1.4612138513094204, "grad_norm": 0.13429900107538834, "learning_rate": 3.5360314587673463e-06, "loss": 0.7309, "step": 2945 }, { "epoch": 1.4617103140126597, "grad_norm": 0.141994678409815, "learning_rate": 3.5351419043517764e-06, "loss": 0.7338, "step": 2946 }, { "epoch": 1.4622067767158993, "grad_norm": 0.13636727486493982, "learning_rate": 3.5342521917313643e-06, "loss": 0.6874, "step": 2947 }, { "epoch": 1.4627032394191386, "grad_norm": 0.14177928006977158, "learning_rate": 3.5333623210420877e-06, "loss": 0.7831, "step": 2948 }, { "epoch": 1.463199702122378, "grad_norm": 0.13586560334697423, "learning_rate": 3.532472292419949e-06, "loss": 0.7399, "step": 2949 }, { "epoch": 1.4636961648256175, "grad_norm": 0.1395384541698917, "learning_rate": 3.531582106000975e-06, "loss": 0.7093, "step": 2950 }, { "epoch": 1.4641926275288568, "grad_norm": 0.1302253949626591, "learning_rate": 3.5306917619212157e-06, "loss": 0.7005, "step": 2951 }, { "epoch": 1.4646890902320964, "grad_norm": 0.13335922133133257, "learning_rate": 3.5298012603167463e-06, "loss": 0.7291, "step": 2952 }, { "epoch": 1.4651855529353357, "grad_norm": 0.1377977708057461, "learning_rate": 3.528910601323666e-06, "loss": 0.7324, "step": 2953 }, { "epoch": 1.465682015638575, "grad_norm": 0.13572427845757098, "learning_rate": 3.5280197850780986e-06, "loss": 0.723, "step": 2954 }, { "epoch": 1.4661784783418146, "grad_norm": 0.13536174132092055, "learning_rate": 3.527128811716189e-06, "loss": 0.7521, "step": 2955 }, { "epoch": 1.466674941045054, "grad_norm": 0.13617464941765656, "learning_rate": 3.5262376813741095e-06, "loss": 0.6934, "step": 2956 }, { "epoch": 1.4671714037482935, "grad_norm": 0.12812292562730554, "learning_rate": 3.525346394188055e-06, "loss": 0.7028, "step": 2957 }, { "epoch": 1.4676678664515328, "grad_norm": 0.13368438695645662, "learning_rate": 3.524454950294244e-06, "loss": 0.6785, "step": 2958 }, { "epoch": 1.4681643291547721, "grad_norm": 0.14014375660987655, "learning_rate": 3.523563349828921e-06, "loss": 0.7574, "step": 2959 }, { "epoch": 1.4686607918580117, "grad_norm": 0.13236384889227554, "learning_rate": 3.5226715929283507e-06, "loss": 0.7025, "step": 2960 }, { "epoch": 1.469157254561251, "grad_norm": 0.13657143100492303, "learning_rate": 3.521779679728824e-06, "loss": 0.7412, "step": 2961 }, { "epoch": 1.4696537172644906, "grad_norm": 0.14598345092076667, "learning_rate": 3.5208876103666566e-06, "loss": 0.7279, "step": 2962 }, { "epoch": 1.47015017996773, "grad_norm": 0.13362251541331926, "learning_rate": 3.519995384978187e-06, "loss": 0.7256, "step": 2963 }, { "epoch": 1.4706466426709692, "grad_norm": 0.1405944370949059, "learning_rate": 3.5191030036997774e-06, "loss": 0.7497, "step": 2964 }, { "epoch": 1.4711431053742088, "grad_norm": 0.13130473785305005, "learning_rate": 3.5182104666678136e-06, "loss": 0.7074, "step": 2965 }, { "epoch": 1.4716395680774481, "grad_norm": 0.1382341947573506, "learning_rate": 3.517317774018706e-06, "loss": 0.7273, "step": 2966 }, { "epoch": 1.4721360307806877, "grad_norm": 0.1398909288694717, "learning_rate": 3.516424925888887e-06, "loss": 0.7298, "step": 2967 }, { "epoch": 1.472632493483927, "grad_norm": 0.13083337585357746, "learning_rate": 3.515531922414816e-06, "loss": 0.6801, "step": 2968 }, { "epoch": 1.4731289561871663, "grad_norm": 0.13314911288815096, "learning_rate": 3.514638763732974e-06, "loss": 0.6965, "step": 2969 }, { "epoch": 1.4736254188904059, "grad_norm": 0.14032556414048164, "learning_rate": 3.5137454499798646e-06, "loss": 0.7974, "step": 2970 }, { "epoch": 1.4741218815936452, "grad_norm": 0.13813913057905594, "learning_rate": 3.5128519812920176e-06, "loss": 0.7583, "step": 2971 }, { "epoch": 1.4746183442968848, "grad_norm": 0.13702265237609512, "learning_rate": 3.5119583578059845e-06, "loss": 0.703, "step": 2972 }, { "epoch": 1.475114807000124, "grad_norm": 0.13982120405929221, "learning_rate": 3.5110645796583425e-06, "loss": 0.7253, "step": 2973 }, { "epoch": 1.4756112697033634, "grad_norm": 0.1364392260235214, "learning_rate": 3.5101706469856913e-06, "loss": 0.748, "step": 2974 }, { "epoch": 1.476107732406603, "grad_norm": 0.140027409645103, "learning_rate": 3.509276559924653e-06, "loss": 0.7313, "step": 2975 }, { "epoch": 1.4766041951098423, "grad_norm": 0.1320770359244831, "learning_rate": 3.5083823186118748e-06, "loss": 0.6947, "step": 2976 }, { "epoch": 1.4771006578130819, "grad_norm": 0.12822200062993674, "learning_rate": 3.5074879231840274e-06, "loss": 0.6715, "step": 2977 }, { "epoch": 1.4775971205163212, "grad_norm": 0.13286535524254892, "learning_rate": 3.5065933737778054e-06, "loss": 0.7157, "step": 2978 }, { "epoch": 1.4780935832195605, "grad_norm": 0.13858890088438325, "learning_rate": 3.505698670529925e-06, "loss": 0.7208, "step": 2979 }, { "epoch": 1.4785900459228, "grad_norm": 0.13027070434072718, "learning_rate": 3.5048038135771285e-06, "loss": 0.7217, "step": 2980 }, { "epoch": 1.4790865086260394, "grad_norm": 0.1413449153673528, "learning_rate": 3.5039088030561798e-06, "loss": 0.7565, "step": 2981 }, { "epoch": 1.479582971329279, "grad_norm": 0.1400056680073501, "learning_rate": 3.503013639103867e-06, "loss": 0.7495, "step": 2982 }, { "epoch": 1.4800794340325183, "grad_norm": 0.13165330464949065, "learning_rate": 3.502118321857001e-06, "loss": 0.7036, "step": 2983 }, { "epoch": 1.4805758967357576, "grad_norm": 0.13774638206096473, "learning_rate": 3.5012228514524177e-06, "loss": 0.7311, "step": 2984 }, { "epoch": 1.4810723594389972, "grad_norm": 0.13581207084716526, "learning_rate": 3.5003272280269745e-06, "loss": 0.682, "step": 2985 }, { "epoch": 1.4815688221422365, "grad_norm": 0.13429634165776128, "learning_rate": 3.4994314517175544e-06, "loss": 0.6948, "step": 2986 }, { "epoch": 1.482065284845476, "grad_norm": 0.13400596652450936, "learning_rate": 3.4985355226610613e-06, "loss": 0.7803, "step": 2987 }, { "epoch": 1.4825617475487154, "grad_norm": 0.13658283943813354, "learning_rate": 3.4976394409944236e-06, "loss": 0.6891, "step": 2988 }, { "epoch": 1.4830582102519547, "grad_norm": 0.1434351594908228, "learning_rate": 3.4967432068545933e-06, "loss": 0.706, "step": 2989 }, { "epoch": 1.4835546729551943, "grad_norm": 0.14046517002064254, "learning_rate": 3.4958468203785454e-06, "loss": 0.7323, "step": 2990 }, { "epoch": 1.4840511356584336, "grad_norm": 0.14214435255887042, "learning_rate": 3.4949502817032787e-06, "loss": 0.7598, "step": 2991 }, { "epoch": 1.4845475983616732, "grad_norm": 0.13754977790132406, "learning_rate": 3.4940535909658134e-06, "loss": 0.7143, "step": 2992 }, { "epoch": 1.4850440610649125, "grad_norm": 0.14453237890736265, "learning_rate": 3.493156748303196e-06, "loss": 0.7702, "step": 2993 }, { "epoch": 1.4855405237681518, "grad_norm": 0.1373786250860733, "learning_rate": 3.4922597538524925e-06, "loss": 0.732, "step": 2994 }, { "epoch": 1.4860369864713914, "grad_norm": 0.1325135135773225, "learning_rate": 3.491362607750796e-06, "loss": 0.6744, "step": 2995 }, { "epoch": 1.4865334491746307, "grad_norm": 0.13625614276317374, "learning_rate": 3.4904653101352204e-06, "loss": 0.7523, "step": 2996 }, { "epoch": 1.4870299118778703, "grad_norm": 0.13518539082417194, "learning_rate": 3.4895678611429027e-06, "loss": 0.7093, "step": 2997 }, { "epoch": 1.4875263745811096, "grad_norm": 0.13657072146153515, "learning_rate": 3.4886702609110045e-06, "loss": 0.717, "step": 2998 }, { "epoch": 1.488022837284349, "grad_norm": 0.13163907731177207, "learning_rate": 3.487772509576709e-06, "loss": 0.7427, "step": 2999 }, { "epoch": 1.4885192999875885, "grad_norm": 0.1352649941460766, "learning_rate": 3.4868746072772224e-06, "loss": 0.7275, "step": 3000 }, { "epoch": 1.4890157626908278, "grad_norm": 0.13558793748009995, "learning_rate": 3.4859765541497758e-06, "loss": 0.7283, "step": 3001 }, { "epoch": 1.4895122253940674, "grad_norm": 0.1364033885783727, "learning_rate": 3.485078350331622e-06, "loss": 0.7014, "step": 3002 }, { "epoch": 1.4900086880973067, "grad_norm": 0.13933690940861296, "learning_rate": 3.4841799959600364e-06, "loss": 0.744, "step": 3003 }, { "epoch": 1.490505150800546, "grad_norm": 0.14140035289104919, "learning_rate": 3.4832814911723187e-06, "loss": 0.7211, "step": 3004 }, { "epoch": 1.4910016135037856, "grad_norm": 0.13562997010154254, "learning_rate": 3.482382836105791e-06, "loss": 0.7335, "step": 3005 }, { "epoch": 1.491498076207025, "grad_norm": 0.13945900902072886, "learning_rate": 3.481484030897798e-06, "loss": 0.8175, "step": 3006 }, { "epoch": 1.4919945389102645, "grad_norm": 0.13803655176116866, "learning_rate": 3.4805850756857083e-06, "loss": 0.7692, "step": 3007 }, { "epoch": 1.4924910016135038, "grad_norm": 0.14038571658779134, "learning_rate": 3.479685970606912e-06, "loss": 0.7348, "step": 3008 }, { "epoch": 1.4929874643167431, "grad_norm": 0.13668694894227656, "learning_rate": 3.478786715798823e-06, "loss": 0.7535, "step": 3009 }, { "epoch": 1.4934839270199827, "grad_norm": 0.12831253400925277, "learning_rate": 3.4778873113988776e-06, "loss": 0.7013, "step": 3010 }, { "epoch": 1.493980389723222, "grad_norm": 0.13510790471247616, "learning_rate": 3.4769877575445366e-06, "loss": 0.7007, "step": 3011 }, { "epoch": 1.4944768524264616, "grad_norm": 0.13836794462260757, "learning_rate": 3.4760880543732816e-06, "loss": 0.6941, "step": 3012 }, { "epoch": 1.4949733151297009, "grad_norm": 0.13929710909188023, "learning_rate": 3.4751882020226174e-06, "loss": 0.7835, "step": 3013 }, { "epoch": 1.4954697778329402, "grad_norm": 0.13277890430638015, "learning_rate": 3.4742882006300734e-06, "loss": 0.6666, "step": 3014 }, { "epoch": 1.4959662405361798, "grad_norm": 0.13390371855032038, "learning_rate": 3.4733880503331983e-06, "loss": 0.7052, "step": 3015 }, { "epoch": 1.496462703239419, "grad_norm": 0.13663110970590517, "learning_rate": 3.4724877512695677e-06, "loss": 0.7179, "step": 3016 }, { "epoch": 1.4969591659426587, "grad_norm": 0.13773347718243625, "learning_rate": 3.4715873035767766e-06, "loss": 0.7086, "step": 3017 }, { "epoch": 1.497455628645898, "grad_norm": 0.1363564450553434, "learning_rate": 3.4706867073924446e-06, "loss": 0.7173, "step": 3018 }, { "epoch": 1.4979520913491373, "grad_norm": 0.13186824374836328, "learning_rate": 3.469785962854213e-06, "loss": 0.7346, "step": 3019 }, { "epoch": 1.4984485540523769, "grad_norm": 0.13838548862045663, "learning_rate": 3.4688850700997455e-06, "loss": 0.723, "step": 3020 }, { "epoch": 1.4989450167556162, "grad_norm": 0.13121107819663974, "learning_rate": 3.467984029266731e-06, "loss": 0.7178, "step": 3021 }, { "epoch": 1.4994414794588558, "grad_norm": 0.1359428642141061, "learning_rate": 3.467082840492878e-06, "loss": 0.731, "step": 3022 }, { "epoch": 1.499937942162095, "grad_norm": 0.13804903069761093, "learning_rate": 3.4661815039159186e-06, "loss": 0.6986, "step": 3023 }, { "epoch": 1.5004344048653344, "grad_norm": 0.12815687067739065, "learning_rate": 3.465280019673608e-06, "loss": 0.7029, "step": 3024 }, { "epoch": 1.5004344048653344, "eval_loss": 0.7363704442977905, "eval_runtime": 135.8597, "eval_samples_per_second": 223.414, "eval_steps_per_second": 27.933, "step": 3024 }, { "epoch": 1.5009308675685737, "grad_norm": 0.1470067433495751, "learning_rate": 3.4643783879037235e-06, "loss": 0.7213, "step": 3025 }, { "epoch": 1.5014273302718133, "grad_norm": 0.21715999444549708, "learning_rate": 3.4634766087440645e-06, "loss": 0.7743, "step": 3026 }, { "epoch": 1.5019237929750529, "grad_norm": 0.13601624552016667, "learning_rate": 3.4625746823324545e-06, "loss": 0.7205, "step": 3027 }, { "epoch": 1.5024202556782922, "grad_norm": 0.1323109309363181, "learning_rate": 3.4616726088067383e-06, "loss": 0.7679, "step": 3028 }, { "epoch": 1.5029167183815315, "grad_norm": 0.13051111527820897, "learning_rate": 3.460770388304782e-06, "loss": 0.7117, "step": 3029 }, { "epoch": 1.5034131810847708, "grad_norm": 0.1354078149158313, "learning_rate": 3.459868020964478e-06, "loss": 0.7352, "step": 3030 }, { "epoch": 1.5039096437880104, "grad_norm": 0.14053113861350988, "learning_rate": 3.4589655069237367e-06, "loss": 0.7575, "step": 3031 }, { "epoch": 1.50440610649125, "grad_norm": 0.13146810822091834, "learning_rate": 3.4580628463204936e-06, "loss": 0.7172, "step": 3032 }, { "epoch": 1.5049025691944893, "grad_norm": 0.13001154124097078, "learning_rate": 3.457160039292705e-06, "loss": 0.6829, "step": 3033 }, { "epoch": 1.5053990318977286, "grad_norm": 0.13574315079485202, "learning_rate": 3.456257085978352e-06, "loss": 0.7401, "step": 3034 }, { "epoch": 1.505895494600968, "grad_norm": 0.13467407064352518, "learning_rate": 3.4553539865154362e-06, "loss": 0.7276, "step": 3035 }, { "epoch": 1.5063919573042075, "grad_norm": 0.16132459867477508, "learning_rate": 3.4544507410419807e-06, "loss": 0.7567, "step": 3036 }, { "epoch": 1.506888420007447, "grad_norm": 0.13133038324487403, "learning_rate": 3.453547349696033e-06, "loss": 0.7089, "step": 3037 }, { "epoch": 1.5073848827106864, "grad_norm": 0.13434120206254171, "learning_rate": 3.4526438126156624e-06, "loss": 0.7183, "step": 3038 }, { "epoch": 1.5078813454139257, "grad_norm": 0.133739491071614, "learning_rate": 3.45174012993896e-06, "loss": 0.6883, "step": 3039 }, { "epoch": 1.508377808117165, "grad_norm": 0.13292833480778746, "learning_rate": 3.450836301804038e-06, "loss": 0.7354, "step": 3040 }, { "epoch": 1.5088742708204046, "grad_norm": 0.1310771402662206, "learning_rate": 3.449932328349033e-06, "loss": 0.7078, "step": 3041 }, { "epoch": 1.5093707335236441, "grad_norm": 0.13464111175584303, "learning_rate": 3.449028209712102e-06, "loss": 0.6777, "step": 3042 }, { "epoch": 1.5098671962268835, "grad_norm": 0.13372159063346198, "learning_rate": 3.4481239460314252e-06, "loss": 0.7078, "step": 3043 }, { "epoch": 1.5103636589301228, "grad_norm": 0.14328077784905502, "learning_rate": 3.4472195374452067e-06, "loss": 0.7541, "step": 3044 }, { "epoch": 1.5108601216333621, "grad_norm": 0.14108614619902352, "learning_rate": 3.446314984091669e-06, "loss": 0.7537, "step": 3045 }, { "epoch": 1.5113565843366017, "grad_norm": 0.1387413397672071, "learning_rate": 3.445410286109059e-06, "loss": 0.7313, "step": 3046 }, { "epoch": 1.5118530470398412, "grad_norm": 0.1437298602735984, "learning_rate": 3.444505443635645e-06, "loss": 0.7198, "step": 3047 }, { "epoch": 1.5123495097430806, "grad_norm": 0.14561460770522325, "learning_rate": 3.4436004568097177e-06, "loss": 0.7763, "step": 3048 }, { "epoch": 1.51284597244632, "grad_norm": 0.1437698294167037, "learning_rate": 3.4426953257695904e-06, "loss": 0.7282, "step": 3049 }, { "epoch": 1.5133424351495592, "grad_norm": 0.13602577897519388, "learning_rate": 3.4417900506535977e-06, "loss": 0.7059, "step": 3050 }, { "epoch": 1.5138388978527988, "grad_norm": 0.13627196775475553, "learning_rate": 3.440884631600096e-06, "loss": 0.6885, "step": 3051 }, { "epoch": 1.5143353605560383, "grad_norm": 0.13016712398288627, "learning_rate": 3.4399790687474638e-06, "loss": 0.7099, "step": 3052 }, { "epoch": 1.5148318232592777, "grad_norm": 0.1420534555060904, "learning_rate": 3.4390733622341025e-06, "loss": 0.7624, "step": 3053 }, { "epoch": 1.515328285962517, "grad_norm": 0.13710013215332392, "learning_rate": 3.438167512198436e-06, "loss": 0.7549, "step": 3054 }, { "epoch": 1.5158247486657563, "grad_norm": 0.14905485277450392, "learning_rate": 3.437261518778906e-06, "loss": 0.7153, "step": 3055 }, { "epoch": 1.516321211368996, "grad_norm": 0.13714440652108204, "learning_rate": 3.4363553821139822e-06, "loss": 0.7061, "step": 3056 }, { "epoch": 1.5168176740722354, "grad_norm": 0.1448142948924188, "learning_rate": 3.4354491023421503e-06, "loss": 0.7118, "step": 3057 }, { "epoch": 1.5173141367754748, "grad_norm": 0.13221928060116708, "learning_rate": 3.434542679601922e-06, "loss": 0.6995, "step": 3058 }, { "epoch": 1.517810599478714, "grad_norm": 0.1406755984922586, "learning_rate": 3.4336361140318298e-06, "loss": 0.7159, "step": 3059 }, { "epoch": 1.5183070621819534, "grad_norm": 0.13490595450975446, "learning_rate": 3.432729405770427e-06, "loss": 0.7348, "step": 3060 }, { "epoch": 1.518803524885193, "grad_norm": 0.1331655447314572, "learning_rate": 3.4318225549562907e-06, "loss": 0.6946, "step": 3061 }, { "epoch": 1.5192999875884325, "grad_norm": 0.1376254608523503, "learning_rate": 3.4309155617280164e-06, "loss": 0.7276, "step": 3062 }, { "epoch": 1.5197964502916719, "grad_norm": 0.131419562336429, "learning_rate": 3.430008426224225e-06, "loss": 0.7282, "step": 3063 }, { "epoch": 1.5202929129949112, "grad_norm": 0.13982511067051417, "learning_rate": 3.429101148583557e-06, "loss": 0.7345, "step": 3064 }, { "epoch": 1.5207893756981505, "grad_norm": 0.13655012597581628, "learning_rate": 3.4281937289446753e-06, "loss": 0.7052, "step": 3065 }, { "epoch": 1.52128583840139, "grad_norm": 0.13360080661036083, "learning_rate": 3.4272861674462653e-06, "loss": 0.6868, "step": 3066 }, { "epoch": 1.5217823011046296, "grad_norm": 0.12748341437271118, "learning_rate": 3.426378464227032e-06, "loss": 0.6866, "step": 3067 }, { "epoch": 1.522278763807869, "grad_norm": 0.13955206652734053, "learning_rate": 3.425470619425704e-06, "loss": 0.723, "step": 3068 }, { "epoch": 1.5227752265111083, "grad_norm": 0.1419812477049388, "learning_rate": 3.42456263318103e-06, "loss": 0.7752, "step": 3069 }, { "epoch": 1.5232716892143476, "grad_norm": 0.1301543894648006, "learning_rate": 3.4236545056317828e-06, "loss": 0.7152, "step": 3070 }, { "epoch": 1.5237681519175872, "grad_norm": 0.13531435292463873, "learning_rate": 3.422746236916753e-06, "loss": 0.7006, "step": 3071 }, { "epoch": 1.5242646146208267, "grad_norm": 0.1316850500436868, "learning_rate": 3.4218378271747566e-06, "loss": 0.706, "step": 3072 }, { "epoch": 1.524761077324066, "grad_norm": 0.13593788426927872, "learning_rate": 3.420929276544629e-06, "loss": 0.6914, "step": 3073 }, { "epoch": 1.5252575400273054, "grad_norm": 0.13157963484148716, "learning_rate": 3.420020585165227e-06, "loss": 0.6814, "step": 3074 }, { "epoch": 1.5257540027305447, "grad_norm": 0.13002115412677262, "learning_rate": 3.4191117531754296e-06, "loss": 0.7078, "step": 3075 }, { "epoch": 1.5262504654337843, "grad_norm": 0.13527130246956548, "learning_rate": 3.418202780714138e-06, "loss": 0.7036, "step": 3076 }, { "epoch": 1.5267469281370238, "grad_norm": 0.13262041773411234, "learning_rate": 3.4172936679202745e-06, "loss": 0.7757, "step": 3077 }, { "epoch": 1.5272433908402632, "grad_norm": 0.13085661822705708, "learning_rate": 3.4163844149327807e-06, "loss": 0.737, "step": 3078 }, { "epoch": 1.5277398535435025, "grad_norm": 0.13709896836317348, "learning_rate": 3.4154750218906226e-06, "loss": 0.7503, "step": 3079 }, { "epoch": 1.5282363162467418, "grad_norm": 0.15213123399903336, "learning_rate": 3.414565488932785e-06, "loss": 0.6988, "step": 3080 }, { "epoch": 1.5287327789499814, "grad_norm": 0.13278188283664377, "learning_rate": 3.4136558161982767e-06, "loss": 0.6908, "step": 3081 }, { "epoch": 1.529229241653221, "grad_norm": 0.1414124845528321, "learning_rate": 3.4127460038261274e-06, "loss": 0.7579, "step": 3082 }, { "epoch": 1.5297257043564603, "grad_norm": 0.1327347365759449, "learning_rate": 3.411836051955385e-06, "loss": 0.7004, "step": 3083 }, { "epoch": 1.5302221670596996, "grad_norm": 0.13080333502954117, "learning_rate": 3.410925960725123e-06, "loss": 0.6939, "step": 3084 }, { "epoch": 1.530718629762939, "grad_norm": 0.13257078672902534, "learning_rate": 3.4100157302744324e-06, "loss": 0.6948, "step": 3085 }, { "epoch": 1.5312150924661785, "grad_norm": 0.1322459345068561, "learning_rate": 3.4091053607424295e-06, "loss": 0.7182, "step": 3086 }, { "epoch": 1.531711555169418, "grad_norm": 0.14457430144423755, "learning_rate": 3.408194852268248e-06, "loss": 0.7577, "step": 3087 }, { "epoch": 1.5322080178726574, "grad_norm": 0.13598763735719102, "learning_rate": 3.4072842049910458e-06, "loss": 0.7082, "step": 3088 }, { "epoch": 1.5327044805758967, "grad_norm": 0.13563192470184163, "learning_rate": 3.406373419049999e-06, "loss": 0.7036, "step": 3089 }, { "epoch": 1.533200943279136, "grad_norm": 0.13408648564933517, "learning_rate": 3.4054624945843083e-06, "loss": 0.7111, "step": 3090 }, { "epoch": 1.5336974059823756, "grad_norm": 0.13291448319089896, "learning_rate": 3.404551431733193e-06, "loss": 0.692, "step": 3091 }, { "epoch": 1.5341938686856151, "grad_norm": 0.1357471952977748, "learning_rate": 3.403640230635895e-06, "loss": 0.7497, "step": 3092 }, { "epoch": 1.5346903313888545, "grad_norm": 0.13026474673855643, "learning_rate": 3.402728891431677e-06, "loss": 0.7322, "step": 3093 }, { "epoch": 1.5351867940920938, "grad_norm": 0.1344691776866103, "learning_rate": 3.4018174142598215e-06, "loss": 0.6748, "step": 3094 }, { "epoch": 1.5356832567953331, "grad_norm": 0.1275077838340116, "learning_rate": 3.400905799259634e-06, "loss": 0.6254, "step": 3095 }, { "epoch": 1.5361797194985727, "grad_norm": 0.13405200474821982, "learning_rate": 3.3999940465704397e-06, "loss": 0.7132, "step": 3096 }, { "epoch": 1.5366761822018122, "grad_norm": 0.137409082156887, "learning_rate": 3.3990821563315857e-06, "loss": 0.7572, "step": 3097 }, { "epoch": 1.5371726449050516, "grad_norm": 0.13337458486706275, "learning_rate": 3.3981701286824396e-06, "loss": 0.7658, "step": 3098 }, { "epoch": 1.537669107608291, "grad_norm": 0.14360779977581084, "learning_rate": 3.3972579637623916e-06, "loss": 0.7088, "step": 3099 }, { "epoch": 1.5381655703115302, "grad_norm": 0.1298485707521375, "learning_rate": 3.396345661710849e-06, "loss": 0.6769, "step": 3100 }, { "epoch": 1.5386620330147698, "grad_norm": 0.13722224767161476, "learning_rate": 3.3954332226672444e-06, "loss": 0.7531, "step": 3101 }, { "epoch": 1.5391584957180093, "grad_norm": 0.13819578335401508, "learning_rate": 3.394520646771029e-06, "loss": 0.764, "step": 3102 }, { "epoch": 1.5396549584212487, "grad_norm": 0.13810759011008472, "learning_rate": 3.3936079341616746e-06, "loss": 0.7278, "step": 3103 }, { "epoch": 1.540151421124488, "grad_norm": 0.13089937223200337, "learning_rate": 3.392695084978677e-06, "loss": 0.7161, "step": 3104 }, { "epoch": 1.5406478838277273, "grad_norm": 0.13279609037952766, "learning_rate": 3.391782099361547e-06, "loss": 0.7043, "step": 3105 }, { "epoch": 1.5411443465309669, "grad_norm": 0.1398313704093573, "learning_rate": 3.390868977449822e-06, "loss": 0.7615, "step": 3106 }, { "epoch": 1.5416408092342064, "grad_norm": 0.13318449528960383, "learning_rate": 3.3899557193830585e-06, "loss": 0.6915, "step": 3107 }, { "epoch": 1.5421372719374458, "grad_norm": 0.13626189356645965, "learning_rate": 3.389042325300832e-06, "loss": 0.7317, "step": 3108 }, { "epoch": 1.542633734640685, "grad_norm": 0.1354355510287649, "learning_rate": 3.3881287953427423e-06, "loss": 0.684, "step": 3109 }, { "epoch": 1.5431301973439244, "grad_norm": 0.13700779510437183, "learning_rate": 3.387215129648405e-06, "loss": 0.7707, "step": 3110 }, { "epoch": 1.543626660047164, "grad_norm": 0.13797047616141508, "learning_rate": 3.386301328357461e-06, "loss": 0.7168, "step": 3111 }, { "epoch": 1.5441231227504035, "grad_norm": 0.14189987866803816, "learning_rate": 3.3853873916095693e-06, "loss": 0.6903, "step": 3112 }, { "epoch": 1.5446195854536429, "grad_norm": 0.1373537496866223, "learning_rate": 3.3844733195444108e-06, "loss": 0.7168, "step": 3113 }, { "epoch": 1.5451160481568822, "grad_norm": 0.13354628494592605, "learning_rate": 3.383559112301687e-06, "loss": 0.7073, "step": 3114 }, { "epoch": 1.5456125108601215, "grad_norm": 0.134865493027683, "learning_rate": 3.38264477002112e-06, "loss": 0.7345, "step": 3115 }, { "epoch": 1.546108973563361, "grad_norm": 0.13796959049690674, "learning_rate": 3.3817302928424517e-06, "loss": 0.7315, "step": 3116 }, { "epoch": 1.5466054362666006, "grad_norm": 0.1321638322323823, "learning_rate": 3.380815680905446e-06, "loss": 0.6957, "step": 3117 }, { "epoch": 1.54710189896984, "grad_norm": 0.13809644723238076, "learning_rate": 3.3799009343498863e-06, "loss": 0.7913, "step": 3118 }, { "epoch": 1.5475983616730793, "grad_norm": 0.14219819418095606, "learning_rate": 3.3789860533155764e-06, "loss": 0.738, "step": 3119 }, { "epoch": 1.5480948243763186, "grad_norm": 0.1405366068876724, "learning_rate": 3.3780710379423425e-06, "loss": 0.7318, "step": 3120 }, { "epoch": 1.5485912870795582, "grad_norm": 0.12933097234296537, "learning_rate": 3.3771558883700286e-06, "loss": 0.7257, "step": 3121 }, { "epoch": 1.5490877497827977, "grad_norm": 0.13488996517159482, "learning_rate": 3.3762406047385012e-06, "loss": 0.6776, "step": 3122 }, { "epoch": 1.549584212486037, "grad_norm": 0.1393952179684061, "learning_rate": 3.3753251871876467e-06, "loss": 0.7573, "step": 3123 }, { "epoch": 1.5500806751892764, "grad_norm": 0.1386055398625771, "learning_rate": 3.3744096358573724e-06, "loss": 0.7472, "step": 3124 }, { "epoch": 1.5505771378925157, "grad_norm": 0.1360908123346939, "learning_rate": 3.3734939508876057e-06, "loss": 0.7527, "step": 3125 }, { "epoch": 1.5510736005957553, "grad_norm": 0.13138200688447002, "learning_rate": 3.3725781324182945e-06, "loss": 0.6892, "step": 3126 }, { "epoch": 1.5515700632989948, "grad_norm": 0.13489013837525454, "learning_rate": 3.3716621805894056e-06, "loss": 0.7711, "step": 3127 }, { "epoch": 1.5520665260022342, "grad_norm": 0.13712844460423768, "learning_rate": 3.370746095540928e-06, "loss": 0.7255, "step": 3128 }, { "epoch": 1.5525629887054735, "grad_norm": 0.13677616322194255, "learning_rate": 3.369829877412871e-06, "loss": 0.7349, "step": 3129 }, { "epoch": 1.5530594514087128, "grad_norm": 0.13423740883277105, "learning_rate": 3.368913526345265e-06, "loss": 0.7294, "step": 3130 }, { "epoch": 1.5535559141119524, "grad_norm": 0.1328411152620118, "learning_rate": 3.3679970424781584e-06, "loss": 0.709, "step": 3131 }, { "epoch": 1.5540523768151917, "grad_norm": 0.14012966529834792, "learning_rate": 3.3670804259516203e-06, "loss": 0.7004, "step": 3132 }, { "epoch": 1.5545488395184313, "grad_norm": 0.13291614080398312, "learning_rate": 3.366163676905742e-06, "loss": 0.7144, "step": 3133 }, { "epoch": 1.5550453022216706, "grad_norm": 0.13260952957552452, "learning_rate": 3.365246795480634e-06, "loss": 0.7659, "step": 3134 }, { "epoch": 1.55554176492491, "grad_norm": 0.135098720690184, "learning_rate": 3.3643297818164263e-06, "loss": 0.6968, "step": 3135 }, { "epoch": 1.5560382276281495, "grad_norm": 0.1337151472755321, "learning_rate": 3.3634126360532694e-06, "loss": 0.7579, "step": 3136 }, { "epoch": 1.5565346903313888, "grad_norm": 0.13823809792997804, "learning_rate": 3.362495358331336e-06, "loss": 0.7366, "step": 3137 }, { "epoch": 1.5570311530346284, "grad_norm": 0.13540112581495553, "learning_rate": 3.3615779487908147e-06, "loss": 0.778, "step": 3138 }, { "epoch": 1.5575276157378677, "grad_norm": 0.12755996566178962, "learning_rate": 3.3606604075719187e-06, "loss": 0.6915, "step": 3139 }, { "epoch": 1.558024078441107, "grad_norm": 0.131901346948097, "learning_rate": 3.359742734814879e-06, "loss": 0.6949, "step": 3140 }, { "epoch": 1.5585205411443466, "grad_norm": 0.13117046874293734, "learning_rate": 3.358824930659948e-06, "loss": 0.6904, "step": 3141 }, { "epoch": 1.559017003847586, "grad_norm": 0.13765163137556422, "learning_rate": 3.3579069952473964e-06, "loss": 0.7074, "step": 3142 }, { "epoch": 1.5595134665508255, "grad_norm": 0.143697911811937, "learning_rate": 3.3569889287175155e-06, "loss": 0.71, "step": 3143 }, { "epoch": 1.5600099292540648, "grad_norm": 0.13255298712174185, "learning_rate": 3.356070731210618e-06, "loss": 0.7144, "step": 3144 }, { "epoch": 1.5605063919573041, "grad_norm": 0.1390079209743311, "learning_rate": 3.3551524028670348e-06, "loss": 0.7387, "step": 3145 }, { "epoch": 1.5610028546605437, "grad_norm": 0.15837144226586045, "learning_rate": 3.3542339438271184e-06, "loss": 0.8018, "step": 3146 }, { "epoch": 1.561499317363783, "grad_norm": 0.13346525456733854, "learning_rate": 3.35331535423124e-06, "loss": 0.7318, "step": 3147 }, { "epoch": 1.5619957800670226, "grad_norm": 0.1303144364718177, "learning_rate": 3.352396634219792e-06, "loss": 0.691, "step": 3148 }, { "epoch": 1.5624922427702619, "grad_norm": 0.13463555403818014, "learning_rate": 3.351477783933186e-06, "loss": 0.7917, "step": 3149 }, { "epoch": 1.5629887054735012, "grad_norm": 0.13535091861217688, "learning_rate": 3.3505588035118517e-06, "loss": 0.7196, "step": 3150 }, { "epoch": 1.5634851681767408, "grad_norm": 0.1263572778973972, "learning_rate": 3.3496396930962437e-06, "loss": 0.6855, "step": 3151 }, { "epoch": 1.56398163087998, "grad_norm": 0.18384206339467457, "learning_rate": 3.3487204528268302e-06, "loss": 0.7055, "step": 3152 }, { "epoch": 1.5644780935832197, "grad_norm": 0.14081009970447547, "learning_rate": 3.347801082844105e-06, "loss": 0.7067, "step": 3153 }, { "epoch": 1.564974556286459, "grad_norm": 0.13263091004679445, "learning_rate": 3.3468815832885772e-06, "loss": 0.6618, "step": 3154 }, { "epoch": 1.5654710189896983, "grad_norm": 0.1440081195073848, "learning_rate": 3.3459619543007772e-06, "loss": 0.7223, "step": 3155 }, { "epoch": 1.5659674816929379, "grad_norm": 0.13271016860764132, "learning_rate": 3.345042196021257e-06, "loss": 0.7639, "step": 3156 }, { "epoch": 1.5664639443961772, "grad_norm": 0.13353122154660912, "learning_rate": 3.3441223085905873e-06, "loss": 0.7087, "step": 3157 }, { "epoch": 1.5669604070994168, "grad_norm": 0.13011358405310236, "learning_rate": 3.3432022921493555e-06, "loss": 0.723, "step": 3158 }, { "epoch": 1.567456869802656, "grad_norm": 0.13401305719243067, "learning_rate": 3.342282146838175e-06, "loss": 0.6941, "step": 3159 }, { "epoch": 1.5679533325058954, "grad_norm": 0.13781349363786358, "learning_rate": 3.3413618727976718e-06, "loss": 0.7399, "step": 3160 }, { "epoch": 1.568449795209135, "grad_norm": 0.1374012136941711, "learning_rate": 3.3404414701684966e-06, "loss": 0.6933, "step": 3161 }, { "epoch": 1.5689462579123743, "grad_norm": 0.1387539946595465, "learning_rate": 3.3395209390913184e-06, "loss": 0.7569, "step": 3162 }, { "epoch": 1.5694427206156139, "grad_norm": 0.13114107541423803, "learning_rate": 3.338600279706826e-06, "loss": 0.7204, "step": 3163 }, { "epoch": 1.5699391833188532, "grad_norm": 0.14071169984330964, "learning_rate": 3.3376794921557266e-06, "loss": 0.7178, "step": 3164 }, { "epoch": 1.5704356460220925, "grad_norm": 0.13592040074130213, "learning_rate": 3.3367585765787476e-06, "loss": 0.688, "step": 3165 }, { "epoch": 1.5709321087253318, "grad_norm": 0.14138854126933367, "learning_rate": 3.3358375331166364e-06, "loss": 0.7637, "step": 3166 }, { "epoch": 1.5714285714285714, "grad_norm": 0.13989023590057628, "learning_rate": 3.3349163619101606e-06, "loss": 0.7293, "step": 3167 }, { "epoch": 1.571925034131811, "grad_norm": 0.14193624544675174, "learning_rate": 3.333995063100105e-06, "loss": 0.6964, "step": 3168 }, { "epoch": 1.5724214968350503, "grad_norm": 0.12940528095135467, "learning_rate": 3.333073636827277e-06, "loss": 0.7047, "step": 3169 }, { "epoch": 1.5729179595382896, "grad_norm": 0.13154882534410767, "learning_rate": 3.3321520832325e-06, "loss": 0.7167, "step": 3170 }, { "epoch": 1.573414422241529, "grad_norm": 0.13169987947643613, "learning_rate": 3.33123040245662e-06, "loss": 0.6984, "step": 3171 }, { "epoch": 1.5739108849447685, "grad_norm": 0.12901406346373048, "learning_rate": 3.3303085946405006e-06, "loss": 0.7369, "step": 3172 }, { "epoch": 1.574407347648008, "grad_norm": 0.1390711912220589, "learning_rate": 3.329386659925025e-06, "loss": 0.7334, "step": 3173 }, { "epoch": 1.5749038103512474, "grad_norm": 0.13614690824410902, "learning_rate": 3.3284645984510977e-06, "loss": 0.7191, "step": 3174 }, { "epoch": 1.5754002730544867, "grad_norm": 0.1354970816221801, "learning_rate": 3.32754241035964e-06, "loss": 0.7187, "step": 3175 }, { "epoch": 1.575896735757726, "grad_norm": 0.13457360527562395, "learning_rate": 3.3266200957915927e-06, "loss": 0.713, "step": 3176 }, { "epoch": 1.5763931984609656, "grad_norm": 0.1348370258972551, "learning_rate": 3.3256976548879183e-06, "loss": 0.7231, "step": 3177 }, { "epoch": 1.5768896611642051, "grad_norm": 0.13666530116217873, "learning_rate": 3.3247750877895955e-06, "loss": 0.7021, "step": 3178 }, { "epoch": 1.5773861238674445, "grad_norm": 0.13309005504337973, "learning_rate": 3.3238523946376256e-06, "loss": 0.6904, "step": 3179 }, { "epoch": 1.5778825865706838, "grad_norm": 0.14080349679221404, "learning_rate": 3.322929575573026e-06, "loss": 0.7235, "step": 3180 }, { "epoch": 1.5783790492739231, "grad_norm": 0.13251312434856302, "learning_rate": 3.322006630736836e-06, "loss": 0.701, "step": 3181 }, { "epoch": 1.5788755119771627, "grad_norm": 0.13612624607173604, "learning_rate": 3.321083560270112e-06, "loss": 0.7359, "step": 3182 }, { "epoch": 1.5793719746804022, "grad_norm": 0.13678666091572295, "learning_rate": 3.3201603643139314e-06, "loss": 0.7353, "step": 3183 }, { "epoch": 1.5798684373836416, "grad_norm": 0.1369846073186433, "learning_rate": 3.319237043009389e-06, "loss": 0.7178, "step": 3184 }, { "epoch": 1.580364900086881, "grad_norm": 0.13713704995068668, "learning_rate": 3.3183135964976003e-06, "loss": 0.7221, "step": 3185 }, { "epoch": 1.5808613627901202, "grad_norm": 0.13486305893810832, "learning_rate": 3.3173900249196986e-06, "loss": 0.7516, "step": 3186 }, { "epoch": 1.5813578254933598, "grad_norm": 0.13246788290751252, "learning_rate": 3.3164663284168382e-06, "loss": 0.6897, "step": 3187 }, { "epoch": 1.5818542881965993, "grad_norm": 0.1338514865785925, "learning_rate": 3.3155425071301894e-06, "loss": 0.7198, "step": 3188 }, { "epoch": 1.5823507508998387, "grad_norm": 0.1408007323666459, "learning_rate": 3.3146185612009453e-06, "loss": 0.695, "step": 3189 }, { "epoch": 1.582847213603078, "grad_norm": 0.131260440618678, "learning_rate": 3.313694490770316e-06, "loss": 0.7185, "step": 3190 }, { "epoch": 1.5833436763063173, "grad_norm": 0.13685270748076497, "learning_rate": 3.31277029597953e-06, "loss": 0.7291, "step": 3191 }, { "epoch": 1.583840139009557, "grad_norm": 0.13649871749420334, "learning_rate": 3.311845976969836e-06, "loss": 0.7442, "step": 3192 }, { "epoch": 1.5843366017127964, "grad_norm": 0.14178510091564836, "learning_rate": 3.3109215338825008e-06, "loss": 0.7721, "step": 3193 }, { "epoch": 1.5848330644160358, "grad_norm": 0.13415549660130519, "learning_rate": 3.3099969668588117e-06, "loss": 0.7347, "step": 3194 }, { "epoch": 1.585329527119275, "grad_norm": 0.1391423380776298, "learning_rate": 3.309072276040074e-06, "loss": 0.7648, "step": 3195 }, { "epoch": 1.5858259898225144, "grad_norm": 0.13047621822203795, "learning_rate": 3.3081474615676106e-06, "loss": 0.7229, "step": 3196 }, { "epoch": 1.586322452525754, "grad_norm": 0.14349775839749804, "learning_rate": 3.307222523582766e-06, "loss": 0.7207, "step": 3197 }, { "epoch": 1.5868189152289935, "grad_norm": 0.1333296259536943, "learning_rate": 3.3062974622269006e-06, "loss": 0.7344, "step": 3198 }, { "epoch": 1.5873153779322329, "grad_norm": 0.1336681573376393, "learning_rate": 3.3053722776413978e-06, "loss": 0.6781, "step": 3199 }, { "epoch": 1.5878118406354722, "grad_norm": 0.13336081958471946, "learning_rate": 3.304446969967654e-06, "loss": 0.6875, "step": 3200 }, { "epoch": 1.5883083033387115, "grad_norm": 0.1301759528310653, "learning_rate": 3.3035215393470896e-06, "loss": 0.6865, "step": 3201 }, { "epoch": 1.588804766041951, "grad_norm": 0.13281805769994326, "learning_rate": 3.3025959859211416e-06, "loss": 0.7184, "step": 3202 }, { "epoch": 1.5893012287451906, "grad_norm": 0.1350945524620567, "learning_rate": 3.3016703098312653e-06, "loss": 0.7637, "step": 3203 }, { "epoch": 1.58979769144843, "grad_norm": 0.13780227036057782, "learning_rate": 3.3007445112189362e-06, "loss": 0.6973, "step": 3204 }, { "epoch": 1.5902941541516693, "grad_norm": 0.1367630069339855, "learning_rate": 3.2998185902256475e-06, "loss": 0.724, "step": 3205 }, { "epoch": 1.5907906168549086, "grad_norm": 0.13924121360508623, "learning_rate": 3.298892546992912e-06, "loss": 0.7636, "step": 3206 }, { "epoch": 1.5912870795581482, "grad_norm": 0.13353110373070412, "learning_rate": 3.29796638166226e-06, "loss": 0.7253, "step": 3207 }, { "epoch": 1.5917835422613877, "grad_norm": 0.13363257815429952, "learning_rate": 3.29704009437524e-06, "loss": 0.709, "step": 3208 }, { "epoch": 1.592280004964627, "grad_norm": 0.1323634598863784, "learning_rate": 3.2961136852734215e-06, "loss": 0.7379, "step": 3209 }, { "epoch": 1.5927764676678664, "grad_norm": 0.1397078632401545, "learning_rate": 3.295187154498391e-06, "loss": 0.7352, "step": 3210 }, { "epoch": 1.5932729303711057, "grad_norm": 0.13431457529442403, "learning_rate": 3.2942605021917535e-06, "loss": 0.7324, "step": 3211 }, { "epoch": 1.5937693930743453, "grad_norm": 0.1365499136783772, "learning_rate": 3.2933337284951338e-06, "loss": 0.7463, "step": 3212 }, { "epoch": 1.5942658557775848, "grad_norm": 0.13865059264043936, "learning_rate": 3.2924068335501734e-06, "loss": 0.7572, "step": 3213 }, { "epoch": 1.5947623184808242, "grad_norm": 0.13306898436599435, "learning_rate": 3.291479817498534e-06, "loss": 0.6667, "step": 3214 }, { "epoch": 1.5952587811840635, "grad_norm": 0.12929290122854256, "learning_rate": 3.2905526804818954e-06, "loss": 0.7137, "step": 3215 }, { "epoch": 1.5957552438873028, "grad_norm": 0.1372438954972776, "learning_rate": 3.2896254226419543e-06, "loss": 0.7392, "step": 3216 }, { "epoch": 1.5962517065905424, "grad_norm": 0.13554630669851628, "learning_rate": 3.2886980441204287e-06, "loss": 0.7278, "step": 3217 }, { "epoch": 1.596748169293782, "grad_norm": 0.1304930159730209, "learning_rate": 3.2877705450590525e-06, "loss": 0.6764, "step": 3218 }, { "epoch": 1.5972446319970213, "grad_norm": 0.13425983285516158, "learning_rate": 3.2868429255995792e-06, "loss": 0.7731, "step": 3219 }, { "epoch": 1.5977410947002606, "grad_norm": 0.13109350786854748, "learning_rate": 3.2859151858837807e-06, "loss": 0.7339, "step": 3220 }, { "epoch": 1.5982375574035, "grad_norm": 0.13818419724327707, "learning_rate": 3.2849873260534472e-06, "loss": 0.7421, "step": 3221 }, { "epoch": 1.5987340201067395, "grad_norm": 0.13351674846911626, "learning_rate": 3.2840593462503878e-06, "loss": 0.7282, "step": 3222 }, { "epoch": 1.599230482809979, "grad_norm": 0.1375585476076764, "learning_rate": 3.283131246616429e-06, "loss": 0.6996, "step": 3223 }, { "epoch": 1.5997269455132184, "grad_norm": 0.13392787493113698, "learning_rate": 3.2822030272934154e-06, "loss": 0.7567, "step": 3224 }, { "epoch": 1.6002234082164577, "grad_norm": 0.1319504916436644, "learning_rate": 3.2812746884232106e-06, "loss": 0.6982, "step": 3225 }, { "epoch": 1.600719870919697, "grad_norm": 0.13878346494318192, "learning_rate": 3.2803462301476962e-06, "loss": 0.7176, "step": 3226 }, { "epoch": 1.6012163336229366, "grad_norm": 0.13949329751448514, "learning_rate": 3.2794176526087723e-06, "loss": 0.7316, "step": 3227 }, { "epoch": 1.6017127963261761, "grad_norm": 0.13219100886136206, "learning_rate": 3.278488955948358e-06, "loss": 0.6974, "step": 3228 }, { "epoch": 1.6022092590294155, "grad_norm": 0.12840250780955081, "learning_rate": 3.2775601403083886e-06, "loss": 0.7009, "step": 3229 }, { "epoch": 1.6027057217326548, "grad_norm": 0.13490025484599139, "learning_rate": 3.276631205830818e-06, "loss": 0.6955, "step": 3230 }, { "epoch": 1.6032021844358941, "grad_norm": 0.1433135689367546, "learning_rate": 3.2757021526576216e-06, "loss": 0.7346, "step": 3231 }, { "epoch": 1.6036986471391337, "grad_norm": 0.1594064448899915, "learning_rate": 3.2747729809307878e-06, "loss": 0.7767, "step": 3232 }, { "epoch": 1.6041951098423732, "grad_norm": 0.13512124092823508, "learning_rate": 3.2738436907923267e-06, "loss": 0.732, "step": 3233 }, { "epoch": 1.6046915725456126, "grad_norm": 0.13435293487480612, "learning_rate": 3.2729142823842645e-06, "loss": 0.7113, "step": 3234 }, { "epoch": 1.605188035248852, "grad_norm": 0.1319079246448511, "learning_rate": 3.2719847558486474e-06, "loss": 0.7377, "step": 3235 }, { "epoch": 1.6056844979520912, "grad_norm": 0.1279027378230854, "learning_rate": 3.271055111327538e-06, "loss": 0.7124, "step": 3236 }, { "epoch": 1.6061809606553308, "grad_norm": 0.13463254886388712, "learning_rate": 3.2701253489630175e-06, "loss": 0.699, "step": 3237 }, { "epoch": 1.6066774233585703, "grad_norm": 0.1366952695028082, "learning_rate": 3.2691954688971862e-06, "loss": 0.7035, "step": 3238 }, { "epoch": 1.6071738860618097, "grad_norm": 0.13669325319793554, "learning_rate": 3.2682654712721605e-06, "loss": 0.7521, "step": 3239 }, { "epoch": 1.607670348765049, "grad_norm": 0.13083810989888964, "learning_rate": 3.2673353562300753e-06, "loss": 0.7283, "step": 3240 }, { "epoch": 1.6081668114682883, "grad_norm": 0.14043885513177584, "learning_rate": 3.266405123913084e-06, "loss": 0.7589, "step": 3241 }, { "epoch": 1.6086632741715279, "grad_norm": 0.13900658899664461, "learning_rate": 3.265474774463358e-06, "loss": 0.7308, "step": 3242 }, { "epoch": 1.6091597368747674, "grad_norm": 0.14121156080517872, "learning_rate": 3.264544308023086e-06, "loss": 0.7011, "step": 3243 }, { "epoch": 1.6096561995780068, "grad_norm": 0.13303533153675115, "learning_rate": 3.2636137247344756e-06, "loss": 0.6789, "step": 3244 }, { "epoch": 1.610152662281246, "grad_norm": 0.1345546966620587, "learning_rate": 3.2626830247397507e-06, "loss": 0.7209, "step": 3245 }, { "epoch": 1.6106491249844854, "grad_norm": 0.13372939850048826, "learning_rate": 3.261752208181154e-06, "loss": 0.7131, "step": 3246 }, { "epoch": 1.611145587687725, "grad_norm": 0.13637214131020378, "learning_rate": 3.260821275200947e-06, "loss": 0.7006, "step": 3247 }, { "epoch": 1.6116420503909645, "grad_norm": 0.133959325485992, "learning_rate": 3.2598902259414055e-06, "loss": 0.71, "step": 3248 }, { "epoch": 1.6121385130942039, "grad_norm": 0.1317569091674975, "learning_rate": 3.258959060544828e-06, "loss": 0.6934, "step": 3249 }, { "epoch": 1.6126349757974432, "grad_norm": 0.13549906434028006, "learning_rate": 3.258027779153527e-06, "loss": 0.7516, "step": 3250 }, { "epoch": 1.6131314385006825, "grad_norm": 0.13646477270544233, "learning_rate": 3.2570963819098333e-06, "loss": 0.7309, "step": 3251 }, { "epoch": 1.613627901203922, "grad_norm": 0.13531221765549428, "learning_rate": 3.2561648689560972e-06, "loss": 0.7276, "step": 3252 }, { "epoch": 1.6141243639071616, "grad_norm": 0.13706437632515092, "learning_rate": 3.2552332404346847e-06, "loss": 0.7446, "step": 3253 }, { "epoch": 1.614620826610401, "grad_norm": 0.12982692323683104, "learning_rate": 3.2543014964879814e-06, "loss": 0.6706, "step": 3254 }, { "epoch": 1.6151172893136403, "grad_norm": 0.1417142856105007, "learning_rate": 3.2533696372583886e-06, "loss": 0.7691, "step": 3255 }, { "epoch": 1.6156137520168796, "grad_norm": 0.1338299722054298, "learning_rate": 3.2524376628883254e-06, "loss": 0.7318, "step": 3256 }, { "epoch": 1.6161102147201192, "grad_norm": 0.13244784626065542, "learning_rate": 3.25150557352023e-06, "loss": 0.6839, "step": 3257 }, { "epoch": 1.6166066774233587, "grad_norm": 0.1373652541161793, "learning_rate": 3.250573369296557e-06, "loss": 0.7263, "step": 3258 }, { "epoch": 1.617103140126598, "grad_norm": 0.13733039620980356, "learning_rate": 3.249641050359779e-06, "loss": 0.7403, "step": 3259 }, { "epoch": 1.6175996028298374, "grad_norm": 0.14578749260154306, "learning_rate": 3.248708616852387e-06, "loss": 0.789, "step": 3260 }, { "epoch": 1.6180960655330767, "grad_norm": 0.137192295052107, "learning_rate": 3.247776068916887e-06, "loss": 0.7158, "step": 3261 }, { "epoch": 1.6185925282363163, "grad_norm": 0.1343518895157817, "learning_rate": 3.246843406695804e-06, "loss": 0.6879, "step": 3262 }, { "epoch": 1.6190889909395558, "grad_norm": 0.13371425918634516, "learning_rate": 3.245910630331682e-06, "loss": 0.7908, "step": 3263 }, { "epoch": 1.6195854536427952, "grad_norm": 0.13354797872889265, "learning_rate": 3.2449777399670788e-06, "loss": 0.6948, "step": 3264 }, { "epoch": 1.6200819163460345, "grad_norm": 0.13151382649489246, "learning_rate": 3.2440447357445737e-06, "loss": 0.7567, "step": 3265 }, { "epoch": 1.6205783790492738, "grad_norm": 0.13595368534380006, "learning_rate": 3.243111617806761e-06, "loss": 0.7589, "step": 3266 }, { "epoch": 1.6210748417525134, "grad_norm": 0.1365032790773031, "learning_rate": 3.2421783862962513e-06, "loss": 0.7405, "step": 3267 }, { "epoch": 1.621571304455753, "grad_norm": 0.13435638512861106, "learning_rate": 3.2412450413556753e-06, "loss": 0.7066, "step": 3268 }, { "epoch": 1.6220677671589923, "grad_norm": 0.13947202661774769, "learning_rate": 3.24031158312768e-06, "loss": 0.7538, "step": 3269 }, { "epoch": 1.6225642298622316, "grad_norm": 0.13439183598377127, "learning_rate": 3.2393780117549294e-06, "loss": 0.7072, "step": 3270 }, { "epoch": 1.623060692565471, "grad_norm": 0.13125548459747396, "learning_rate": 3.238444327380105e-06, "loss": 0.7033, "step": 3271 }, { "epoch": 1.6235571552687105, "grad_norm": 0.13335877452614855, "learning_rate": 3.2375105301459046e-06, "loss": 0.7374, "step": 3272 }, { "epoch": 1.6240536179719498, "grad_norm": 0.13797996283169242, "learning_rate": 3.236576620195045e-06, "loss": 0.7282, "step": 3273 }, { "epoch": 1.6245500806751894, "grad_norm": 0.1346903208785757, "learning_rate": 3.2356425976702587e-06, "loss": 0.7005, "step": 3274 }, { "epoch": 1.6250465433784287, "grad_norm": 0.13164913588291022, "learning_rate": 3.2347084627142967e-06, "loss": 0.712, "step": 3275 }, { "epoch": 1.625543006081668, "grad_norm": 0.13395069471794036, "learning_rate": 3.233774215469927e-06, "loss": 0.6553, "step": 3276 }, { "epoch": 1.6260394687849076, "grad_norm": 0.12946118362950973, "learning_rate": 3.2328398560799327e-06, "loss": 0.7068, "step": 3277 }, { "epoch": 1.626535931488147, "grad_norm": 0.13651890789151064, "learning_rate": 3.231905384687117e-06, "loss": 0.7263, "step": 3278 }, { "epoch": 1.6270323941913865, "grad_norm": 0.13320408589478813, "learning_rate": 3.2309708014342987e-06, "loss": 0.659, "step": 3279 }, { "epoch": 1.6275288568946258, "grad_norm": 0.13232211632916635, "learning_rate": 3.2300361064643133e-06, "loss": 0.695, "step": 3280 }, { "epoch": 1.6280253195978651, "grad_norm": 0.13567576909886284, "learning_rate": 3.229101299920014e-06, "loss": 0.7111, "step": 3281 }, { "epoch": 1.6285217823011047, "grad_norm": 0.13518794018443017, "learning_rate": 3.228166381944272e-06, "loss": 0.7415, "step": 3282 }, { "epoch": 1.629018245004344, "grad_norm": 0.13083493674868937, "learning_rate": 3.227231352679973e-06, "loss": 0.7146, "step": 3283 }, { "epoch": 1.6295147077075836, "grad_norm": 0.14297032801648918, "learning_rate": 3.2262962122700224e-06, "loss": 0.7503, "step": 3284 }, { "epoch": 1.6300111704108229, "grad_norm": 0.13219562864852205, "learning_rate": 3.2253609608573412e-06, "loss": 0.7098, "step": 3285 }, { "epoch": 1.6305076331140622, "grad_norm": 0.1356006220123195, "learning_rate": 3.2244255985848664e-06, "loss": 0.7158, "step": 3286 }, { "epoch": 1.6310040958173018, "grad_norm": 0.13516073020936664, "learning_rate": 3.2234901255955554e-06, "loss": 0.7443, "step": 3287 }, { "epoch": 1.631500558520541, "grad_norm": 0.13223315868550364, "learning_rate": 3.222554542032379e-06, "loss": 0.7013, "step": 3288 }, { "epoch": 1.6319970212237807, "grad_norm": 0.1291224602102518, "learning_rate": 3.221618848038326e-06, "loss": 0.725, "step": 3289 }, { "epoch": 1.63249348392702, "grad_norm": 0.13959827562469576, "learning_rate": 3.220683043756402e-06, "loss": 0.7296, "step": 3290 }, { "epoch": 1.6329899466302593, "grad_norm": 0.14159509303723317, "learning_rate": 3.2197471293296297e-06, "loss": 0.7486, "step": 3291 }, { "epoch": 1.6334864093334989, "grad_norm": 0.13293711515989248, "learning_rate": 3.2188111049010496e-06, "loss": 0.7258, "step": 3292 }, { "epoch": 1.6339828720367382, "grad_norm": 0.1399399476886841, "learning_rate": 3.217874970613717e-06, "loss": 0.7762, "step": 3293 }, { "epoch": 1.6344793347399778, "grad_norm": 0.13066912497887148, "learning_rate": 3.216938726610705e-06, "loss": 0.682, "step": 3294 }, { "epoch": 1.634975797443217, "grad_norm": 0.13206917222232278, "learning_rate": 3.2160023730351047e-06, "loss": 0.6876, "step": 3295 }, { "epoch": 1.6354722601464564, "grad_norm": 0.12881195914477797, "learning_rate": 3.2150659100300215e-06, "loss": 0.7009, "step": 3296 }, { "epoch": 1.635968722849696, "grad_norm": 0.1354978102327594, "learning_rate": 3.214129337738579e-06, "loss": 0.7216, "step": 3297 }, { "epoch": 1.6364651855529353, "grad_norm": 0.13272278700868193, "learning_rate": 3.2131926563039184e-06, "loss": 0.7671, "step": 3298 }, { "epoch": 1.6369616482561749, "grad_norm": 0.13268702786871522, "learning_rate": 3.2122558658691943e-06, "loss": 0.7306, "step": 3299 }, { "epoch": 1.6374581109594142, "grad_norm": 0.14693293257842704, "learning_rate": 3.2113189665775812e-06, "loss": 0.7262, "step": 3300 }, { "epoch": 1.6379545736626535, "grad_norm": 0.13647387480975237, "learning_rate": 3.2103819585722696e-06, "loss": 0.7228, "step": 3301 }, { "epoch": 1.638451036365893, "grad_norm": 0.13489368002148763, "learning_rate": 3.2094448419964657e-06, "loss": 0.72, "step": 3302 }, { "epoch": 1.6389474990691324, "grad_norm": 0.1334332663332396, "learning_rate": 3.208507616993393e-06, "loss": 0.6933, "step": 3303 }, { "epoch": 1.639443961772372, "grad_norm": 0.13159985839475669, "learning_rate": 3.2075702837062915e-06, "loss": 0.6915, "step": 3304 }, { "epoch": 1.6399404244756113, "grad_norm": 0.13257761470120857, "learning_rate": 3.2066328422784166e-06, "loss": 0.733, "step": 3305 }, { "epoch": 1.6404368871788506, "grad_norm": 0.13649241457619013, "learning_rate": 3.2056952928530417e-06, "loss": 0.7765, "step": 3306 }, { "epoch": 1.64093334988209, "grad_norm": 0.12816321098365577, "learning_rate": 3.204757635573456e-06, "loss": 0.6676, "step": 3307 }, { "epoch": 1.6414298125853295, "grad_norm": 0.1357189860674346, "learning_rate": 3.2038198705829658e-06, "loss": 0.7106, "step": 3308 }, { "epoch": 1.641926275288569, "grad_norm": 0.12905046251866562, "learning_rate": 3.202881998024894e-06, "loss": 0.6678, "step": 3309 }, { "epoch": 1.6424227379918084, "grad_norm": 0.13424725400492285, "learning_rate": 3.2019440180425774e-06, "loss": 0.6833, "step": 3310 }, { "epoch": 1.6429192006950477, "grad_norm": 0.1310598204136392, "learning_rate": 3.201005930779374e-06, "loss": 0.7341, "step": 3311 }, { "epoch": 1.643415663398287, "grad_norm": 0.14192929179095823, "learning_rate": 3.2000677363786525e-06, "loss": 0.6982, "step": 3312 }, { "epoch": 1.6439121261015266, "grad_norm": 0.12782211475807093, "learning_rate": 3.199129434983803e-06, "loss": 0.6782, "step": 3313 }, { "epoch": 1.6444085888047661, "grad_norm": 0.13517571601842548, "learning_rate": 3.1981910267382294e-06, "loss": 0.7526, "step": 3314 }, { "epoch": 1.6449050515080055, "grad_norm": 0.13283442693929196, "learning_rate": 3.197252511785351e-06, "loss": 0.7385, "step": 3315 }, { "epoch": 1.6454015142112448, "grad_norm": 0.129952438407659, "learning_rate": 3.196313890268606e-06, "loss": 0.6833, "step": 3316 }, { "epoch": 1.6458979769144841, "grad_norm": 0.13333153615133936, "learning_rate": 3.1953751623314476e-06, "loss": 0.7165, "step": 3317 }, { "epoch": 1.6463944396177237, "grad_norm": 0.139666486016696, "learning_rate": 3.1944363281173445e-06, "loss": 0.7806, "step": 3318 }, { "epoch": 1.6468909023209632, "grad_norm": 0.13280138772226624, "learning_rate": 3.1934973877697843e-06, "loss": 0.7155, "step": 3319 }, { "epoch": 1.6473873650242026, "grad_norm": 0.13553005489923173, "learning_rate": 3.1925583414322677e-06, "loss": 0.7518, "step": 3320 }, { "epoch": 1.647883827727442, "grad_norm": 0.13507802161305021, "learning_rate": 3.191619189248312e-06, "loss": 0.7639, "step": 3321 }, { "epoch": 1.6483802904306812, "grad_norm": 0.13564619513742512, "learning_rate": 3.1906799313614526e-06, "loss": 0.7453, "step": 3322 }, { "epoch": 1.6488767531339208, "grad_norm": 0.13874442942300055, "learning_rate": 3.1897405679152403e-06, "loss": 0.6741, "step": 3323 }, { "epoch": 1.6493732158371603, "grad_norm": 0.14111170898428813, "learning_rate": 3.1888010990532412e-06, "loss": 0.7146, "step": 3324 }, { "epoch": 1.6498696785403997, "grad_norm": 0.1284924018943679, "learning_rate": 3.1878615249190386e-06, "loss": 0.6811, "step": 3325 }, { "epoch": 1.650366141243639, "grad_norm": 0.14995165389634305, "learning_rate": 3.18692184565623e-06, "loss": 0.6953, "step": 3326 }, { "epoch": 1.6508626039468783, "grad_norm": 0.13386502577631518, "learning_rate": 3.1859820614084326e-06, "loss": 0.7294, "step": 3327 }, { "epoch": 1.651359066650118, "grad_norm": 0.13753075751861904, "learning_rate": 3.1850421723192748e-06, "loss": 0.7097, "step": 3328 }, { "epoch": 1.6518555293533574, "grad_norm": 0.1372523640506848, "learning_rate": 3.1841021785324057e-06, "loss": 0.7533, "step": 3329 }, { "epoch": 1.6523519920565968, "grad_norm": 0.13132741612801554, "learning_rate": 3.183162080191487e-06, "loss": 0.7468, "step": 3330 }, { "epoch": 1.652848454759836, "grad_norm": 0.14093059894911422, "learning_rate": 3.1822218774401974e-06, "loss": 0.7054, "step": 3331 }, { "epoch": 1.6533449174630754, "grad_norm": 0.13503393548333384, "learning_rate": 3.1812815704222326e-06, "loss": 0.7092, "step": 3332 }, { "epoch": 1.653841380166315, "grad_norm": 0.13795156477927883, "learning_rate": 3.1803411592813036e-06, "loss": 0.7342, "step": 3333 }, { "epoch": 1.6543378428695545, "grad_norm": 0.14715484138992083, "learning_rate": 3.179400644161137e-06, "loss": 0.7639, "step": 3334 }, { "epoch": 1.6548343055727939, "grad_norm": 0.13505583893408066, "learning_rate": 3.1784600252054752e-06, "loss": 0.7639, "step": 3335 }, { "epoch": 1.6553307682760332, "grad_norm": 0.13398674733013766, "learning_rate": 3.1775193025580774e-06, "loss": 0.7302, "step": 3336 }, { "epoch": 1.6558272309792725, "grad_norm": 0.13122340668034163, "learning_rate": 3.176578476362716e-06, "loss": 0.6994, "step": 3337 }, { "epoch": 1.656323693682512, "grad_norm": 0.1392905054739376, "learning_rate": 3.1756375467631832e-06, "loss": 0.743, "step": 3338 }, { "epoch": 1.6568201563857516, "grad_norm": 0.13237002846348273, "learning_rate": 3.1746965139032846e-06, "loss": 0.7088, "step": 3339 }, { "epoch": 1.657316619088991, "grad_norm": 0.1342680889751068, "learning_rate": 3.1737553779268415e-06, "loss": 0.742, "step": 3340 }, { "epoch": 1.6578130817922303, "grad_norm": 0.13437620364569894, "learning_rate": 3.172814138977692e-06, "loss": 0.7017, "step": 3341 }, { "epoch": 1.6583095444954696, "grad_norm": 0.13152990564641498, "learning_rate": 3.1718727971996893e-06, "loss": 0.6658, "step": 3342 }, { "epoch": 1.6588060071987092, "grad_norm": 0.13578569658737435, "learning_rate": 3.1709313527367025e-06, "loss": 0.7434, "step": 3343 }, { "epoch": 1.6593024699019487, "grad_norm": 0.13282327497087398, "learning_rate": 3.1699898057326156e-06, "loss": 0.684, "step": 3344 }, { "epoch": 1.659798932605188, "grad_norm": 0.1266967797135504, "learning_rate": 3.169048156331329e-06, "loss": 0.7131, "step": 3345 }, { "epoch": 1.6602953953084274, "grad_norm": 0.14302577618653753, "learning_rate": 3.168106404676759e-06, "loss": 0.7456, "step": 3346 }, { "epoch": 1.6607918580116667, "grad_norm": 0.1366996921041571, "learning_rate": 3.167164550912838e-06, "loss": 0.7763, "step": 3347 }, { "epoch": 1.6612883207149063, "grad_norm": 0.13073297302259157, "learning_rate": 3.1662225951835124e-06, "loss": 0.7044, "step": 3348 }, { "epoch": 1.6617847834181458, "grad_norm": 0.12656041564587794, "learning_rate": 3.1652805376327446e-06, "loss": 0.6735, "step": 3349 }, { "epoch": 1.6622812461213852, "grad_norm": 0.13902637524276387, "learning_rate": 3.164338378404514e-06, "loss": 0.7789, "step": 3350 }, { "epoch": 1.6627777088246245, "grad_norm": 0.13433714640026365, "learning_rate": 3.163396117642814e-06, "loss": 0.7237, "step": 3351 }, { "epoch": 1.6632741715278638, "grad_norm": 0.13306259899090125, "learning_rate": 3.162453755491655e-06, "loss": 0.6549, "step": 3352 }, { "epoch": 1.6637706342311034, "grad_norm": 0.14000124931833177, "learning_rate": 3.1615112920950604e-06, "loss": 0.7011, "step": 3353 }, { "epoch": 1.664267096934343, "grad_norm": 0.13277684994304378, "learning_rate": 3.160568727597071e-06, "loss": 0.7056, "step": 3354 }, { "epoch": 1.6647635596375823, "grad_norm": 0.13574274974818243, "learning_rate": 3.159626062141743e-06, "loss": 0.6961, "step": 3355 }, { "epoch": 1.6652600223408216, "grad_norm": 0.13515486910318286, "learning_rate": 3.158683295873148e-06, "loss": 0.7505, "step": 3356 }, { "epoch": 1.665756485044061, "grad_norm": 0.13117466652194615, "learning_rate": 3.157740428935373e-06, "loss": 0.6759, "step": 3357 }, { "epoch": 1.6662529477473005, "grad_norm": 0.13734801985031647, "learning_rate": 3.156797461472518e-06, "loss": 0.7327, "step": 3358 }, { "epoch": 1.66674941045054, "grad_norm": 0.12921730687145758, "learning_rate": 3.155854393628704e-06, "loss": 0.7142, "step": 3359 }, { "epoch": 1.6672458731537794, "grad_norm": 0.13410715617644764, "learning_rate": 3.15491122554806e-06, "loss": 0.6928, "step": 3360 }, { "epoch": 1.6677423358570187, "grad_norm": 0.13115756337495957, "learning_rate": 3.1539679573747366e-06, "loss": 0.7292, "step": 3361 }, { "epoch": 1.668238798560258, "grad_norm": 0.13270364362867004, "learning_rate": 3.1530245892528964e-06, "loss": 0.6981, "step": 3362 }, { "epoch": 1.6687352612634976, "grad_norm": 0.1312122707358505, "learning_rate": 3.1520811213267187e-06, "loss": 0.7039, "step": 3363 }, { "epoch": 1.6692317239667371, "grad_norm": 0.13612808788926317, "learning_rate": 3.151137553740396e-06, "loss": 0.7128, "step": 3364 }, { "epoch": 1.6697281866699765, "grad_norm": 0.1356721052844853, "learning_rate": 3.150193886638139e-06, "loss": 0.7021, "step": 3365 }, { "epoch": 1.6702246493732158, "grad_norm": 0.1334498879831998, "learning_rate": 3.149250120164171e-06, "loss": 0.6959, "step": 3366 }, { "epoch": 1.6707211120764551, "grad_norm": 0.12993420392929775, "learning_rate": 3.148306254462733e-06, "loss": 0.7035, "step": 3367 }, { "epoch": 1.6712175747796947, "grad_norm": 0.12953639610818973, "learning_rate": 3.1473622896780787e-06, "loss": 0.6884, "step": 3368 }, { "epoch": 1.6717140374829342, "grad_norm": 0.13434437759235057, "learning_rate": 3.1464182259544774e-06, "loss": 0.7411, "step": 3369 }, { "epoch": 1.6722105001861736, "grad_norm": 0.1351221908451605, "learning_rate": 3.1454740634362146e-06, "loss": 0.7518, "step": 3370 }, { "epoch": 1.672706962889413, "grad_norm": 0.13151338326848652, "learning_rate": 3.144529802267591e-06, "loss": 0.7119, "step": 3371 }, { "epoch": 1.6732034255926522, "grad_norm": 0.1330736698831485, "learning_rate": 3.1435854425929207e-06, "loss": 0.7364, "step": 3372 }, { "epoch": 1.6736998882958918, "grad_norm": 0.13120786063832604, "learning_rate": 3.142640984556536e-06, "loss": 0.6817, "step": 3373 }, { "epoch": 1.6741963509991313, "grad_norm": 0.13199790092665806, "learning_rate": 3.1416964283027796e-06, "loss": 0.6751, "step": 3374 }, { "epoch": 1.6746928137023707, "grad_norm": 0.13247768384510822, "learning_rate": 3.1407517739760135e-06, "loss": 0.7246, "step": 3375 }, { "epoch": 1.67518927640561, "grad_norm": 0.15212250506043967, "learning_rate": 3.139807021720613e-06, "loss": 0.7784, "step": 3376 }, { "epoch": 1.6756857391088493, "grad_norm": 0.13118756683065375, "learning_rate": 3.1388621716809664e-06, "loss": 0.6729, "step": 3377 }, { "epoch": 1.6761822018120889, "grad_norm": 0.1288392412087169, "learning_rate": 3.137917224001481e-06, "loss": 0.6989, "step": 3378 }, { "epoch": 1.6766786645153284, "grad_norm": 0.1315769406124471, "learning_rate": 3.136972178826576e-06, "loss": 0.6975, "step": 3379 }, { "epoch": 1.6771751272185678, "grad_norm": 0.13147429747221198, "learning_rate": 3.136027036300687e-06, "loss": 0.733, "step": 3380 }, { "epoch": 1.677671589921807, "grad_norm": 0.12989489332738152, "learning_rate": 3.1350817965682624e-06, "loss": 0.7395, "step": 3381 }, { "epoch": 1.6781680526250464, "grad_norm": 0.1352912050224614, "learning_rate": 3.1341364597737684e-06, "loss": 0.7523, "step": 3382 }, { "epoch": 1.678664515328286, "grad_norm": 0.13840497131463406, "learning_rate": 3.1331910260616845e-06, "loss": 0.7422, "step": 3383 }, { "epoch": 1.6791609780315255, "grad_norm": 0.132998592069353, "learning_rate": 3.132245495576505e-06, "loss": 0.7312, "step": 3384 }, { "epoch": 1.6796574407347649, "grad_norm": 0.12905667482053407, "learning_rate": 3.1312998684627383e-06, "loss": 0.721, "step": 3385 }, { "epoch": 1.6801539034380042, "grad_norm": 0.1399236151235144, "learning_rate": 3.130354144864909e-06, "loss": 0.733, "step": 3386 }, { "epoch": 1.6806503661412435, "grad_norm": 0.13157662254402422, "learning_rate": 3.1294083249275546e-06, "loss": 0.727, "step": 3387 }, { "epoch": 1.681146828844483, "grad_norm": 0.13230069517151366, "learning_rate": 3.1284624087952307e-06, "loss": 0.7103, "step": 3388 }, { "epoch": 1.6816432915477226, "grad_norm": 0.134244822451022, "learning_rate": 3.1275163966125042e-06, "loss": 0.7265, "step": 3389 }, { "epoch": 1.682139754250962, "grad_norm": 0.134846550069988, "learning_rate": 3.1265702885239573e-06, "loss": 0.6948, "step": 3390 }, { "epoch": 1.6826362169542013, "grad_norm": 0.1312183196880194, "learning_rate": 3.125624084674188e-06, "loss": 0.7065, "step": 3391 }, { "epoch": 1.6831326796574406, "grad_norm": 0.13279036282716666, "learning_rate": 3.12467778520781e-06, "loss": 0.7109, "step": 3392 }, { "epoch": 1.6836291423606802, "grad_norm": 0.13146686432062818, "learning_rate": 3.1237313902694467e-06, "loss": 0.6808, "step": 3393 }, { "epoch": 1.6841256050639197, "grad_norm": 0.13576130928931848, "learning_rate": 3.122784900003742e-06, "loss": 0.7479, "step": 3394 }, { "epoch": 1.684622067767159, "grad_norm": 0.1337831193034485, "learning_rate": 3.121838314555351e-06, "loss": 0.8127, "step": 3395 }, { "epoch": 1.6851185304703984, "grad_norm": 0.1388930824488273, "learning_rate": 3.120891634068944e-06, "loss": 0.7343, "step": 3396 }, { "epoch": 1.6856149931736377, "grad_norm": 0.13614435991184165, "learning_rate": 3.1199448586892056e-06, "loss": 0.7719, "step": 3397 }, { "epoch": 1.6861114558768773, "grad_norm": 0.1361801580234787, "learning_rate": 3.1189979885608358e-06, "loss": 0.7374, "step": 3398 }, { "epoch": 1.6866079185801168, "grad_norm": 0.13142547998982623, "learning_rate": 3.118051023828548e-06, "loss": 0.738, "step": 3399 }, { "epoch": 1.6871043812833562, "grad_norm": 0.1357277571483877, "learning_rate": 3.1171039646370714e-06, "loss": 0.7191, "step": 3400 }, { "epoch": 1.6876008439865955, "grad_norm": 0.12800485874343553, "learning_rate": 3.1161568111311487e-06, "loss": 0.7144, "step": 3401 }, { "epoch": 1.6880973066898348, "grad_norm": 0.12514438054459623, "learning_rate": 3.115209563455536e-06, "loss": 0.6717, "step": 3402 }, { "epoch": 1.6885937693930744, "grad_norm": 0.13334147860764942, "learning_rate": 3.1142622217550054e-06, "loss": 0.7519, "step": 3403 }, { "epoch": 1.689090232096314, "grad_norm": 0.13393223913614705, "learning_rate": 3.1133147861743435e-06, "loss": 0.6909, "step": 3404 }, { "epoch": 1.6895866947995533, "grad_norm": 0.1430945896299099, "learning_rate": 3.112367256858351e-06, "loss": 0.7288, "step": 3405 }, { "epoch": 1.6900831575027926, "grad_norm": 0.13874258590546748, "learning_rate": 3.111419633951841e-06, "loss": 0.7006, "step": 3406 }, { "epoch": 1.690579620206032, "grad_norm": 0.12943087282029672, "learning_rate": 3.110471917599643e-06, "loss": 0.6984, "step": 3407 }, { "epoch": 1.6910760829092715, "grad_norm": 0.12968650358300085, "learning_rate": 3.109524107946602e-06, "loss": 0.6686, "step": 3408 }, { "epoch": 1.691572545612511, "grad_norm": 0.1273962713358661, "learning_rate": 3.1085762051375727e-06, "loss": 0.6858, "step": 3409 }, { "epoch": 1.6920690083157504, "grad_norm": 0.13601298863571853, "learning_rate": 3.107628209317429e-06, "loss": 0.7531, "step": 3410 }, { "epoch": 1.6925654710189897, "grad_norm": 0.13179660551493383, "learning_rate": 3.1066801206310558e-06, "loss": 0.7169, "step": 3411 }, { "epoch": 1.693061933722229, "grad_norm": 0.1262797410728671, "learning_rate": 3.105731939223354e-06, "loss": 0.6924, "step": 3412 }, { "epoch": 1.6935583964254686, "grad_norm": 0.12881341815960923, "learning_rate": 3.1047836652392367e-06, "loss": 0.704, "step": 3413 }, { "epoch": 1.694054859128708, "grad_norm": 0.13333244805830557, "learning_rate": 3.103835298823633e-06, "loss": 0.7076, "step": 3414 }, { "epoch": 1.6945513218319475, "grad_norm": 0.13343352452172727, "learning_rate": 3.1028868401214862e-06, "loss": 0.7251, "step": 3415 }, { "epoch": 1.6950477845351868, "grad_norm": 0.1310649697161129, "learning_rate": 3.101938289277753e-06, "loss": 0.7048, "step": 3416 }, { "epoch": 1.6955442472384261, "grad_norm": 0.13399637642806747, "learning_rate": 3.1009896464374027e-06, "loss": 0.7047, "step": 3417 }, { "epoch": 1.6960407099416657, "grad_norm": 0.14811556928768213, "learning_rate": 3.100040911745421e-06, "loss": 0.7407, "step": 3418 }, { "epoch": 1.696537172644905, "grad_norm": 0.13730816941238383, "learning_rate": 3.0990920853468076e-06, "loss": 0.7355, "step": 3419 }, { "epoch": 1.6970336353481446, "grad_norm": 0.13592416626522716, "learning_rate": 3.098143167386574e-06, "loss": 0.672, "step": 3420 }, { "epoch": 1.6975300980513839, "grad_norm": 0.13560020957046776, "learning_rate": 3.0971941580097476e-06, "loss": 0.7311, "step": 3421 }, { "epoch": 1.6980265607546232, "grad_norm": 0.12963432619444307, "learning_rate": 3.0962450573613705e-06, "loss": 0.7721, "step": 3422 }, { "epoch": 1.6985230234578628, "grad_norm": 0.13067867842531036, "learning_rate": 3.0952958655864957e-06, "loss": 0.7049, "step": 3423 }, { "epoch": 1.699019486161102, "grad_norm": 0.13236415877160046, "learning_rate": 3.0943465828301935e-06, "loss": 0.7503, "step": 3424 }, { "epoch": 1.6995159488643417, "grad_norm": 0.1347895104957468, "learning_rate": 3.0933972092375447e-06, "loss": 0.7277, "step": 3425 }, { "epoch": 1.700012411567581, "grad_norm": 0.13020223927968744, "learning_rate": 3.0924477449536467e-06, "loss": 0.6961, "step": 3426 }, { "epoch": 1.7005088742708203, "grad_norm": 0.12869798181832637, "learning_rate": 3.0914981901236113e-06, "loss": 0.7275, "step": 3427 }, { "epoch": 1.7010053369740599, "grad_norm": 0.1285881035433599, "learning_rate": 3.0905485448925603e-06, "loss": 0.6905, "step": 3428 }, { "epoch": 1.7015017996772992, "grad_norm": 0.13590269430618224, "learning_rate": 3.0895988094056333e-06, "loss": 0.7392, "step": 3429 }, { "epoch": 1.7019982623805388, "grad_norm": 0.1321536527714624, "learning_rate": 3.088648983807982e-06, "loss": 0.7341, "step": 3430 }, { "epoch": 1.702494725083778, "grad_norm": 0.13823633479008848, "learning_rate": 3.087699068244771e-06, "loss": 0.7409, "step": 3431 }, { "epoch": 1.7029911877870174, "grad_norm": 0.1342374430839832, "learning_rate": 3.0867490628611817e-06, "loss": 0.7252, "step": 3432 }, { "epoch": 1.703487650490257, "grad_norm": 0.13031168815094007, "learning_rate": 3.0857989678024057e-06, "loss": 0.7556, "step": 3433 }, { "epoch": 1.7039841131934963, "grad_norm": 0.132695870959402, "learning_rate": 3.08484878321365e-06, "loss": 0.7524, "step": 3434 }, { "epoch": 1.7044805758967358, "grad_norm": 0.13160792019629425, "learning_rate": 3.083898509240134e-06, "loss": 0.6997, "step": 3435 }, { "epoch": 1.7049770385999752, "grad_norm": 0.13528254474745396, "learning_rate": 3.0829481460270937e-06, "loss": 0.7547, "step": 3436 }, { "epoch": 1.7054735013032145, "grad_norm": 0.12969198957971417, "learning_rate": 3.0819976937197767e-06, "loss": 0.6953, "step": 3437 }, { "epoch": 1.705969964006454, "grad_norm": 0.13427548997210034, "learning_rate": 3.0810471524634432e-06, "loss": 0.7189, "step": 3438 }, { "epoch": 1.7064664267096934, "grad_norm": 0.1380792625189435, "learning_rate": 3.080096522403369e-06, "loss": 0.757, "step": 3439 }, { "epoch": 1.706962889412933, "grad_norm": 0.12798109709025035, "learning_rate": 3.079145803684843e-06, "loss": 0.731, "step": 3440 }, { "epoch": 1.7074593521161723, "grad_norm": 0.13775152334069862, "learning_rate": 3.078194996453166e-06, "loss": 0.7227, "step": 3441 }, { "epoch": 1.7079558148194116, "grad_norm": 0.15389555123946358, "learning_rate": 3.0772441008536545e-06, "loss": 0.7067, "step": 3442 }, { "epoch": 1.7084522775226512, "grad_norm": 0.13139328016325508, "learning_rate": 3.0762931170316384e-06, "loss": 0.7064, "step": 3443 }, { "epoch": 1.7089487402258905, "grad_norm": 0.14093931277261854, "learning_rate": 3.075342045132459e-06, "loss": 0.7493, "step": 3444 }, { "epoch": 1.70944520292913, "grad_norm": 0.1339425299662374, "learning_rate": 3.0743908853014726e-06, "loss": 0.7333, "step": 3445 }, { "epoch": 1.7099416656323694, "grad_norm": 0.13334111715937227, "learning_rate": 3.073439637684049e-06, "loss": 0.6911, "step": 3446 }, { "epoch": 1.7104381283356087, "grad_norm": 0.13485035304461898, "learning_rate": 3.0724883024255713e-06, "loss": 0.7105, "step": 3447 }, { "epoch": 1.710934591038848, "grad_norm": 0.13875744459588266, "learning_rate": 3.071536879671436e-06, "loss": 0.6832, "step": 3448 }, { "epoch": 1.7114310537420876, "grad_norm": 0.12935014733112105, "learning_rate": 3.0705853695670535e-06, "loss": 0.6948, "step": 3449 }, { "epoch": 1.7119275164453271, "grad_norm": 0.13892908729449366, "learning_rate": 3.0696337722578444e-06, "loss": 0.7187, "step": 3450 }, { "epoch": 1.7124239791485665, "grad_norm": 0.1311918745816494, "learning_rate": 3.0686820878892472e-06, "loss": 0.698, "step": 3451 }, { "epoch": 1.7129204418518058, "grad_norm": 0.13310007177839275, "learning_rate": 3.067730316606711e-06, "loss": 0.7454, "step": 3452 }, { "epoch": 1.7134169045550451, "grad_norm": 0.1407962461572666, "learning_rate": 3.066778458555698e-06, "loss": 0.7591, "step": 3453 }, { "epoch": 1.7139133672582847, "grad_norm": 0.13091646981705454, "learning_rate": 3.065826513881686e-06, "loss": 0.7167, "step": 3454 }, { "epoch": 1.7144098299615242, "grad_norm": 0.13923461390544403, "learning_rate": 3.0648744827301636e-06, "loss": 0.7779, "step": 3455 }, { "epoch": 1.7149062926647636, "grad_norm": 0.1344779145205169, "learning_rate": 3.0639223652466337e-06, "loss": 0.7465, "step": 3456 }, { "epoch": 1.715402755368003, "grad_norm": 0.12825800482719335, "learning_rate": 3.0629701615766116e-06, "loss": 0.6914, "step": 3457 }, { "epoch": 1.7158992180712422, "grad_norm": 0.13450044556444432, "learning_rate": 3.0620178718656272e-06, "loss": 0.7268, "step": 3458 }, { "epoch": 1.7163956807744818, "grad_norm": 0.13374849327239205, "learning_rate": 3.061065496259222e-06, "loss": 0.6687, "step": 3459 }, { "epoch": 1.7168921434777213, "grad_norm": 0.1350801681751466, "learning_rate": 3.060113034902952e-06, "loss": 0.6999, "step": 3460 }, { "epoch": 1.7173886061809607, "grad_norm": 0.13712402938215143, "learning_rate": 3.0591604879423858e-06, "loss": 0.7099, "step": 3461 }, { "epoch": 1.7178850688842, "grad_norm": 0.1305628583173919, "learning_rate": 3.058207855523104e-06, "loss": 0.6846, "step": 3462 }, { "epoch": 1.7183815315874393, "grad_norm": 0.13419917510098936, "learning_rate": 3.057255137790701e-06, "loss": 0.7718, "step": 3463 }, { "epoch": 1.718877994290679, "grad_norm": 0.13680810835604765, "learning_rate": 3.056302334890786e-06, "loss": 0.7589, "step": 3464 }, { "epoch": 1.7193744569939184, "grad_norm": 0.1336240292540668, "learning_rate": 3.0553494469689792e-06, "loss": 0.6608, "step": 3465 }, { "epoch": 1.7198709196971578, "grad_norm": 0.1396188299452781, "learning_rate": 3.054396474170913e-06, "loss": 0.749, "step": 3466 }, { "epoch": 1.720367382400397, "grad_norm": 0.13274972442712532, "learning_rate": 3.0534434166422346e-06, "loss": 0.7062, "step": 3467 }, { "epoch": 1.7208638451036364, "grad_norm": 0.1381067587560184, "learning_rate": 3.052490274528604e-06, "loss": 0.7976, "step": 3468 }, { "epoch": 1.721360307806876, "grad_norm": 0.1415613666037084, "learning_rate": 3.0515370479756936e-06, "loss": 0.6759, "step": 3469 }, { "epoch": 1.7218567705101155, "grad_norm": 0.136114676951742, "learning_rate": 3.0505837371291885e-06, "loss": 0.6921, "step": 3470 }, { "epoch": 1.7223532332133549, "grad_norm": 0.1356442686679228, "learning_rate": 3.0496303421347872e-06, "loss": 0.6642, "step": 3471 }, { "epoch": 1.7228496959165942, "grad_norm": 0.1376358359049435, "learning_rate": 3.0486768631382012e-06, "loss": 0.7658, "step": 3472 }, { "epoch": 1.7233461586198335, "grad_norm": 0.1392338301585101, "learning_rate": 3.047723300285153e-06, "loss": 0.7241, "step": 3473 }, { "epoch": 1.723842621323073, "grad_norm": 0.1366470624963273, "learning_rate": 3.046769653721381e-06, "loss": 0.6675, "step": 3474 }, { "epoch": 1.7243390840263126, "grad_norm": 0.13307292368981993, "learning_rate": 3.045815923592634e-06, "loss": 0.6673, "step": 3475 }, { "epoch": 1.724835546729552, "grad_norm": 0.1357966319706256, "learning_rate": 3.0448621100446753e-06, "loss": 0.7633, "step": 3476 }, { "epoch": 1.7253320094327913, "grad_norm": 0.14547912410392153, "learning_rate": 3.0439082132232785e-06, "loss": 0.7257, "step": 3477 }, { "epoch": 1.7258284721360306, "grad_norm": 0.13325165665610728, "learning_rate": 3.0429542332742322e-06, "loss": 0.7046, "step": 3478 }, { "epoch": 1.7263249348392702, "grad_norm": 0.1455990009531011, "learning_rate": 3.042000170343337e-06, "loss": 0.7195, "step": 3479 }, { "epoch": 1.7268213975425097, "grad_norm": 0.13234995456293744, "learning_rate": 3.0410460245764066e-06, "loss": 0.6786, "step": 3480 }, { "epoch": 1.727317860245749, "grad_norm": 0.13558320267676813, "learning_rate": 3.0400917961192667e-06, "loss": 0.7187, "step": 3481 }, { "epoch": 1.7278143229489884, "grad_norm": 0.1309367275550789, "learning_rate": 3.0391374851177547e-06, "loss": 0.724, "step": 3482 }, { "epoch": 1.7283107856522277, "grad_norm": 0.12849728447833228, "learning_rate": 3.0381830917177225e-06, "loss": 0.6885, "step": 3483 }, { "epoch": 1.7288072483554673, "grad_norm": 0.13668857277562277, "learning_rate": 3.0372286160650334e-06, "loss": 0.7577, "step": 3484 }, { "epoch": 1.7293037110587068, "grad_norm": 0.13650280495720618, "learning_rate": 3.036274058305565e-06, "loss": 0.7419, "step": 3485 }, { "epoch": 1.7298001737619462, "grad_norm": 0.1294860482203652, "learning_rate": 3.0353194185852052e-06, "loss": 0.7442, "step": 3486 }, { "epoch": 1.7302966364651855, "grad_norm": 0.13841166664415844, "learning_rate": 3.0343646970498554e-06, "loss": 0.7568, "step": 3487 }, { "epoch": 1.7307930991684248, "grad_norm": 0.1293180189453792, "learning_rate": 3.0334098938454293e-06, "loss": 0.7047, "step": 3488 }, { "epoch": 1.7312895618716644, "grad_norm": 0.13258057280123206, "learning_rate": 3.0324550091178536e-06, "loss": 0.7315, "step": 3489 }, { "epoch": 1.731786024574904, "grad_norm": 0.13503628372669735, "learning_rate": 3.031500043013067e-06, "loss": 0.7302, "step": 3490 }, { "epoch": 1.7322824872781433, "grad_norm": 0.13290187674269438, "learning_rate": 3.0305449956770206e-06, "loss": 0.7343, "step": 3491 }, { "epoch": 1.7327789499813826, "grad_norm": 0.13540856252868388, "learning_rate": 3.0295898672556785e-06, "loss": 0.7448, "step": 3492 }, { "epoch": 1.733275412684622, "grad_norm": 0.13564514354413926, "learning_rate": 3.0286346578950165e-06, "loss": 0.703, "step": 3493 }, { "epoch": 1.7337718753878615, "grad_norm": 0.1377806848578677, "learning_rate": 3.0276793677410226e-06, "loss": 0.7445, "step": 3494 }, { "epoch": 1.734268338091101, "grad_norm": 0.1357491981149147, "learning_rate": 3.0267239969396983e-06, "loss": 0.7689, "step": 3495 }, { "epoch": 1.7347648007943404, "grad_norm": 0.1278410172706573, "learning_rate": 3.0257685456370573e-06, "loss": 0.7004, "step": 3496 }, { "epoch": 1.7352612634975797, "grad_norm": 0.1314164181234785, "learning_rate": 3.024813013979123e-06, "loss": 0.6591, "step": 3497 }, { "epoch": 1.735757726200819, "grad_norm": 0.1455338423625045, "learning_rate": 3.0238574021119356e-06, "loss": 0.74, "step": 3498 }, { "epoch": 1.7362541889040586, "grad_norm": 0.1334665037915578, "learning_rate": 3.0229017101815424e-06, "loss": 0.7069, "step": 3499 }, { "epoch": 1.7367506516072981, "grad_norm": 0.13544177303837263, "learning_rate": 3.0219459383340073e-06, "loss": 0.7632, "step": 3500 }, { "epoch": 1.7372471143105375, "grad_norm": 0.13969483295131455, "learning_rate": 3.0209900867154042e-06, "loss": 0.7542, "step": 3501 }, { "epoch": 1.7377435770137768, "grad_norm": 0.13416029469080804, "learning_rate": 3.02003415547182e-06, "loss": 0.6914, "step": 3502 }, { "epoch": 1.7382400397170161, "grad_norm": 0.1342512889947578, "learning_rate": 3.0190781447493526e-06, "loss": 0.7125, "step": 3503 }, { "epoch": 1.7387365024202557, "grad_norm": 0.14318589298841014, "learning_rate": 3.018122054694115e-06, "loss": 0.7625, "step": 3504 }, { "epoch": 1.7392329651234952, "grad_norm": 0.13889004826208431, "learning_rate": 3.0171658854522274e-06, "loss": 0.7114, "step": 3505 }, { "epoch": 1.7397294278267346, "grad_norm": 0.13605943240627727, "learning_rate": 3.016209637169827e-06, "loss": 0.7414, "step": 3506 }, { "epoch": 1.740225890529974, "grad_norm": 0.13235701067519334, "learning_rate": 3.01525330999306e-06, "loss": 0.7208, "step": 3507 }, { "epoch": 1.7407223532332132, "grad_norm": 0.135326876994932, "learning_rate": 3.0142969040680865e-06, "loss": 0.7344, "step": 3508 }, { "epoch": 1.7412188159364528, "grad_norm": 0.13477736579977354, "learning_rate": 3.0133404195410764e-06, "loss": 0.7326, "step": 3509 }, { "epoch": 1.7417152786396923, "grad_norm": 0.13284231516066178, "learning_rate": 3.0123838565582147e-06, "loss": 0.7119, "step": 3510 }, { "epoch": 1.7422117413429317, "grad_norm": 0.13238135868096979, "learning_rate": 3.011427215265696e-06, "loss": 0.7018, "step": 3511 }, { "epoch": 1.742708204046171, "grad_norm": 0.13474513462013873, "learning_rate": 3.0104704958097275e-06, "loss": 0.7405, "step": 3512 }, { "epoch": 1.7432046667494103, "grad_norm": 0.1302603109994265, "learning_rate": 3.009513698336529e-06, "loss": 0.7194, "step": 3513 }, { "epoch": 1.7437011294526499, "grad_norm": 0.13946924173551373, "learning_rate": 3.00855682299233e-06, "loss": 0.7454, "step": 3514 }, { "epoch": 1.7441975921558894, "grad_norm": 0.14022806898060922, "learning_rate": 3.0075998699233757e-06, "loss": 0.7258, "step": 3515 }, { "epoch": 1.7446940548591288, "grad_norm": 0.14276302517939646, "learning_rate": 3.006642839275919e-06, "loss": 0.7353, "step": 3516 }, { "epoch": 1.745190517562368, "grad_norm": 0.13440824467755944, "learning_rate": 3.0056857311962286e-06, "loss": 0.6975, "step": 3517 }, { "epoch": 1.7456869802656074, "grad_norm": 0.13069918155058813, "learning_rate": 3.0047285458305818e-06, "loss": 0.7307, "step": 3518 }, { "epoch": 1.746183442968847, "grad_norm": 0.13013866848398856, "learning_rate": 3.0037712833252696e-06, "loss": 0.7075, "step": 3519 }, { "epoch": 1.7466799056720865, "grad_norm": 0.13588085761716664, "learning_rate": 3.0028139438265946e-06, "loss": 0.7028, "step": 3520 }, { "epoch": 1.7471763683753259, "grad_norm": 0.1379722426469276, "learning_rate": 3.00185652748087e-06, "loss": 0.7558, "step": 3521 }, { "epoch": 1.7476728310785652, "grad_norm": 0.13440744873614507, "learning_rate": 3.000899034434421e-06, "loss": 0.6893, "step": 3522 }, { "epoch": 1.7481692937818045, "grad_norm": 0.13573972148872435, "learning_rate": 2.9999414648335866e-06, "loss": 0.7284, "step": 3523 }, { "epoch": 1.748665756485044, "grad_norm": 0.12921771068329435, "learning_rate": 2.9989838188247157e-06, "loss": 0.7153, "step": 3524 }, { "epoch": 1.7491622191882836, "grad_norm": 0.128812429516214, "learning_rate": 2.998026096554168e-06, "loss": 0.7018, "step": 3525 }, { "epoch": 1.749658681891523, "grad_norm": 0.13351254077010827, "learning_rate": 2.9970682981683165e-06, "loss": 0.7398, "step": 3526 }, { "epoch": 1.7501551445947623, "grad_norm": 0.13254352974014932, "learning_rate": 2.9961104238135457e-06, "loss": 0.7104, "step": 3527 }, { "epoch": 1.7506516072980016, "grad_norm": 0.13040410881238124, "learning_rate": 2.9951524736362513e-06, "loss": 0.6791, "step": 3528 }, { "epoch": 1.7506516072980016, "eval_loss": 0.732168436050415, "eval_runtime": 135.7964, "eval_samples_per_second": 223.519, "eval_steps_per_second": 27.946, "step": 3528 }, { "epoch": 1.7511480700012412, "grad_norm": 0.1454158991301188, "learning_rate": 2.9941944477828405e-06, "loss": 0.729, "step": 3529 }, { "epoch": 1.7516445327044807, "grad_norm": 0.14264344607881654, "learning_rate": 2.9932363463997325e-06, "loss": 0.761, "step": 3530 }, { "epoch": 1.75214099540772, "grad_norm": 0.13191391058050855, "learning_rate": 2.992278169633357e-06, "loss": 0.6719, "step": 3531 }, { "epoch": 1.7526374581109594, "grad_norm": 0.13456024392134855, "learning_rate": 2.9913199176301567e-06, "loss": 0.7191, "step": 3532 }, { "epoch": 1.7531339208141987, "grad_norm": 0.12996595723949278, "learning_rate": 2.990361590536584e-06, "loss": 0.6824, "step": 3533 }, { "epoch": 1.7536303835174383, "grad_norm": 0.1328036765145108, "learning_rate": 2.9894031884991047e-06, "loss": 0.7057, "step": 3534 }, { "epoch": 1.7541268462206778, "grad_norm": 0.13831131399589613, "learning_rate": 2.9884447116641955e-06, "loss": 0.697, "step": 3535 }, { "epoch": 1.7546233089239172, "grad_norm": 0.1421695480209381, "learning_rate": 2.987486160178344e-06, "loss": 0.7423, "step": 3536 }, { "epoch": 1.7551197716271565, "grad_norm": 0.12821462370317024, "learning_rate": 2.9865275341880484e-06, "loss": 0.7165, "step": 3537 }, { "epoch": 1.7556162343303958, "grad_norm": 0.1420145829606162, "learning_rate": 2.9855688338398202e-06, "loss": 0.6948, "step": 3538 }, { "epoch": 1.7561126970336354, "grad_norm": 0.13455255604118072, "learning_rate": 2.9846100592801815e-06, "loss": 0.7232, "step": 3539 }, { "epoch": 1.756609159736875, "grad_norm": 0.13103602177339743, "learning_rate": 2.9836512106556655e-06, "loss": 0.7248, "step": 3540 }, { "epoch": 1.7571056224401143, "grad_norm": 0.1384857291962389, "learning_rate": 2.982692288112816e-06, "loss": 0.7303, "step": 3541 }, { "epoch": 1.7576020851433536, "grad_norm": 0.16056289613315078, "learning_rate": 2.98173329179819e-06, "loss": 0.7049, "step": 3542 }, { "epoch": 1.758098547846593, "grad_norm": 0.1344941324110072, "learning_rate": 2.9807742218583547e-06, "loss": 0.691, "step": 3543 }, { "epoch": 1.7585950105498325, "grad_norm": 0.1354748157954558, "learning_rate": 2.9798150784398885e-06, "loss": 0.7215, "step": 3544 }, { "epoch": 1.759091473253072, "grad_norm": 0.14122504918334078, "learning_rate": 2.9788558616893796e-06, "loss": 0.7167, "step": 3545 }, { "epoch": 1.7595879359563114, "grad_norm": 0.14019815087178897, "learning_rate": 2.9778965717534314e-06, "loss": 0.7125, "step": 3546 }, { "epoch": 1.7600843986595507, "grad_norm": 0.13688281809385325, "learning_rate": 2.976937208778654e-06, "loss": 0.715, "step": 3547 }, { "epoch": 1.76058086136279, "grad_norm": 0.14612296075479175, "learning_rate": 2.975977772911671e-06, "loss": 0.7693, "step": 3548 }, { "epoch": 1.7610773240660296, "grad_norm": 0.13373675241491112, "learning_rate": 2.9750182642991175e-06, "loss": 0.7149, "step": 3549 }, { "epoch": 1.7615737867692691, "grad_norm": 0.13249339511074687, "learning_rate": 2.974058683087639e-06, "loss": 0.7176, "step": 3550 }, { "epoch": 1.7620702494725085, "grad_norm": 0.1445155372590999, "learning_rate": 2.9730990294238914e-06, "loss": 0.7565, "step": 3551 }, { "epoch": 1.7625667121757478, "grad_norm": 0.1301812819229513, "learning_rate": 2.972139303454543e-06, "loss": 0.7337, "step": 3552 }, { "epoch": 1.7630631748789871, "grad_norm": 0.13813116954578447, "learning_rate": 2.971179505326272e-06, "loss": 0.7344, "step": 3553 }, { "epoch": 1.7635596375822267, "grad_norm": 0.13748336922899862, "learning_rate": 2.970219635185769e-06, "loss": 0.781, "step": 3554 }, { "epoch": 1.764056100285466, "grad_norm": 0.13434117881943183, "learning_rate": 2.969259693179733e-06, "loss": 0.7186, "step": 3555 }, { "epoch": 1.7645525629887056, "grad_norm": 0.13228896105397034, "learning_rate": 2.968299679454878e-06, "loss": 0.7308, "step": 3556 }, { "epoch": 1.7650490256919449, "grad_norm": 0.1279445994151255, "learning_rate": 2.967339594157925e-06, "loss": 0.7249, "step": 3557 }, { "epoch": 1.7655454883951842, "grad_norm": 0.13875252416949538, "learning_rate": 2.9663794374356082e-06, "loss": 0.7315, "step": 3558 }, { "epoch": 1.7660419510984238, "grad_norm": 0.13828288612620065, "learning_rate": 2.9654192094346716e-06, "loss": 0.6718, "step": 3559 }, { "epoch": 1.766538413801663, "grad_norm": 0.13503162566112942, "learning_rate": 2.9644589103018728e-06, "loss": 0.7089, "step": 3560 }, { "epoch": 1.7670348765049027, "grad_norm": 0.12961214402076365, "learning_rate": 2.9634985401839754e-06, "loss": 0.7265, "step": 3561 }, { "epoch": 1.767531339208142, "grad_norm": 0.1376599301401424, "learning_rate": 2.9625380992277585e-06, "loss": 0.7361, "step": 3562 }, { "epoch": 1.7680278019113813, "grad_norm": 0.1315003737365444, "learning_rate": 2.961577587580009e-06, "loss": 0.7222, "step": 3563 }, { "epoch": 1.7685242646146209, "grad_norm": 0.13997334248150592, "learning_rate": 2.960617005387526e-06, "loss": 0.7678, "step": 3564 }, { "epoch": 1.7690207273178602, "grad_norm": 0.1377189418395456, "learning_rate": 2.9596563527971197e-06, "loss": 0.7488, "step": 3565 }, { "epoch": 1.7695171900210998, "grad_norm": 0.13949649187035196, "learning_rate": 2.9586956299556093e-06, "loss": 0.7356, "step": 3566 }, { "epoch": 1.770013652724339, "grad_norm": 0.13072149028094537, "learning_rate": 2.957734837009827e-06, "loss": 0.7439, "step": 3567 }, { "epoch": 1.7705101154275784, "grad_norm": 0.13314492915960163, "learning_rate": 2.9567739741066135e-06, "loss": 0.7131, "step": 3568 }, { "epoch": 1.771006578130818, "grad_norm": 0.138259104473977, "learning_rate": 2.955813041392822e-06, "loss": 0.7083, "step": 3569 }, { "epoch": 1.7715030408340573, "grad_norm": 0.13460619412681396, "learning_rate": 2.9548520390153157e-06, "loss": 0.7247, "step": 3570 }, { "epoch": 1.7719995035372968, "grad_norm": 0.13188160151808176, "learning_rate": 2.9538909671209683e-06, "loss": 0.7327, "step": 3571 }, { "epoch": 1.7724959662405362, "grad_norm": 0.13915943835734876, "learning_rate": 2.952929825856664e-06, "loss": 0.7245, "step": 3572 }, { "epoch": 1.7729924289437755, "grad_norm": 0.13113442999553518, "learning_rate": 2.9519686153692984e-06, "loss": 0.6766, "step": 3573 }, { "epoch": 1.773488891647015, "grad_norm": 0.15563993017589076, "learning_rate": 2.9510073358057763e-06, "loss": 0.778, "step": 3574 }, { "epoch": 1.7739853543502544, "grad_norm": 0.13605674921389552, "learning_rate": 2.950045987313014e-06, "loss": 0.7889, "step": 3575 }, { "epoch": 1.774481817053494, "grad_norm": 0.13255856624671072, "learning_rate": 2.949084570037939e-06, "loss": 0.7268, "step": 3576 }, { "epoch": 1.7749782797567333, "grad_norm": 0.1425167455509219, "learning_rate": 2.948123084127488e-06, "loss": 0.779, "step": 3577 }, { "epoch": 1.7754747424599726, "grad_norm": 0.13366479508025797, "learning_rate": 2.947161529728609e-06, "loss": 0.6817, "step": 3578 }, { "epoch": 1.7759712051632122, "grad_norm": 0.13634858914868714, "learning_rate": 2.946199906988259e-06, "loss": 0.7171, "step": 3579 }, { "epoch": 1.7764676678664515, "grad_norm": 0.1339820519944257, "learning_rate": 2.9452382160534075e-06, "loss": 0.6727, "step": 3580 }, { "epoch": 1.776964130569691, "grad_norm": 0.13103750913100626, "learning_rate": 2.9442764570710343e-06, "loss": 0.694, "step": 3581 }, { "epoch": 1.7774605932729304, "grad_norm": 0.1313014217590884, "learning_rate": 2.943314630188127e-06, "loss": 0.7572, "step": 3582 }, { "epoch": 1.7779570559761697, "grad_norm": 0.13018315808842246, "learning_rate": 2.942352735551688e-06, "loss": 0.7372, "step": 3583 }, { "epoch": 1.7784535186794093, "grad_norm": 0.1317368490396673, "learning_rate": 2.9413907733087255e-06, "loss": 0.7442, "step": 3584 }, { "epoch": 1.7789499813826486, "grad_norm": 0.14221916855971456, "learning_rate": 2.9404287436062596e-06, "loss": 0.7101, "step": 3585 }, { "epoch": 1.7794464440858881, "grad_norm": 0.13279652857342705, "learning_rate": 2.9394666465913225e-06, "loss": 0.7166, "step": 3586 }, { "epoch": 1.7799429067891275, "grad_norm": 0.13104555294939943, "learning_rate": 2.9385044824109544e-06, "loss": 0.6961, "step": 3587 }, { "epoch": 1.7804393694923668, "grad_norm": 0.13924421190691488, "learning_rate": 2.937542251212207e-06, "loss": 0.7798, "step": 3588 }, { "epoch": 1.7809358321956061, "grad_norm": 0.13257146284425395, "learning_rate": 2.936579953142143e-06, "loss": 0.7222, "step": 3589 }, { "epoch": 1.7814322948988457, "grad_norm": 0.13510842196780357, "learning_rate": 2.935617588347832e-06, "loss": 0.6802, "step": 3590 }, { "epoch": 1.7819287576020852, "grad_norm": 0.1370255691430651, "learning_rate": 2.9346551569763584e-06, "loss": 0.7058, "step": 3591 }, { "epoch": 1.7824252203053246, "grad_norm": 0.13103573445504976, "learning_rate": 2.9336926591748116e-06, "loss": 0.7076, "step": 3592 }, { "epoch": 1.782921683008564, "grad_norm": 0.1332868798807041, "learning_rate": 2.932730095090297e-06, "loss": 0.7525, "step": 3593 }, { "epoch": 1.7834181457118032, "grad_norm": 0.13239816333094517, "learning_rate": 2.931767464869926e-06, "loss": 0.7118, "step": 3594 }, { "epoch": 1.7839146084150428, "grad_norm": 0.13806112422697028, "learning_rate": 2.93080476866082e-06, "loss": 0.6911, "step": 3595 }, { "epoch": 1.7844110711182823, "grad_norm": 0.13143745347123123, "learning_rate": 2.9298420066101123e-06, "loss": 0.7114, "step": 3596 }, { "epoch": 1.7849075338215217, "grad_norm": 0.1303545454261341, "learning_rate": 2.928879178864946e-06, "loss": 0.7026, "step": 3597 }, { "epoch": 1.785403996524761, "grad_norm": 0.13284060964480524, "learning_rate": 2.9279162855724736e-06, "loss": 0.7719, "step": 3598 }, { "epoch": 1.7859004592280003, "grad_norm": 0.13126700923262716, "learning_rate": 2.926953326879859e-06, "loss": 0.745, "step": 3599 }, { "epoch": 1.78639692193124, "grad_norm": 0.13808740540622605, "learning_rate": 2.9259903029342733e-06, "loss": 0.7492, "step": 3600 }, { "epoch": 1.7868933846344794, "grad_norm": 0.13223911487658072, "learning_rate": 2.9250272138829004e-06, "loss": 0.7115, "step": 3601 }, { "epoch": 1.7873898473377188, "grad_norm": 0.13756497219754027, "learning_rate": 2.9240640598729325e-06, "loss": 0.6888, "step": 3602 }, { "epoch": 1.787886310040958, "grad_norm": 0.12838540835989878, "learning_rate": 2.923100841051572e-06, "loss": 0.695, "step": 3603 }, { "epoch": 1.7883827727441974, "grad_norm": 0.1345217155143832, "learning_rate": 2.922137557566032e-06, "loss": 0.7352, "step": 3604 }, { "epoch": 1.788879235447437, "grad_norm": 0.13011116824419314, "learning_rate": 2.9211742095635353e-06, "loss": 0.715, "step": 3605 }, { "epoch": 1.7893756981506765, "grad_norm": 0.13345770601068715, "learning_rate": 2.9202107971913135e-06, "loss": 0.6526, "step": 3606 }, { "epoch": 1.7898721608539159, "grad_norm": 0.13058633473314155, "learning_rate": 2.9192473205966086e-06, "loss": 0.7298, "step": 3607 }, { "epoch": 1.7903686235571552, "grad_norm": 0.13576890050935692, "learning_rate": 2.918283779926673e-06, "loss": 0.7379, "step": 3608 }, { "epoch": 1.7908650862603945, "grad_norm": 0.1357470033215107, "learning_rate": 2.917320175328769e-06, "loss": 0.7684, "step": 3609 }, { "epoch": 1.791361548963634, "grad_norm": 0.12842283154879058, "learning_rate": 2.9163565069501665e-06, "loss": 0.7074, "step": 3610 }, { "epoch": 1.7918580116668736, "grad_norm": 0.1315525180161788, "learning_rate": 2.915392774938148e-06, "loss": 0.7179, "step": 3611 }, { "epoch": 1.792354474370113, "grad_norm": 0.13320229301947734, "learning_rate": 2.914428979440004e-06, "loss": 0.7488, "step": 3612 }, { "epoch": 1.7928509370733523, "grad_norm": 0.17742426766768907, "learning_rate": 2.9134651206030356e-06, "loss": 0.6971, "step": 3613 }, { "epoch": 1.7933473997765916, "grad_norm": 0.13687894210660118, "learning_rate": 2.9125011985745526e-06, "loss": 0.7579, "step": 3614 }, { "epoch": 1.7938438624798312, "grad_norm": 0.14356923426142504, "learning_rate": 2.911537213501876e-06, "loss": 0.8023, "step": 3615 }, { "epoch": 1.7943403251830707, "grad_norm": 0.13255693124637588, "learning_rate": 2.9105731655323345e-06, "loss": 0.7161, "step": 3616 }, { "epoch": 1.79483678788631, "grad_norm": 0.1340919080282845, "learning_rate": 2.9096090548132678e-06, "loss": 0.698, "step": 3617 }, { "epoch": 1.7953332505895494, "grad_norm": 0.13702138442017256, "learning_rate": 2.908644881492024e-06, "loss": 0.7523, "step": 3618 }, { "epoch": 1.7958297132927887, "grad_norm": 0.1353149372278838, "learning_rate": 2.9076806457159628e-06, "loss": 0.7048, "step": 3619 }, { "epoch": 1.7963261759960283, "grad_norm": 0.13799204504298163, "learning_rate": 2.9067163476324513e-06, "loss": 0.7481, "step": 3620 }, { "epoch": 1.7968226386992678, "grad_norm": 0.12790377926586102, "learning_rate": 2.905751987388868e-06, "loss": 0.6927, "step": 3621 }, { "epoch": 1.7973191014025072, "grad_norm": 0.13965087986871189, "learning_rate": 2.904787565132598e-06, "loss": 0.7361, "step": 3622 }, { "epoch": 1.7978155641057465, "grad_norm": 0.1336271147899048, "learning_rate": 2.903823081011039e-06, "loss": 0.7112, "step": 3623 }, { "epoch": 1.7983120268089858, "grad_norm": 0.13734805652624563, "learning_rate": 2.9028585351715977e-06, "loss": 0.7405, "step": 3624 }, { "epoch": 1.7988084895122254, "grad_norm": 0.13407436084230553, "learning_rate": 2.9018939277616887e-06, "loss": 0.7503, "step": 3625 }, { "epoch": 1.799304952215465, "grad_norm": 0.13291972705380428, "learning_rate": 2.9009292589287357e-06, "loss": 0.7089, "step": 3626 }, { "epoch": 1.7998014149187043, "grad_norm": 0.13331187297256578, "learning_rate": 2.899964528820175e-06, "loss": 0.7445, "step": 3627 }, { "epoch": 1.8002978776219436, "grad_norm": 0.13026121253634557, "learning_rate": 2.8989997375834485e-06, "loss": 0.732, "step": 3628 }, { "epoch": 1.800794340325183, "grad_norm": 0.12960005121948162, "learning_rate": 2.8980348853660096e-06, "loss": 0.7023, "step": 3629 }, { "epoch": 1.8012908030284225, "grad_norm": 0.13361582398506516, "learning_rate": 2.89706997231532e-06, "loss": 0.7045, "step": 3630 }, { "epoch": 1.801787265731662, "grad_norm": 0.13469473169085272, "learning_rate": 2.8961049985788524e-06, "loss": 0.7225, "step": 3631 }, { "epoch": 1.8022837284349014, "grad_norm": 0.13319255321122753, "learning_rate": 2.8951399643040867e-06, "loss": 0.7368, "step": 3632 }, { "epoch": 1.8027801911381407, "grad_norm": 0.12936117726098809, "learning_rate": 2.894174869638513e-06, "loss": 0.7553, "step": 3633 }, { "epoch": 1.80327665384138, "grad_norm": 0.12979563347690065, "learning_rate": 2.8932097147296308e-06, "loss": 0.7155, "step": 3634 }, { "epoch": 1.8037731165446196, "grad_norm": 0.1309079661022396, "learning_rate": 2.8922444997249477e-06, "loss": 0.7244, "step": 3635 }, { "epoch": 1.8042695792478591, "grad_norm": 0.1427263829925681, "learning_rate": 2.891279224771982e-06, "loss": 0.7333, "step": 3636 }, { "epoch": 1.8047660419510985, "grad_norm": 0.1407202293176257, "learning_rate": 2.8903138900182615e-06, "loss": 0.7456, "step": 3637 }, { "epoch": 1.8052625046543378, "grad_norm": 0.16815294681624532, "learning_rate": 2.88934849561132e-06, "loss": 0.6916, "step": 3638 }, { "epoch": 1.8057589673575771, "grad_norm": 0.1424105389357212, "learning_rate": 2.8883830416987043e-06, "loss": 0.7531, "step": 3639 }, { "epoch": 1.8062554300608167, "grad_norm": 0.13301046552150622, "learning_rate": 2.887417528427967e-06, "loss": 0.7107, "step": 3640 }, { "epoch": 1.8067518927640562, "grad_norm": 0.13697780218782096, "learning_rate": 2.8864519559466738e-06, "loss": 0.7519, "step": 3641 }, { "epoch": 1.8072483554672956, "grad_norm": 0.13703156295639382, "learning_rate": 2.8854863244023945e-06, "loss": 0.7769, "step": 3642 }, { "epoch": 1.807744818170535, "grad_norm": 0.1322398802236882, "learning_rate": 2.884520633942712e-06, "loss": 0.6999, "step": 3643 }, { "epoch": 1.8082412808737742, "grad_norm": 0.13492661813080623, "learning_rate": 2.8835548847152143e-06, "loss": 0.7147, "step": 3644 }, { "epoch": 1.8087377435770138, "grad_norm": 0.13231443100813342, "learning_rate": 2.8825890768675035e-06, "loss": 0.6931, "step": 3645 }, { "epoch": 1.8092342062802533, "grad_norm": 0.13342027348327531, "learning_rate": 2.8816232105471864e-06, "loss": 0.7234, "step": 3646 }, { "epoch": 1.8097306689834927, "grad_norm": 0.13502995129426054, "learning_rate": 2.8806572859018806e-06, "loss": 0.7344, "step": 3647 }, { "epoch": 1.810227131686732, "grad_norm": 0.13216879878124957, "learning_rate": 2.8796913030792116e-06, "loss": 0.7127, "step": 3648 }, { "epoch": 1.8107235943899713, "grad_norm": 0.1389960465198068, "learning_rate": 2.878725262226816e-06, "loss": 0.7331, "step": 3649 }, { "epoch": 1.8112200570932109, "grad_norm": 0.133130506783979, "learning_rate": 2.8777591634923353e-06, "loss": 0.7085, "step": 3650 }, { "epoch": 1.8117165197964504, "grad_norm": 0.12953910081868406, "learning_rate": 2.8767930070234233e-06, "loss": 0.7059, "step": 3651 }, { "epoch": 1.8122129824996898, "grad_norm": 0.13304246175165207, "learning_rate": 2.8758267929677418e-06, "loss": 0.7506, "step": 3652 }, { "epoch": 1.812709445202929, "grad_norm": 0.13171896309203177, "learning_rate": 2.874860521472962e-06, "loss": 0.704, "step": 3653 }, { "epoch": 1.8132059079061684, "grad_norm": 0.12867716679784078, "learning_rate": 2.873894192686761e-06, "loss": 0.7, "step": 3654 }, { "epoch": 1.813702370609408, "grad_norm": 0.13335540704857535, "learning_rate": 2.8729278067568272e-06, "loss": 0.7251, "step": 3655 }, { "epoch": 1.8141988333126475, "grad_norm": 0.1335222309537191, "learning_rate": 2.871961363830858e-06, "loss": 0.753, "step": 3656 }, { "epoch": 1.8146952960158869, "grad_norm": 0.13606044352762497, "learning_rate": 2.8709948640565582e-06, "loss": 0.7602, "step": 3657 }, { "epoch": 1.8151917587191262, "grad_norm": 0.13248054985778066, "learning_rate": 2.870028307581642e-06, "loss": 0.6717, "step": 3658 }, { "epoch": 1.8156882214223655, "grad_norm": 0.12524368516267864, "learning_rate": 2.8690616945538324e-06, "loss": 0.6575, "step": 3659 }, { "epoch": 1.816184684125605, "grad_norm": 0.12893746506812964, "learning_rate": 2.8680950251208595e-06, "loss": 0.6987, "step": 3660 }, { "epoch": 1.8166811468288446, "grad_norm": 0.13668691367646468, "learning_rate": 2.8671282994304637e-06, "loss": 0.6763, "step": 3661 }, { "epoch": 1.817177609532084, "grad_norm": 0.13337887117478803, "learning_rate": 2.8661615176303944e-06, "loss": 0.721, "step": 3662 }, { "epoch": 1.8176740722353233, "grad_norm": 0.137232017944419, "learning_rate": 2.865194679868408e-06, "loss": 0.7203, "step": 3663 }, { "epoch": 1.8181705349385626, "grad_norm": 0.1283303529499583, "learning_rate": 2.8642277862922702e-06, "loss": 0.7107, "step": 3664 }, { "epoch": 1.8186669976418022, "grad_norm": 0.14113427375297083, "learning_rate": 2.8632608370497555e-06, "loss": 0.7671, "step": 3665 }, { "epoch": 1.8191634603450417, "grad_norm": 0.1337368727892662, "learning_rate": 2.862293832288646e-06, "loss": 0.6931, "step": 3666 }, { "epoch": 1.819659923048281, "grad_norm": 0.1305593466225775, "learning_rate": 2.8613267721567334e-06, "loss": 0.7555, "step": 3667 }, { "epoch": 1.8201563857515204, "grad_norm": 0.13071527307271635, "learning_rate": 2.8603596568018166e-06, "loss": 0.7269, "step": 3668 }, { "epoch": 1.8206528484547597, "grad_norm": 0.13020862347403772, "learning_rate": 2.859392486371705e-06, "loss": 0.7363, "step": 3669 }, { "epoch": 1.8211493111579993, "grad_norm": 0.13695112485546784, "learning_rate": 2.8584252610142133e-06, "loss": 0.763, "step": 3670 }, { "epoch": 1.8216457738612388, "grad_norm": 0.13490142562797158, "learning_rate": 2.8574579808771676e-06, "loss": 0.6992, "step": 3671 }, { "epoch": 1.8221422365644782, "grad_norm": 0.12665466211192675, "learning_rate": 2.8564906461084014e-06, "loss": 0.6498, "step": 3672 }, { "epoch": 1.8226386992677175, "grad_norm": 0.13516985312128005, "learning_rate": 2.855523256855756e-06, "loss": 0.7067, "step": 3673 }, { "epoch": 1.8231351619709568, "grad_norm": 0.1288573931245718, "learning_rate": 2.8545558132670804e-06, "loss": 0.6909, "step": 3674 }, { "epoch": 1.8236316246741964, "grad_norm": 0.12713861994971612, "learning_rate": 2.8535883154902345e-06, "loss": 0.6958, "step": 3675 }, { "epoch": 1.824128087377436, "grad_norm": 0.13381135557162596, "learning_rate": 2.852620763673083e-06, "loss": 0.7299, "step": 3676 }, { "epoch": 1.8246245500806753, "grad_norm": 0.1319129422115045, "learning_rate": 2.851653157963502e-06, "loss": 0.6831, "step": 3677 }, { "epoch": 1.8251210127839146, "grad_norm": 0.1394166318448852, "learning_rate": 2.850685498509374e-06, "loss": 0.7536, "step": 3678 }, { "epoch": 1.825617475487154, "grad_norm": 0.13103025516203937, "learning_rate": 2.84971778545859e-06, "loss": 0.6629, "step": 3679 }, { "epoch": 1.8261139381903935, "grad_norm": 0.1309333120946672, "learning_rate": 2.8487500189590513e-06, "loss": 0.7068, "step": 3680 }, { "epoch": 1.826610400893633, "grad_norm": 0.12983925391000833, "learning_rate": 2.847782199158663e-06, "loss": 0.7237, "step": 3681 }, { "epoch": 1.8271068635968724, "grad_norm": 0.13732026044139703, "learning_rate": 2.8468143262053416e-06, "loss": 0.7156, "step": 3682 }, { "epoch": 1.8276033263001117, "grad_norm": 0.1364540970178744, "learning_rate": 2.8458464002470114e-06, "loss": 0.7403, "step": 3683 }, { "epoch": 1.828099789003351, "grad_norm": 0.13692850761642453, "learning_rate": 2.844878421431604e-06, "loss": 0.6859, "step": 3684 }, { "epoch": 1.8285962517065906, "grad_norm": 0.13088408167168533, "learning_rate": 2.8439103899070595e-06, "loss": 0.6676, "step": 3685 }, { "epoch": 1.8290927144098301, "grad_norm": 0.12989265875764777, "learning_rate": 2.8429423058213267e-06, "loss": 0.706, "step": 3686 }, { "epoch": 1.8295891771130695, "grad_norm": 0.1291389182253454, "learning_rate": 2.8419741693223607e-06, "loss": 0.7156, "step": 3687 }, { "epoch": 1.8300856398163088, "grad_norm": 0.13918286355290396, "learning_rate": 2.8410059805581258e-06, "loss": 0.7665, "step": 3688 }, { "epoch": 1.8305821025195481, "grad_norm": 0.13286704143055944, "learning_rate": 2.840037739676595e-06, "loss": 0.6953, "step": 3689 }, { "epoch": 1.8310785652227877, "grad_norm": 0.13464853923111927, "learning_rate": 2.8390694468257474e-06, "loss": 0.7558, "step": 3690 }, { "epoch": 1.8315750279260272, "grad_norm": 0.1338921743633815, "learning_rate": 2.838101102153572e-06, "loss": 0.7216, "step": 3691 }, { "epoch": 1.8320714906292666, "grad_norm": 0.1332544592794539, "learning_rate": 2.8371327058080634e-06, "loss": 0.7237, "step": 3692 }, { "epoch": 1.8325679533325059, "grad_norm": 0.13859871403436683, "learning_rate": 2.836164257937226e-06, "loss": 0.6647, "step": 3693 }, { "epoch": 1.8330644160357452, "grad_norm": 0.13773496966704152, "learning_rate": 2.8351957586890724e-06, "loss": 0.7558, "step": 3694 }, { "epoch": 1.8335608787389848, "grad_norm": 0.1363040721397413, "learning_rate": 2.8342272082116214e-06, "loss": 0.7176, "step": 3695 }, { "epoch": 1.834057341442224, "grad_norm": 0.13238261911259094, "learning_rate": 2.833258606652901e-06, "loss": 0.7428, "step": 3696 }, { "epoch": 1.8345538041454637, "grad_norm": 0.1289742090498221, "learning_rate": 2.8322899541609457e-06, "loss": 0.7474, "step": 3697 }, { "epoch": 1.835050266848703, "grad_norm": 0.13229223790941982, "learning_rate": 2.8313212508837985e-06, "loss": 0.6888, "step": 3698 }, { "epoch": 1.8355467295519423, "grad_norm": 0.1405977314818644, "learning_rate": 2.83035249696951e-06, "loss": 0.7118, "step": 3699 }, { "epoch": 1.8360431922551819, "grad_norm": 0.1322368134123888, "learning_rate": 2.82938369256614e-06, "loss": 0.7254, "step": 3700 }, { "epoch": 1.8365396549584212, "grad_norm": 0.13239550569763764, "learning_rate": 2.828414837821753e-06, "loss": 0.7031, "step": 3701 }, { "epoch": 1.8370361176616608, "grad_norm": 0.12443941139663224, "learning_rate": 2.827445932884425e-06, "loss": 0.6938, "step": 3702 }, { "epoch": 1.8375325803649, "grad_norm": 0.13533098431151683, "learning_rate": 2.8264769779022355e-06, "loss": 0.715, "step": 3703 }, { "epoch": 1.8380290430681394, "grad_norm": 0.13336648970273562, "learning_rate": 2.8255079730232742e-06, "loss": 0.7551, "step": 3704 }, { "epoch": 1.838525505771379, "grad_norm": 0.12584694627355517, "learning_rate": 2.8245389183956395e-06, "loss": 0.686, "step": 3705 }, { "epoch": 1.8390219684746183, "grad_norm": 0.1345026841457402, "learning_rate": 2.8235698141674338e-06, "loss": 0.7361, "step": 3706 }, { "epoch": 1.8395184311778578, "grad_norm": 0.1303560374635088, "learning_rate": 2.8226006604867705e-06, "loss": 0.7158, "step": 3707 }, { "epoch": 1.8400148938810972, "grad_norm": 0.13250398945920666, "learning_rate": 2.821631457501769e-06, "loss": 0.7667, "step": 3708 }, { "epoch": 1.8405113565843365, "grad_norm": 0.13580387586544665, "learning_rate": 2.820662205360555e-06, "loss": 0.7681, "step": 3709 }, { "epoch": 1.841007819287576, "grad_norm": 0.13301684404493566, "learning_rate": 2.8196929042112652e-06, "loss": 0.7285, "step": 3710 }, { "epoch": 1.8415042819908154, "grad_norm": 0.13473692403172938, "learning_rate": 2.818723554202041e-06, "loss": 0.721, "step": 3711 }, { "epoch": 1.842000744694055, "grad_norm": 0.12587991568709622, "learning_rate": 2.817754155481032e-06, "loss": 0.7165, "step": 3712 }, { "epoch": 1.8424972073972943, "grad_norm": 0.13035348271632663, "learning_rate": 2.816784708196395e-06, "loss": 0.7185, "step": 3713 }, { "epoch": 1.8429936701005336, "grad_norm": 0.15215075104364917, "learning_rate": 2.815815212496294e-06, "loss": 0.763, "step": 3714 }, { "epoch": 1.8434901328037732, "grad_norm": 0.1373937835941121, "learning_rate": 2.8148456685289016e-06, "loss": 0.7423, "step": 3715 }, { "epoch": 1.8439865955070125, "grad_norm": 0.12962189019847445, "learning_rate": 2.813876076442397e-06, "loss": 0.7229, "step": 3716 }, { "epoch": 1.844483058210252, "grad_norm": 0.12465566308505892, "learning_rate": 2.8129064363849674e-06, "loss": 0.664, "step": 3717 }, { "epoch": 1.8449795209134914, "grad_norm": 0.1320992278623956, "learning_rate": 2.811936748504806e-06, "loss": 0.7538, "step": 3718 }, { "epoch": 1.8454759836167307, "grad_norm": 0.1332828335116305, "learning_rate": 2.810967012950113e-06, "loss": 0.7239, "step": 3719 }, { "epoch": 1.8459724463199703, "grad_norm": 0.12797528135715117, "learning_rate": 2.809997229869099e-06, "loss": 0.7122, "step": 3720 }, { "epoch": 1.8464689090232096, "grad_norm": 0.1301117350778891, "learning_rate": 2.8090273994099793e-06, "loss": 0.72, "step": 3721 }, { "epoch": 1.8469653717264491, "grad_norm": 0.1300300122398293, "learning_rate": 2.8080575217209756e-06, "loss": 0.6907, "step": 3722 }, { "epoch": 1.8474618344296885, "grad_norm": 0.1331300565652563, "learning_rate": 2.8070875969503194e-06, "loss": 0.7219, "step": 3723 }, { "epoch": 1.8479582971329278, "grad_norm": 0.13587506563623994, "learning_rate": 2.8061176252462473e-06, "loss": 0.7145, "step": 3724 }, { "epoch": 1.8484547598361674, "grad_norm": 0.12978747829512013, "learning_rate": 2.805147606757005e-06, "loss": 0.7188, "step": 3725 }, { "epoch": 1.8489512225394067, "grad_norm": 0.12938008759831762, "learning_rate": 2.804177541630843e-06, "loss": 0.69, "step": 3726 }, { "epoch": 1.8494476852426462, "grad_norm": 0.13496840351576017, "learning_rate": 2.803207430016021e-06, "loss": 0.7572, "step": 3727 }, { "epoch": 1.8499441479458856, "grad_norm": 0.13064015668880996, "learning_rate": 2.802237272060806e-06, "loss": 0.7055, "step": 3728 }, { "epoch": 1.850440610649125, "grad_norm": 0.1310236785495075, "learning_rate": 2.8012670679134694e-06, "loss": 0.6944, "step": 3729 }, { "epoch": 1.8509370733523642, "grad_norm": 0.13058022532167907, "learning_rate": 2.8002968177222916e-06, "loss": 0.7276, "step": 3730 }, { "epoch": 1.8514335360556038, "grad_norm": 0.1399741355065743, "learning_rate": 2.7993265216355597e-06, "loss": 0.7019, "step": 3731 }, { "epoch": 1.8519299987588433, "grad_norm": 0.1309873590126447, "learning_rate": 2.798356179801569e-06, "loss": 0.7323, "step": 3732 }, { "epoch": 1.8524264614620827, "grad_norm": 0.12823473606854824, "learning_rate": 2.7973857923686192e-06, "loss": 0.6766, "step": 3733 }, { "epoch": 1.852922924165322, "grad_norm": 0.12599315475512168, "learning_rate": 2.7964153594850207e-06, "loss": 0.6611, "step": 3734 }, { "epoch": 1.8534193868685613, "grad_norm": 0.13218616368397323, "learning_rate": 2.7954448812990857e-06, "loss": 0.7666, "step": 3735 }, { "epoch": 1.853915849571801, "grad_norm": 0.13258167830517983, "learning_rate": 2.7944743579591383e-06, "loss": 0.7688, "step": 3736 }, { "epoch": 1.8544123122750404, "grad_norm": 0.13636167071644384, "learning_rate": 2.793503789613507e-06, "loss": 0.7893, "step": 3737 }, { "epoch": 1.8549087749782798, "grad_norm": 0.1353506593944718, "learning_rate": 2.7925331764105272e-06, "loss": 0.718, "step": 3738 }, { "epoch": 1.855405237681519, "grad_norm": 0.13351328738260573, "learning_rate": 2.791562518498542e-06, "loss": 0.72, "step": 3739 }, { "epoch": 1.8559017003847584, "grad_norm": 0.12916911002443757, "learning_rate": 2.7905918160259005e-06, "loss": 0.6834, "step": 3740 }, { "epoch": 1.856398163087998, "grad_norm": 0.13034694869758517, "learning_rate": 2.789621069140959e-06, "loss": 0.7152, "step": 3741 }, { "epoch": 1.8568946257912375, "grad_norm": 0.1358558324112095, "learning_rate": 2.788650277992081e-06, "loss": 0.7477, "step": 3742 }, { "epoch": 1.8573910884944769, "grad_norm": 0.13072120609339014, "learning_rate": 2.7876794427276362e-06, "loss": 0.7296, "step": 3743 }, { "epoch": 1.8578875511977162, "grad_norm": 0.12977265231141502, "learning_rate": 2.786708563496002e-06, "loss": 0.689, "step": 3744 }, { "epoch": 1.8583840139009555, "grad_norm": 0.14176411747850756, "learning_rate": 2.78573764044556e-06, "loss": 0.7371, "step": 3745 }, { "epoch": 1.858880476604195, "grad_norm": 0.13585066242740843, "learning_rate": 2.7847666737247008e-06, "loss": 0.7051, "step": 3746 }, { "epoch": 1.8593769393074346, "grad_norm": 0.12837037510490845, "learning_rate": 2.783795663481822e-06, "loss": 0.7081, "step": 3747 }, { "epoch": 1.859873402010674, "grad_norm": 0.13389138484666943, "learning_rate": 2.7828246098653255e-06, "loss": 0.7607, "step": 3748 }, { "epoch": 1.8603698647139133, "grad_norm": 0.13565169731031765, "learning_rate": 2.781853513023623e-06, "loss": 0.7119, "step": 3749 }, { "epoch": 1.8608663274171526, "grad_norm": 0.13181731856838821, "learning_rate": 2.7808823731051306e-06, "loss": 0.7253, "step": 3750 }, { "epoch": 1.8613627901203922, "grad_norm": 0.1341896017701526, "learning_rate": 2.7799111902582697e-06, "loss": 0.6954, "step": 3751 }, { "epoch": 1.8618592528236317, "grad_norm": 0.13618587231085355, "learning_rate": 2.7789399646314723e-06, "loss": 0.7345, "step": 3752 }, { "epoch": 1.862355715526871, "grad_norm": 0.1369640920886463, "learning_rate": 2.7779686963731738e-06, "loss": 0.7393, "step": 3753 }, { "epoch": 1.8628521782301104, "grad_norm": 0.13390573696400448, "learning_rate": 2.7769973856318167e-06, "loss": 0.725, "step": 3754 }, { "epoch": 1.8633486409333497, "grad_norm": 0.1336592460566132, "learning_rate": 2.7760260325558507e-06, "loss": 0.6977, "step": 3755 }, { "epoch": 1.8638451036365893, "grad_norm": 0.13604007586743042, "learning_rate": 2.7750546372937315e-06, "loss": 0.7183, "step": 3756 }, { "epoch": 1.8643415663398288, "grad_norm": 0.1357385191072151, "learning_rate": 2.774083199993921e-06, "loss": 0.7517, "step": 3757 }, { "epoch": 1.8648380290430682, "grad_norm": 0.1301840034604384, "learning_rate": 2.7731117208048875e-06, "loss": 0.6671, "step": 3758 }, { "epoch": 1.8653344917463075, "grad_norm": 0.13571137517897333, "learning_rate": 2.772140199875107e-06, "loss": 0.7193, "step": 3759 }, { "epoch": 1.8658309544495468, "grad_norm": 0.13744700754172479, "learning_rate": 2.77116863735306e-06, "loss": 0.7301, "step": 3760 }, { "epoch": 1.8663274171527864, "grad_norm": 0.1338974951951887, "learning_rate": 2.7701970333872354e-06, "loss": 0.6994, "step": 3761 }, { "epoch": 1.866823879856026, "grad_norm": 0.12864049194839253, "learning_rate": 2.769225388126126e-06, "loss": 0.693, "step": 3762 }, { "epoch": 1.8673203425592653, "grad_norm": 0.13155044309844505, "learning_rate": 2.7682537017182326e-06, "loss": 0.721, "step": 3763 }, { "epoch": 1.8678168052625046, "grad_norm": 0.1347463327038866, "learning_rate": 2.767281974312062e-06, "loss": 0.712, "step": 3764 }, { "epoch": 1.868313267965744, "grad_norm": 0.12971495960250434, "learning_rate": 2.7663102060561274e-06, "loss": 0.7143, "step": 3765 }, { "epoch": 1.8688097306689835, "grad_norm": 0.13510904366559584, "learning_rate": 2.7653383970989477e-06, "loss": 0.7057, "step": 3766 }, { "epoch": 1.869306193372223, "grad_norm": 0.13401673104563622, "learning_rate": 2.7643665475890484e-06, "loss": 0.6864, "step": 3767 }, { "epoch": 1.8698026560754624, "grad_norm": 0.13210648198821992, "learning_rate": 2.763394657674961e-06, "loss": 0.7038, "step": 3768 }, { "epoch": 1.8702991187787017, "grad_norm": 0.1318377078194905, "learning_rate": 2.762422727505224e-06, "loss": 0.7596, "step": 3769 }, { "epoch": 1.870795581481941, "grad_norm": 0.13025033485200868, "learning_rate": 2.76145075722838e-06, "loss": 0.7144, "step": 3770 }, { "epoch": 1.8712920441851806, "grad_norm": 0.14491613825665298, "learning_rate": 2.76047874699298e-06, "loss": 0.7712, "step": 3771 }, { "epoch": 1.8717885068884201, "grad_norm": 0.13563190298000607, "learning_rate": 2.75950669694758e-06, "loss": 0.7374, "step": 3772 }, { "epoch": 1.8722849695916595, "grad_norm": 0.1320789962394144, "learning_rate": 2.7585346072407422e-06, "loss": 0.7426, "step": 3773 }, { "epoch": 1.8727814322948988, "grad_norm": 0.13057853212026468, "learning_rate": 2.757562478021035e-06, "loss": 0.7262, "step": 3774 }, { "epoch": 1.8732778949981381, "grad_norm": 0.13102963708589455, "learning_rate": 2.756590309437033e-06, "loss": 0.7011, "step": 3775 }, { "epoch": 1.8737743577013777, "grad_norm": 0.13375423324978467, "learning_rate": 2.755618101637315e-06, "loss": 0.718, "step": 3776 }, { "epoch": 1.8742708204046172, "grad_norm": 0.12830373485814348, "learning_rate": 2.754645854770471e-06, "loss": 0.7265, "step": 3777 }, { "epoch": 1.8747672831078566, "grad_norm": 0.13336641643595246, "learning_rate": 2.75367356898509e-06, "loss": 0.6742, "step": 3778 }, { "epoch": 1.875263745811096, "grad_norm": 0.13157972272739404, "learning_rate": 2.7527012444297707e-06, "loss": 0.7321, "step": 3779 }, { "epoch": 1.8757602085143352, "grad_norm": 0.14208051021228643, "learning_rate": 2.751728881253118e-06, "loss": 0.7672, "step": 3780 }, { "epoch": 1.8762566712175748, "grad_norm": 0.1371097076190842, "learning_rate": 2.7507564796037424e-06, "loss": 0.7512, "step": 3781 }, { "epoch": 1.8767531339208143, "grad_norm": 0.13188702665160387, "learning_rate": 2.7497840396302596e-06, "loss": 0.7401, "step": 3782 }, { "epoch": 1.8772495966240537, "grad_norm": 0.13500026817249772, "learning_rate": 2.748811561481291e-06, "loss": 0.7295, "step": 3783 }, { "epoch": 1.877746059327293, "grad_norm": 0.13273560547309662, "learning_rate": 2.7478390453054645e-06, "loss": 0.7283, "step": 3784 }, { "epoch": 1.8782425220305323, "grad_norm": 0.13872675617089372, "learning_rate": 2.746866491251414e-06, "loss": 0.7602, "step": 3785 }, { "epoch": 1.8787389847337719, "grad_norm": 0.12924106818518485, "learning_rate": 2.7458938994677784e-06, "loss": 0.7546, "step": 3786 }, { "epoch": 1.8792354474370114, "grad_norm": 0.136941377040484, "learning_rate": 2.744921270103203e-06, "loss": 0.7579, "step": 3787 }, { "epoch": 1.8797319101402508, "grad_norm": 0.1298041641175948, "learning_rate": 2.743948603306339e-06, "loss": 0.7051, "step": 3788 }, { "epoch": 1.88022837284349, "grad_norm": 0.13832860111903247, "learning_rate": 2.7429758992258416e-06, "loss": 0.7243, "step": 3789 }, { "epoch": 1.8807248355467294, "grad_norm": 0.13402102344704708, "learning_rate": 2.7420031580103736e-06, "loss": 0.7225, "step": 3790 }, { "epoch": 1.881221298249969, "grad_norm": 0.13987575442514208, "learning_rate": 2.7410303798086034e-06, "loss": 0.7264, "step": 3791 }, { "epoch": 1.8817177609532085, "grad_norm": 0.13290550293386685, "learning_rate": 2.7400575647692046e-06, "loss": 0.7677, "step": 3792 }, { "epoch": 1.8822142236564479, "grad_norm": 0.13083520400866375, "learning_rate": 2.739084713040856e-06, "loss": 0.7193, "step": 3793 }, { "epoch": 1.8827106863596872, "grad_norm": 0.1282525167355114, "learning_rate": 2.7381118247722427e-06, "loss": 0.6802, "step": 3794 }, { "epoch": 1.8832071490629265, "grad_norm": 0.13627486143137021, "learning_rate": 2.7371389001120545e-06, "loss": 0.722, "step": 3795 }, { "epoch": 1.883703611766166, "grad_norm": 0.1277896117884158, "learning_rate": 2.736165939208987e-06, "loss": 0.6816, "step": 3796 }, { "epoch": 1.8842000744694056, "grad_norm": 0.12853176516695636, "learning_rate": 2.735192942211743e-06, "loss": 0.7553, "step": 3797 }, { "epoch": 1.884696537172645, "grad_norm": 0.13015691759857087, "learning_rate": 2.7342199092690284e-06, "loss": 0.7142, "step": 3798 }, { "epoch": 1.8851929998758843, "grad_norm": 0.13097419516180478, "learning_rate": 2.733246840529557e-06, "loss": 0.7132, "step": 3799 }, { "epoch": 1.8856894625791236, "grad_norm": 0.12724020311608158, "learning_rate": 2.7322737361420454e-06, "loss": 0.6748, "step": 3800 }, { "epoch": 1.8861859252823632, "grad_norm": 0.1270428675400565, "learning_rate": 2.7313005962552174e-06, "loss": 0.7363, "step": 3801 }, { "epoch": 1.8866823879856027, "grad_norm": 0.13592361098777284, "learning_rate": 2.7303274210178023e-06, "loss": 0.7309, "step": 3802 }, { "epoch": 1.887178850688842, "grad_norm": 0.13079941165628042, "learning_rate": 2.729354210578533e-06, "loss": 0.7352, "step": 3803 }, { "epoch": 1.8876753133920814, "grad_norm": 0.1340097748828976, "learning_rate": 2.7283809650861508e-06, "loss": 0.7285, "step": 3804 }, { "epoch": 1.8881717760953207, "grad_norm": 0.13773420298973918, "learning_rate": 2.727407684689399e-06, "loss": 0.7649, "step": 3805 }, { "epoch": 1.8886682387985603, "grad_norm": 0.133353759836882, "learning_rate": 2.7264343695370294e-06, "loss": 0.7464, "step": 3806 }, { "epoch": 1.8891647015017998, "grad_norm": 0.131033603827758, "learning_rate": 2.725461019777797e-06, "loss": 0.6845, "step": 3807 }, { "epoch": 1.8896611642050392, "grad_norm": 0.15770720974803792, "learning_rate": 2.7244876355604627e-06, "loss": 0.7314, "step": 3808 }, { "epoch": 1.8901576269082785, "grad_norm": 0.1304863297812739, "learning_rate": 2.723514217033793e-06, "loss": 0.7471, "step": 3809 }, { "epoch": 1.8906540896115178, "grad_norm": 0.13132658095809105, "learning_rate": 2.722540764346559e-06, "loss": 0.7382, "step": 3810 }, { "epoch": 1.8911505523147574, "grad_norm": 0.13948668806141548, "learning_rate": 2.7215672776475373e-06, "loss": 0.7345, "step": 3811 }, { "epoch": 1.891647015017997, "grad_norm": 0.14071216008468773, "learning_rate": 2.720593757085509e-06, "loss": 0.7849, "step": 3812 }, { "epoch": 1.8921434777212363, "grad_norm": 0.1328645102350295, "learning_rate": 2.719620202809262e-06, "loss": 0.7345, "step": 3813 }, { "epoch": 1.8926399404244756, "grad_norm": 0.1323492318218001, "learning_rate": 2.718646614967589e-06, "loss": 0.7121, "step": 3814 }, { "epoch": 1.893136403127715, "grad_norm": 0.13311000745662738, "learning_rate": 2.7176729937092868e-06, "loss": 0.6856, "step": 3815 }, { "epoch": 1.8936328658309545, "grad_norm": 0.14096801189996547, "learning_rate": 2.716699339183157e-06, "loss": 0.6663, "step": 3816 }, { "epoch": 1.894129328534194, "grad_norm": 0.13592293501407796, "learning_rate": 2.7157256515380075e-06, "loss": 0.7389, "step": 3817 }, { "epoch": 1.8946257912374334, "grad_norm": 0.13440467247038096, "learning_rate": 2.7147519309226524e-06, "loss": 0.7136, "step": 3818 }, { "epoch": 1.8951222539406727, "grad_norm": 0.13477075791277746, "learning_rate": 2.713778177485906e-06, "loss": 0.7187, "step": 3819 }, { "epoch": 1.895618716643912, "grad_norm": 0.1273414392999846, "learning_rate": 2.712804391376594e-06, "loss": 0.742, "step": 3820 }, { "epoch": 1.8961151793471516, "grad_norm": 0.13460725669842077, "learning_rate": 2.7118305727435433e-06, "loss": 0.7034, "step": 3821 }, { "epoch": 1.8966116420503911, "grad_norm": 0.13175344309138737, "learning_rate": 2.710856721735585e-06, "loss": 0.6716, "step": 3822 }, { "epoch": 1.8971081047536305, "grad_norm": 0.12573529615472806, "learning_rate": 2.709882838501558e-06, "loss": 0.6647, "step": 3823 }, { "epoch": 1.8976045674568698, "grad_norm": 0.1464775291640466, "learning_rate": 2.7089089231903045e-06, "loss": 0.7118, "step": 3824 }, { "epoch": 1.8981010301601091, "grad_norm": 0.14724481437959286, "learning_rate": 2.707934975950672e-06, "loss": 0.7267, "step": 3825 }, { "epoch": 1.8985974928633487, "grad_norm": 0.12701179330406961, "learning_rate": 2.706960996931512e-06, "loss": 0.7076, "step": 3826 }, { "epoch": 1.8990939555665882, "grad_norm": 0.13539179556079792, "learning_rate": 2.7059869862816817e-06, "loss": 0.7247, "step": 3827 }, { "epoch": 1.8995904182698276, "grad_norm": 0.13682853094889003, "learning_rate": 2.7050129441500437e-06, "loss": 0.7146, "step": 3828 }, { "epoch": 1.9000868809730669, "grad_norm": 0.13008454352233842, "learning_rate": 2.7040388706854636e-06, "loss": 0.7115, "step": 3829 }, { "epoch": 1.9005833436763062, "grad_norm": 0.13324370142235686, "learning_rate": 2.703064766036814e-06, "loss": 0.6941, "step": 3830 }, { "epoch": 1.9010798063795458, "grad_norm": 0.1379864973843644, "learning_rate": 2.7020906303529722e-06, "loss": 0.7258, "step": 3831 }, { "epoch": 1.9015762690827853, "grad_norm": 0.13430922945541224, "learning_rate": 2.701116463782816e-06, "loss": 0.717, "step": 3832 }, { "epoch": 1.9020727317860247, "grad_norm": 0.13535178628287625, "learning_rate": 2.7001422664752338e-06, "loss": 0.7187, "step": 3833 }, { "epoch": 1.902569194489264, "grad_norm": 0.13401050040469997, "learning_rate": 2.6991680385791154e-06, "loss": 0.6976, "step": 3834 }, { "epoch": 1.9030656571925033, "grad_norm": 0.13110828507944752, "learning_rate": 2.698193780243355e-06, "loss": 0.7308, "step": 3835 }, { "epoch": 1.9035621198957429, "grad_norm": 0.1330174198944991, "learning_rate": 2.6972194916168533e-06, "loss": 0.68, "step": 3836 }, { "epoch": 1.9040585825989822, "grad_norm": 0.129123889581125, "learning_rate": 2.696245172848515e-06, "loss": 0.6816, "step": 3837 }, { "epoch": 1.9045550453022217, "grad_norm": 0.1323170000848133, "learning_rate": 2.6952708240872477e-06, "loss": 0.74, "step": 3838 }, { "epoch": 1.905051508005461, "grad_norm": 0.1281877570650443, "learning_rate": 2.6942964454819663e-06, "loss": 0.6716, "step": 3839 }, { "epoch": 1.9055479707087004, "grad_norm": 0.1273111925333337, "learning_rate": 2.693322037181588e-06, "loss": 0.7161, "step": 3840 }, { "epoch": 1.90604443341194, "grad_norm": 0.13411942656602813, "learning_rate": 2.692347599335037e-06, "loss": 0.7475, "step": 3841 }, { "epoch": 1.9065408961151793, "grad_norm": 0.13465714368570397, "learning_rate": 2.69137313209124e-06, "loss": 0.6818, "step": 3842 }, { "epoch": 1.9070373588184188, "grad_norm": 0.13693810595779668, "learning_rate": 2.6903986355991267e-06, "loss": 0.7217, "step": 3843 }, { "epoch": 1.9075338215216582, "grad_norm": 0.13227383220776676, "learning_rate": 2.6894241100076356e-06, "loss": 0.7046, "step": 3844 }, { "epoch": 1.9080302842248975, "grad_norm": 0.13024783516033986, "learning_rate": 2.6884495554657057e-06, "loss": 0.7341, "step": 3845 }, { "epoch": 1.908526746928137, "grad_norm": 0.13571948071256487, "learning_rate": 2.687474972122283e-06, "loss": 0.7179, "step": 3846 }, { "epoch": 1.9090232096313764, "grad_norm": 0.14165402752990086, "learning_rate": 2.6865003601263177e-06, "loss": 0.7488, "step": 3847 }, { "epoch": 1.909519672334616, "grad_norm": 0.13217107325979766, "learning_rate": 2.685525719626762e-06, "loss": 0.7316, "step": 3848 }, { "epoch": 1.9100161350378553, "grad_norm": 0.12981859327701908, "learning_rate": 2.6845510507725747e-06, "loss": 0.6838, "step": 3849 }, { "epoch": 1.9105125977410946, "grad_norm": 0.13278332867245013, "learning_rate": 2.6835763537127186e-06, "loss": 0.698, "step": 3850 }, { "epoch": 1.9110090604443342, "grad_norm": 0.12972846902968782, "learning_rate": 2.68260162859616e-06, "loss": 0.6937, "step": 3851 }, { "epoch": 1.9115055231475735, "grad_norm": 0.13499670331712021, "learning_rate": 2.68162687557187e-06, "loss": 0.7544, "step": 3852 }, { "epoch": 1.912001985850813, "grad_norm": 0.13281988396086394, "learning_rate": 2.680652094788825e-06, "loss": 0.7012, "step": 3853 }, { "epoch": 1.9124984485540524, "grad_norm": 0.13640084140042943, "learning_rate": 2.679677286396003e-06, "loss": 0.7506, "step": 3854 }, { "epoch": 1.9129949112572917, "grad_norm": 0.1385667059889805, "learning_rate": 2.678702450542389e-06, "loss": 0.7315, "step": 3855 }, { "epoch": 1.9134913739605313, "grad_norm": 0.1340058683822914, "learning_rate": 2.6777275873769703e-06, "loss": 0.7702, "step": 3856 }, { "epoch": 1.9139878366637706, "grad_norm": 0.13229770495593288, "learning_rate": 2.67675269704874e-06, "loss": 0.688, "step": 3857 }, { "epoch": 1.9144842993670101, "grad_norm": 0.13565711456891463, "learning_rate": 2.6757777797066947e-06, "loss": 0.7322, "step": 3858 }, { "epoch": 1.9149807620702495, "grad_norm": 0.12998843981576316, "learning_rate": 2.6748028354998333e-06, "loss": 0.7108, "step": 3859 }, { "epoch": 1.9154772247734888, "grad_norm": 0.13608274463655146, "learning_rate": 2.6738278645771615e-06, "loss": 0.7161, "step": 3860 }, { "epoch": 1.9159736874767284, "grad_norm": 0.13224537124708974, "learning_rate": 2.6728528670876875e-06, "loss": 0.6993, "step": 3861 }, { "epoch": 1.9164701501799677, "grad_norm": 0.13440646467554213, "learning_rate": 2.6718778431804243e-06, "loss": 0.7041, "step": 3862 }, { "epoch": 1.9169666128832072, "grad_norm": 0.12745811657851805, "learning_rate": 2.670902793004389e-06, "loss": 0.6779, "step": 3863 }, { "epoch": 1.9174630755864466, "grad_norm": 0.13050712516771976, "learning_rate": 2.6699277167086013e-06, "loss": 0.7166, "step": 3864 }, { "epoch": 1.917959538289686, "grad_norm": 0.12713091659389295, "learning_rate": 2.668952614442087e-06, "loss": 0.7017, "step": 3865 }, { "epoch": 1.9184560009929255, "grad_norm": 0.135043325139347, "learning_rate": 2.6679774863538747e-06, "loss": 0.7295, "step": 3866 }, { "epoch": 1.9189524636961648, "grad_norm": 0.13202671430433874, "learning_rate": 2.667002332592997e-06, "loss": 0.7315, "step": 3867 }, { "epoch": 1.9194489263994043, "grad_norm": 0.13505500052720468, "learning_rate": 2.6660271533084895e-06, "loss": 0.7183, "step": 3868 }, { "epoch": 1.9199453891026437, "grad_norm": 0.13089106796090635, "learning_rate": 2.6650519486493955e-06, "loss": 0.7629, "step": 3869 }, { "epoch": 1.920441851805883, "grad_norm": 0.13514599448403475, "learning_rate": 2.664076718764756e-06, "loss": 0.7292, "step": 3870 }, { "epoch": 1.9209383145091226, "grad_norm": 0.13758072791357281, "learning_rate": 2.663101463803621e-06, "loss": 0.7058, "step": 3871 }, { "epoch": 1.9214347772123619, "grad_norm": 0.13451491052620126, "learning_rate": 2.6621261839150426e-06, "loss": 0.764, "step": 3872 }, { "epoch": 1.9219312399156014, "grad_norm": 0.13120349619972196, "learning_rate": 2.6611508792480763e-06, "loss": 0.7192, "step": 3873 }, { "epoch": 1.9224277026188408, "grad_norm": 0.1343506319273604, "learning_rate": 2.6601755499517826e-06, "loss": 0.7515, "step": 3874 }, { "epoch": 1.92292416532208, "grad_norm": 0.13155915623807082, "learning_rate": 2.6592001961752246e-06, "loss": 0.695, "step": 3875 }, { "epoch": 1.9234206280253194, "grad_norm": 0.12537341810788755, "learning_rate": 2.658224818067468e-06, "loss": 0.6914, "step": 3876 }, { "epoch": 1.923917090728559, "grad_norm": 0.1298224456392821, "learning_rate": 2.657249415777585e-06, "loss": 0.7031, "step": 3877 }, { "epoch": 1.9244135534317985, "grad_norm": 0.12831397033755135, "learning_rate": 2.6562739894546507e-06, "loss": 0.6985, "step": 3878 }, { "epoch": 1.9249100161350379, "grad_norm": 0.13463509794079495, "learning_rate": 2.6552985392477424e-06, "loss": 0.7578, "step": 3879 }, { "epoch": 1.9254064788382772, "grad_norm": 0.13466504296387913, "learning_rate": 2.6543230653059427e-06, "loss": 0.7357, "step": 3880 }, { "epoch": 1.9259029415415165, "grad_norm": 0.13590866765903148, "learning_rate": 2.6533475677783364e-06, "loss": 0.7367, "step": 3881 }, { "epoch": 1.926399404244756, "grad_norm": 0.13302016457236643, "learning_rate": 2.652372046814014e-06, "loss": 0.7737, "step": 3882 }, { "epoch": 1.9268958669479956, "grad_norm": 0.13348758747050615, "learning_rate": 2.651396502562067e-06, "loss": 0.8016, "step": 3883 }, { "epoch": 1.927392329651235, "grad_norm": 0.1327244870657118, "learning_rate": 2.6504209351715914e-06, "loss": 0.7173, "step": 3884 }, { "epoch": 1.9278887923544743, "grad_norm": 0.12927521222084867, "learning_rate": 2.6494453447916884e-06, "loss": 0.6832, "step": 3885 }, { "epoch": 1.9283852550577136, "grad_norm": 0.13386451158693466, "learning_rate": 2.6484697315714602e-06, "loss": 0.7336, "step": 3886 }, { "epoch": 1.9288817177609532, "grad_norm": 0.1264106819143579, "learning_rate": 2.6474940956600143e-06, "loss": 0.6716, "step": 3887 }, { "epoch": 1.9293781804641927, "grad_norm": 0.1280344281232393, "learning_rate": 2.64651843720646e-06, "loss": 0.7613, "step": 3888 }, { "epoch": 1.929874643167432, "grad_norm": 0.13685165914605946, "learning_rate": 2.6455427563599128e-06, "loss": 0.7595, "step": 3889 }, { "epoch": 1.9303711058706714, "grad_norm": 0.12802262711212847, "learning_rate": 2.644567053269489e-06, "loss": 0.7105, "step": 3890 }, { "epoch": 1.9308675685739107, "grad_norm": 0.1401396120332125, "learning_rate": 2.643591328084309e-06, "loss": 0.7363, "step": 3891 }, { "epoch": 1.9313640312771503, "grad_norm": 0.13423489071016823, "learning_rate": 2.6426155809534958e-06, "loss": 0.6799, "step": 3892 }, { "epoch": 1.9318604939803898, "grad_norm": 0.125010992787907, "learning_rate": 2.6416398120261782e-06, "loss": 0.6502, "step": 3893 }, { "epoch": 1.9323569566836292, "grad_norm": 0.1378318453962945, "learning_rate": 2.6406640214514866e-06, "loss": 0.7189, "step": 3894 }, { "epoch": 1.9328534193868685, "grad_norm": 0.12766766267445206, "learning_rate": 2.639688209378554e-06, "loss": 0.6806, "step": 3895 }, { "epoch": 1.9333498820901078, "grad_norm": 0.1328404296148707, "learning_rate": 2.63871237595652e-06, "loss": 0.716, "step": 3896 }, { "epoch": 1.9338463447933474, "grad_norm": 0.13020525639573502, "learning_rate": 2.6377365213345217e-06, "loss": 0.6673, "step": 3897 }, { "epoch": 1.934342807496587, "grad_norm": 0.12842653922306596, "learning_rate": 2.6367606456617057e-06, "loss": 0.6998, "step": 3898 }, { "epoch": 1.9348392701998263, "grad_norm": 0.1363023679071453, "learning_rate": 2.6357847490872176e-06, "loss": 0.7224, "step": 3899 }, { "epoch": 1.9353357329030656, "grad_norm": 0.1371628916129143, "learning_rate": 2.634808831760207e-06, "loss": 0.7227, "step": 3900 }, { "epoch": 1.935832195606305, "grad_norm": 0.13510099814120166, "learning_rate": 2.6338328938298287e-06, "loss": 0.7311, "step": 3901 }, { "epoch": 1.9363286583095445, "grad_norm": 0.1298081070212144, "learning_rate": 2.632856935445238e-06, "loss": 0.6655, "step": 3902 }, { "epoch": 1.936825121012784, "grad_norm": 0.1289538644890483, "learning_rate": 2.6318809567555946e-06, "loss": 0.7062, "step": 3903 }, { "epoch": 1.9373215837160234, "grad_norm": 0.13439353563363904, "learning_rate": 2.630904957910062e-06, "loss": 0.6991, "step": 3904 }, { "epoch": 1.9378180464192627, "grad_norm": 0.13576155759044287, "learning_rate": 2.6299289390578054e-06, "loss": 0.7344, "step": 3905 }, { "epoch": 1.938314509122502, "grad_norm": 0.1284387703639709, "learning_rate": 2.628952900347994e-06, "loss": 0.7029, "step": 3906 }, { "epoch": 1.9388109718257416, "grad_norm": 0.12937400679523783, "learning_rate": 2.6279768419297997e-06, "loss": 0.6859, "step": 3907 }, { "epoch": 1.9393074345289811, "grad_norm": 0.1317149945336735, "learning_rate": 2.6270007639523966e-06, "loss": 0.679, "step": 3908 }, { "epoch": 1.9398038972322205, "grad_norm": 0.1306880273284504, "learning_rate": 2.6260246665649623e-06, "loss": 0.6934, "step": 3909 }, { "epoch": 1.9403003599354598, "grad_norm": 0.13275523795140065, "learning_rate": 2.625048549916679e-06, "loss": 0.71, "step": 3910 }, { "epoch": 1.9407968226386991, "grad_norm": 0.13269576003308478, "learning_rate": 2.6240724141567296e-06, "loss": 0.7458, "step": 3911 }, { "epoch": 1.9412932853419387, "grad_norm": 0.12913224685659597, "learning_rate": 2.6230962594343018e-06, "loss": 0.639, "step": 3912 }, { "epoch": 1.9417897480451782, "grad_norm": 0.13621237829923233, "learning_rate": 2.622120085898584e-06, "loss": 0.7167, "step": 3913 }, { "epoch": 1.9422862107484176, "grad_norm": 0.13555011416798202, "learning_rate": 2.6211438936987692e-06, "loss": 0.7019, "step": 3914 }, { "epoch": 1.942782673451657, "grad_norm": 0.13015371567538989, "learning_rate": 2.620167682984052e-06, "loss": 0.7581, "step": 3915 }, { "epoch": 1.9432791361548962, "grad_norm": 0.14513175483219928, "learning_rate": 2.6191914539036318e-06, "loss": 0.7588, "step": 3916 }, { "epoch": 1.9437755988581358, "grad_norm": 0.13652288010038582, "learning_rate": 2.6182152066067095e-06, "loss": 0.7355, "step": 3917 }, { "epoch": 1.9442720615613753, "grad_norm": 0.14112063768896954, "learning_rate": 2.6172389412424876e-06, "loss": 0.8433, "step": 3918 }, { "epoch": 1.9447685242646147, "grad_norm": 0.13310261496502976, "learning_rate": 2.616262657960173e-06, "loss": 0.749, "step": 3919 }, { "epoch": 1.945264986967854, "grad_norm": 0.1298763438679353, "learning_rate": 2.6152863569089754e-06, "loss": 0.7002, "step": 3920 }, { "epoch": 1.9457614496710933, "grad_norm": 0.13406943600819382, "learning_rate": 2.614310038238107e-06, "loss": 0.7198, "step": 3921 }, { "epoch": 1.9462579123743329, "grad_norm": 0.1388915294171547, "learning_rate": 2.613333702096782e-06, "loss": 0.732, "step": 3922 }, { "epoch": 1.9467543750775724, "grad_norm": 0.13098745963258238, "learning_rate": 2.6123573486342185e-06, "loss": 0.694, "step": 3923 }, { "epoch": 1.9472508377808118, "grad_norm": 0.13673305400029112, "learning_rate": 2.6113809779996344e-06, "loss": 0.709, "step": 3924 }, { "epoch": 1.947747300484051, "grad_norm": 0.13672731547591427, "learning_rate": 2.610404590342254e-06, "loss": 0.7119, "step": 3925 }, { "epoch": 1.9482437631872904, "grad_norm": 0.12667903565039704, "learning_rate": 2.6094281858113026e-06, "loss": 0.7128, "step": 3926 }, { "epoch": 1.94874022589053, "grad_norm": 0.1295518629610594, "learning_rate": 2.6084517645560077e-06, "loss": 0.7045, "step": 3927 }, { "epoch": 1.9492366885937695, "grad_norm": 0.13912368621627139, "learning_rate": 2.6074753267255994e-06, "loss": 0.7286, "step": 3928 }, { "epoch": 1.9497331512970089, "grad_norm": 0.1408437562224136, "learning_rate": 2.60649887246931e-06, "loss": 0.7566, "step": 3929 }, { "epoch": 1.9502296140002482, "grad_norm": 0.1343158417044777, "learning_rate": 2.605522401936376e-06, "loss": 0.7252, "step": 3930 }, { "epoch": 1.9507260767034875, "grad_norm": 0.1312175766834046, "learning_rate": 2.604545915276035e-06, "loss": 0.718, "step": 3931 }, { "epoch": 1.951222539406727, "grad_norm": 0.1375316196872564, "learning_rate": 2.6035694126375265e-06, "loss": 0.7681, "step": 3932 }, { "epoch": 1.9517190021099666, "grad_norm": 0.1347425466729892, "learning_rate": 2.6025928941700945e-06, "loss": 0.6754, "step": 3933 }, { "epoch": 1.952215464813206, "grad_norm": 0.12903423135149714, "learning_rate": 2.6016163600229832e-06, "loss": 0.7674, "step": 3934 }, { "epoch": 1.9527119275164453, "grad_norm": 0.13070824473355627, "learning_rate": 2.6006398103454407e-06, "loss": 0.6968, "step": 3935 }, { "epoch": 1.9532083902196846, "grad_norm": 0.12638361456974517, "learning_rate": 2.5996632452867167e-06, "loss": 0.6966, "step": 3936 }, { "epoch": 1.9537048529229242, "grad_norm": 0.13442945386349076, "learning_rate": 2.5986866649960634e-06, "loss": 0.7475, "step": 3937 }, { "epoch": 1.9542013156261637, "grad_norm": 0.13647786271821394, "learning_rate": 2.597710069622736e-06, "loss": 0.6832, "step": 3938 }, { "epoch": 1.954697778329403, "grad_norm": 0.1303452381637909, "learning_rate": 2.596733459315992e-06, "loss": 0.7067, "step": 3939 }, { "epoch": 1.9551942410326424, "grad_norm": 0.1272985129582747, "learning_rate": 2.595756834225089e-06, "loss": 0.6699, "step": 3940 }, { "epoch": 1.9556907037358817, "grad_norm": 0.12900166946504296, "learning_rate": 2.594780194499289e-06, "loss": 0.6893, "step": 3941 }, { "epoch": 1.9561871664391213, "grad_norm": 0.13345773544468753, "learning_rate": 2.593803540287856e-06, "loss": 0.6645, "step": 3942 }, { "epoch": 1.9566836291423608, "grad_norm": 0.12499172467109242, "learning_rate": 2.592826871740056e-06, "loss": 0.6568, "step": 3943 }, { "epoch": 1.9571800918456002, "grad_norm": 0.1369899943691365, "learning_rate": 2.5918501890051573e-06, "loss": 0.7235, "step": 3944 }, { "epoch": 1.9576765545488395, "grad_norm": 0.13445906303874938, "learning_rate": 2.5908734922324293e-06, "loss": 0.725, "step": 3945 }, { "epoch": 1.9581730172520788, "grad_norm": 0.1620759841591545, "learning_rate": 2.5898967815711455e-06, "loss": 0.6952, "step": 3946 }, { "epoch": 1.9586694799553184, "grad_norm": 0.1398600398265006, "learning_rate": 2.58892005717058e-06, "loss": 0.766, "step": 3947 }, { "epoch": 1.959165942658558, "grad_norm": 0.13096623784090528, "learning_rate": 2.5879433191800093e-06, "loss": 0.7198, "step": 3948 }, { "epoch": 1.9596624053617973, "grad_norm": 0.12657285136649368, "learning_rate": 2.5869665677487122e-06, "loss": 0.6872, "step": 3949 }, { "epoch": 1.9601588680650366, "grad_norm": 0.13897203572470015, "learning_rate": 2.58598980302597e-06, "loss": 0.7355, "step": 3950 }, { "epoch": 1.960655330768276, "grad_norm": 0.12963711745676562, "learning_rate": 2.585013025161065e-06, "loss": 0.6979, "step": 3951 }, { "epoch": 1.9611517934715155, "grad_norm": 0.13783927250748948, "learning_rate": 2.584036234303282e-06, "loss": 0.7381, "step": 3952 }, { "epoch": 1.961648256174755, "grad_norm": 0.13448371125196593, "learning_rate": 2.583059430601908e-06, "loss": 0.699, "step": 3953 }, { "epoch": 1.9621447188779944, "grad_norm": 0.13318126568011363, "learning_rate": 2.5820826142062323e-06, "loss": 0.6785, "step": 3954 }, { "epoch": 1.9626411815812337, "grad_norm": 0.13076288936075522, "learning_rate": 2.581105785265545e-06, "loss": 0.7204, "step": 3955 }, { "epoch": 1.963137644284473, "grad_norm": 0.13117966572328443, "learning_rate": 2.580128943929139e-06, "loss": 0.7089, "step": 3956 }, { "epoch": 1.9636341069877126, "grad_norm": 0.14643814504267247, "learning_rate": 2.5791520903463076e-06, "loss": 0.7698, "step": 3957 }, { "epoch": 1.9641305696909521, "grad_norm": 0.13110955970838375, "learning_rate": 2.578175224666349e-06, "loss": 0.702, "step": 3958 }, { "epoch": 1.9646270323941915, "grad_norm": 0.13211603472359895, "learning_rate": 2.5771983470385604e-06, "loss": 0.691, "step": 3959 }, { "epoch": 1.9651234950974308, "grad_norm": 0.13433138088355598, "learning_rate": 2.576221457612243e-06, "loss": 0.7116, "step": 3960 }, { "epoch": 1.9656199578006701, "grad_norm": 0.1329409392764817, "learning_rate": 2.575244556536697e-06, "loss": 0.7445, "step": 3961 }, { "epoch": 1.9661164205039097, "grad_norm": 0.1314186532703457, "learning_rate": 2.5742676439612283e-06, "loss": 0.7041, "step": 3962 }, { "epoch": 1.9666128832071492, "grad_norm": 0.13197553608649243, "learning_rate": 2.5732907200351402e-06, "loss": 0.7109, "step": 3963 }, { "epoch": 1.9671093459103886, "grad_norm": 0.139383832666028, "learning_rate": 2.5723137849077406e-06, "loss": 0.7219, "step": 3964 }, { "epoch": 1.9676058086136279, "grad_norm": 0.12890251354191545, "learning_rate": 2.571336838728338e-06, "loss": 0.7209, "step": 3965 }, { "epoch": 1.9681022713168672, "grad_norm": 0.13068185566410134, "learning_rate": 2.5703598816462443e-06, "loss": 0.7415, "step": 3966 }, { "epoch": 1.9685987340201068, "grad_norm": 0.13578478265192953, "learning_rate": 2.5693829138107707e-06, "loss": 0.7458, "step": 3967 }, { "epoch": 1.9690951967233463, "grad_norm": 0.13252065296864216, "learning_rate": 2.568405935371231e-06, "loss": 0.7234, "step": 3968 }, { "epoch": 1.9695916594265857, "grad_norm": 0.13323778860939306, "learning_rate": 2.5674289464769405e-06, "loss": 0.7013, "step": 3969 }, { "epoch": 1.970088122129825, "grad_norm": 0.1287653472052357, "learning_rate": 2.566451947277217e-06, "loss": 0.6984, "step": 3970 }, { "epoch": 1.9705845848330643, "grad_norm": 0.12933587377045058, "learning_rate": 2.565474937921379e-06, "loss": 0.7015, "step": 3971 }, { "epoch": 1.9710810475363039, "grad_norm": 0.13330915382752193, "learning_rate": 2.5644979185587466e-06, "loss": 0.7413, "step": 3972 }, { "epoch": 1.9715775102395434, "grad_norm": 0.1266644141116552, "learning_rate": 2.5635208893386416e-06, "loss": 0.6668, "step": 3973 }, { "epoch": 1.9720739729427827, "grad_norm": 0.1394923566595831, "learning_rate": 2.5625438504103863e-06, "loss": 0.725, "step": 3974 }, { "epoch": 1.972570435646022, "grad_norm": 0.1333230212803404, "learning_rate": 2.5615668019233065e-06, "loss": 0.7066, "step": 3975 }, { "epoch": 1.9730668983492614, "grad_norm": 0.13786776101291032, "learning_rate": 2.560589744026729e-06, "loss": 0.7136, "step": 3976 }, { "epoch": 1.973563361052501, "grad_norm": 0.13850183160275445, "learning_rate": 2.5596126768699798e-06, "loss": 0.6687, "step": 3977 }, { "epoch": 1.9740598237557403, "grad_norm": 0.13732643590313048, "learning_rate": 2.5586356006023894e-06, "loss": 0.7616, "step": 3978 }, { "epoch": 1.9745562864589798, "grad_norm": 0.13057844494817822, "learning_rate": 2.5576585153732875e-06, "loss": 0.6654, "step": 3979 }, { "epoch": 1.9750527491622192, "grad_norm": 0.13328648064830395, "learning_rate": 2.556681421332005e-06, "loss": 0.7171, "step": 3980 }, { "epoch": 1.9755492118654585, "grad_norm": 0.13209382988878132, "learning_rate": 2.555704318627877e-06, "loss": 0.7245, "step": 3981 }, { "epoch": 1.976045674568698, "grad_norm": 0.1255898846757212, "learning_rate": 2.5547272074102375e-06, "loss": 0.6884, "step": 3982 }, { "epoch": 1.9765421372719374, "grad_norm": 0.13542305325011644, "learning_rate": 2.553750087828421e-06, "loss": 0.7229, "step": 3983 }, { "epoch": 1.977038599975177, "grad_norm": 0.13274636935908424, "learning_rate": 2.552772960031765e-06, "loss": 0.727, "step": 3984 }, { "epoch": 1.9775350626784163, "grad_norm": 0.13087815752389365, "learning_rate": 2.551795824169609e-06, "loss": 0.6981, "step": 3985 }, { "epoch": 1.9780315253816556, "grad_norm": 0.13311849040345575, "learning_rate": 2.550818680391292e-06, "loss": 0.7227, "step": 3986 }, { "epoch": 1.9785279880848952, "grad_norm": 0.13708649225810834, "learning_rate": 2.5498415288461537e-06, "loss": 0.7279, "step": 3987 }, { "epoch": 1.9790244507881345, "grad_norm": 0.13033779988678035, "learning_rate": 2.548864369683538e-06, "loss": 0.6814, "step": 3988 }, { "epoch": 1.979520913491374, "grad_norm": 0.13116550526012039, "learning_rate": 2.547887203052786e-06, "loss": 0.7825, "step": 3989 }, { "epoch": 1.9800173761946134, "grad_norm": 0.13081602543099036, "learning_rate": 2.5469100291032423e-06, "loss": 0.7239, "step": 3990 }, { "epoch": 1.9805138388978527, "grad_norm": 0.13072879071258814, "learning_rate": 2.545932847984254e-06, "loss": 0.7104, "step": 3991 }, { "epoch": 1.9810103016010923, "grad_norm": 0.13258224731686477, "learning_rate": 2.5449556598451656e-06, "loss": 0.6974, "step": 3992 }, { "epoch": 1.9815067643043316, "grad_norm": 0.1351940871833718, "learning_rate": 2.5439784648353256e-06, "loss": 0.7572, "step": 3993 }, { "epoch": 1.9820032270075711, "grad_norm": 0.14461540568622883, "learning_rate": 2.543001263104083e-06, "loss": 0.7281, "step": 3994 }, { "epoch": 1.9824996897108105, "grad_norm": 0.13342177790858398, "learning_rate": 2.5420240548007856e-06, "loss": 0.7461, "step": 3995 }, { "epoch": 1.9829961524140498, "grad_norm": 0.13543936141725377, "learning_rate": 2.5410468400747858e-06, "loss": 0.7591, "step": 3996 }, { "epoch": 1.9834926151172894, "grad_norm": 0.1292242889450314, "learning_rate": 2.5400696190754347e-06, "loss": 0.7125, "step": 3997 }, { "epoch": 1.9839890778205287, "grad_norm": 0.13366333141114367, "learning_rate": 2.539092391952085e-06, "loss": 0.7215, "step": 3998 }, { "epoch": 1.9844855405237682, "grad_norm": 0.13947805335489766, "learning_rate": 2.5381151588540896e-06, "loss": 0.7721, "step": 3999 }, { "epoch": 1.9849820032270076, "grad_norm": 0.13011957215342748, "learning_rate": 2.537137919930803e-06, "loss": 0.7054, "step": 4000 }, { "epoch": 1.985478465930247, "grad_norm": 0.1350409911144854, "learning_rate": 2.5361606753315814e-06, "loss": 0.687, "step": 4001 }, { "epoch": 1.9859749286334865, "grad_norm": 0.129620679858614, "learning_rate": 2.5351834252057805e-06, "loss": 0.7008, "step": 4002 }, { "epoch": 1.9864713913367258, "grad_norm": 0.12678966363438363, "learning_rate": 2.534206169702757e-06, "loss": 0.6995, "step": 4003 }, { "epoch": 1.9869678540399653, "grad_norm": 0.1318913991769552, "learning_rate": 2.533228908971869e-06, "loss": 0.734, "step": 4004 }, { "epoch": 1.9874643167432047, "grad_norm": 0.13831865321707615, "learning_rate": 2.532251643162475e-06, "loss": 0.7618, "step": 4005 }, { "epoch": 1.987960779446444, "grad_norm": 0.1349051666242995, "learning_rate": 2.5312743724239336e-06, "loss": 0.7522, "step": 4006 }, { "epoch": 1.9884572421496836, "grad_norm": 0.13052404435468878, "learning_rate": 2.5302970969056068e-06, "loss": 0.6867, "step": 4007 }, { "epoch": 1.9889537048529229, "grad_norm": 0.13818539051178105, "learning_rate": 2.529319816756854e-06, "loss": 0.733, "step": 4008 }, { "epoch": 1.9894501675561624, "grad_norm": 0.12800876536122638, "learning_rate": 2.5283425321270377e-06, "loss": 0.6684, "step": 4009 }, { "epoch": 1.9899466302594018, "grad_norm": 0.12765205661654766, "learning_rate": 2.5273652431655204e-06, "loss": 0.7032, "step": 4010 }, { "epoch": 1.990443092962641, "grad_norm": 0.1338564783816033, "learning_rate": 2.526387950021663e-06, "loss": 0.7496, "step": 4011 }, { "epoch": 1.9909395556658807, "grad_norm": 0.13415496043164832, "learning_rate": 2.525410652844831e-06, "loss": 0.7157, "step": 4012 }, { "epoch": 1.99143601836912, "grad_norm": 0.1282775301010404, "learning_rate": 2.524433351784389e-06, "loss": 0.6783, "step": 4013 }, { "epoch": 1.9919324810723595, "grad_norm": 0.13209341786586526, "learning_rate": 2.523456046989701e-06, "loss": 0.7353, "step": 4014 }, { "epoch": 1.9924289437755989, "grad_norm": 0.1287222818009373, "learning_rate": 2.5224787386101307e-06, "loss": 0.6862, "step": 4015 }, { "epoch": 1.9929254064788382, "grad_norm": 0.13035922850407525, "learning_rate": 2.5215014267950465e-06, "loss": 0.7004, "step": 4016 }, { "epoch": 1.9934218691820775, "grad_norm": 0.131542341490382, "learning_rate": 2.5205241116938137e-06, "loss": 0.7147, "step": 4017 }, { "epoch": 1.993918331885317, "grad_norm": 0.136610862731398, "learning_rate": 2.5195467934558003e-06, "loss": 0.7341, "step": 4018 }, { "epoch": 1.9944147945885566, "grad_norm": 0.13012603782449922, "learning_rate": 2.5185694722303728e-06, "loss": 0.6945, "step": 4019 }, { "epoch": 1.994911257291796, "grad_norm": 0.1325429269300131, "learning_rate": 2.517592148166899e-06, "loss": 0.7291, "step": 4020 }, { "epoch": 1.9954077199950353, "grad_norm": 0.1297801158635372, "learning_rate": 2.516614821414747e-06, "loss": 0.6867, "step": 4021 }, { "epoch": 1.9959041826982746, "grad_norm": 0.13181943517502306, "learning_rate": 2.5156374921232862e-06, "loss": 0.6634, "step": 4022 }, { "epoch": 1.9964006454015142, "grad_norm": 0.13355813171588157, "learning_rate": 2.5146601604418854e-06, "loss": 0.7556, "step": 4023 }, { "epoch": 1.9968971081047537, "grad_norm": 0.1310512552198657, "learning_rate": 2.5136828265199143e-06, "loss": 0.7198, "step": 4024 }, { "epoch": 1.997393570807993, "grad_norm": 0.1272191857000951, "learning_rate": 2.512705490506743e-06, "loss": 0.7172, "step": 4025 }, { "epoch": 1.9978900335112324, "grad_norm": 0.12766117521597195, "learning_rate": 2.511728152551741e-06, "loss": 0.724, "step": 4026 }, { "epoch": 1.9983864962144717, "grad_norm": 0.13122724706133568, "learning_rate": 2.5107508128042786e-06, "loss": 0.7186, "step": 4027 }, { "epoch": 1.9988829589177113, "grad_norm": 0.13170560779745644, "learning_rate": 2.509773471413726e-06, "loss": 0.7394, "step": 4028 }, { "epoch": 1.9993794216209508, "grad_norm": 0.1279325800617962, "learning_rate": 2.508796128529456e-06, "loss": 0.6678, "step": 4029 }, { "epoch": 1.9998758843241902, "grad_norm": 0.12867108507101366, "learning_rate": 2.507818784300839e-06, "loss": 0.6981, "step": 4030 }, { "epoch": 2.0, "grad_norm": 0.12867108507101366, "learning_rate": 2.5068414388772454e-06, "loss": 0.191, "step": 4031 }, { "epoch": 2.0003723470274295, "grad_norm": 0.13238445630417517, "learning_rate": 2.505864092408048e-06, "loss": 0.5359, "step": 4032 }, { "epoch": 2.0003723470274295, "eval_loss": 0.7288655042648315, "eval_runtime": 135.8088, "eval_samples_per_second": 223.498, "eval_steps_per_second": 27.944, "step": 4032 }, { "epoch": 2.0004964627032393, "grad_norm": 0.13212599455112428, "learning_rate": 2.504886745042618e-06, "loss": 0.724, "step": 4033 }, { "epoch": 2.0009929254064787, "grad_norm": 0.1289165900817434, "learning_rate": 2.503909396930328e-06, "loss": 0.6931, "step": 4034 }, { "epoch": 2.0014893881097184, "grad_norm": 0.12817979511727562, "learning_rate": 2.5029320482205487e-06, "loss": 0.7045, "step": 4035 }, { "epoch": 2.0019858508129578, "grad_norm": 0.13305215189225175, "learning_rate": 2.501954699062653e-06, "loss": 0.6943, "step": 4036 }, { "epoch": 2.002482313516197, "grad_norm": 0.1293844522970027, "learning_rate": 2.500977349606013e-06, "loss": 0.7171, "step": 4037 }, { "epoch": 2.0029787762194364, "grad_norm": 0.14606041921842416, "learning_rate": 2.5e-06, "loss": 0.759, "step": 4038 }, { "epoch": 2.0034752389226758, "grad_norm": 0.12805929586454654, "learning_rate": 2.499022650393988e-06, "loss": 0.6875, "step": 4039 }, { "epoch": 2.0039717016259155, "grad_norm": 0.13380230251599434, "learning_rate": 2.4980453009373475e-06, "loss": 0.7062, "step": 4040 }, { "epoch": 2.004468164329155, "grad_norm": 0.12965303996516353, "learning_rate": 2.497067951779452e-06, "loss": 0.6996, "step": 4041 }, { "epoch": 2.004964627032394, "grad_norm": 0.1295766633898341, "learning_rate": 2.4960906030696727e-06, "loss": 0.6684, "step": 4042 }, { "epoch": 2.0054610897356335, "grad_norm": 0.1312131877586875, "learning_rate": 2.495113254957382e-06, "loss": 0.7207, "step": 4043 }, { "epoch": 2.005957552438873, "grad_norm": 0.13817927580280212, "learning_rate": 2.4941359075919523e-06, "loss": 0.7428, "step": 4044 }, { "epoch": 2.0064540151421126, "grad_norm": 0.136455142677692, "learning_rate": 2.493158561122754e-06, "loss": 0.6922, "step": 4045 }, { "epoch": 2.006950477845352, "grad_norm": 0.13906822012805292, "learning_rate": 2.492181215699162e-06, "loss": 0.7097, "step": 4046 }, { "epoch": 2.0074469405485913, "grad_norm": 0.12826980150415948, "learning_rate": 2.4912038714705447e-06, "loss": 0.6902, "step": 4047 }, { "epoch": 2.0079434032518306, "grad_norm": 0.1388317530263215, "learning_rate": 2.490226528586275e-06, "loss": 0.7149, "step": 4048 }, { "epoch": 2.00843986595507, "grad_norm": 0.13268448735817925, "learning_rate": 2.489249187195723e-06, "loss": 0.7098, "step": 4049 }, { "epoch": 2.0089363286583097, "grad_norm": 0.1302092877003093, "learning_rate": 2.4882718474482604e-06, "loss": 0.7251, "step": 4050 }, { "epoch": 2.009432791361549, "grad_norm": 0.13047139016142711, "learning_rate": 2.487294509493258e-06, "loss": 0.7105, "step": 4051 }, { "epoch": 2.0099292540647884, "grad_norm": 0.13056451140753167, "learning_rate": 2.4863171734800866e-06, "loss": 0.6717, "step": 4052 }, { "epoch": 2.0104257167680277, "grad_norm": 0.1272813781903442, "learning_rate": 2.485339839558115e-06, "loss": 0.6912, "step": 4053 }, { "epoch": 2.010922179471267, "grad_norm": 0.1293306773850289, "learning_rate": 2.4843625078767146e-06, "loss": 0.6625, "step": 4054 }, { "epoch": 2.011418642174507, "grad_norm": 0.13026414160725963, "learning_rate": 2.4833851785852536e-06, "loss": 0.7192, "step": 4055 }, { "epoch": 2.011915104877746, "grad_norm": 0.13154534592494477, "learning_rate": 2.4824078518331017e-06, "loss": 0.7098, "step": 4056 }, { "epoch": 2.0124115675809855, "grad_norm": 0.13413230046665645, "learning_rate": 2.481430527769628e-06, "loss": 0.7306, "step": 4057 }, { "epoch": 2.012908030284225, "grad_norm": 0.13231043126436018, "learning_rate": 2.4804532065442e-06, "loss": 0.7263, "step": 4058 }, { "epoch": 2.013404492987464, "grad_norm": 0.13114069790750638, "learning_rate": 2.4794758883061862e-06, "loss": 0.6813, "step": 4059 }, { "epoch": 2.013900955690704, "grad_norm": 0.1330470419586042, "learning_rate": 2.4784985732049535e-06, "loss": 0.7061, "step": 4060 }, { "epoch": 2.0143974183939433, "grad_norm": 0.1345085568471715, "learning_rate": 2.4775212613898693e-06, "loss": 0.7379, "step": 4061 }, { "epoch": 2.0148938810971826, "grad_norm": 0.12782673695827584, "learning_rate": 2.4765439530103004e-06, "loss": 0.7046, "step": 4062 }, { "epoch": 2.015390343800422, "grad_norm": 0.13254520200468667, "learning_rate": 2.475566648215612e-06, "loss": 0.7034, "step": 4063 }, { "epoch": 2.0158868065036613, "grad_norm": 0.13807047247892, "learning_rate": 2.4745893471551697e-06, "loss": 0.6927, "step": 4064 }, { "epoch": 2.016383269206901, "grad_norm": 0.12831463879417368, "learning_rate": 2.4736120499783378e-06, "loss": 0.6694, "step": 4065 }, { "epoch": 2.0168797319101404, "grad_norm": 0.13082807641735877, "learning_rate": 2.472634756834481e-06, "loss": 0.6968, "step": 4066 }, { "epoch": 2.0173761946133797, "grad_norm": 0.1274033272524088, "learning_rate": 2.4716574678729627e-06, "loss": 0.6998, "step": 4067 }, { "epoch": 2.017872657316619, "grad_norm": 0.12742003523402126, "learning_rate": 2.470680183243147e-06, "loss": 0.6828, "step": 4068 }, { "epoch": 2.0183691200198584, "grad_norm": 0.13490291668223267, "learning_rate": 2.469702903094394e-06, "loss": 0.7621, "step": 4069 }, { "epoch": 2.018865582723098, "grad_norm": 0.13385281497853518, "learning_rate": 2.4687256275760668e-06, "loss": 0.7237, "step": 4070 }, { "epoch": 2.0193620454263375, "grad_norm": 0.1360671850549657, "learning_rate": 2.467748356837526e-06, "loss": 0.7226, "step": 4071 }, { "epoch": 2.019858508129577, "grad_norm": 0.13234055538841305, "learning_rate": 2.4667710910281318e-06, "loss": 0.7682, "step": 4072 }, { "epoch": 2.020354970832816, "grad_norm": 0.12771711139534347, "learning_rate": 2.465793830297244e-06, "loss": 0.6369, "step": 4073 }, { "epoch": 2.0208514335360555, "grad_norm": 0.13312672540262874, "learning_rate": 2.4648165747942203e-06, "loss": 0.7368, "step": 4074 }, { "epoch": 2.0213478962392952, "grad_norm": 0.13105724105229263, "learning_rate": 2.463839324668419e-06, "loss": 0.7299, "step": 4075 }, { "epoch": 2.0218443589425346, "grad_norm": 0.13275204495209755, "learning_rate": 2.462862080069197e-06, "loss": 0.7286, "step": 4076 }, { "epoch": 2.022340821645774, "grad_norm": 0.13761097722270485, "learning_rate": 2.461884841145911e-06, "loss": 0.725, "step": 4077 }, { "epoch": 2.022837284349013, "grad_norm": 0.13174698392561143, "learning_rate": 2.460907608047916e-06, "loss": 0.6977, "step": 4078 }, { "epoch": 2.0233337470522526, "grad_norm": 0.14004069015911763, "learning_rate": 2.459930380924566e-06, "loss": 0.7458, "step": 4079 }, { "epoch": 2.0238302097554923, "grad_norm": 0.129016892174802, "learning_rate": 2.4589531599252155e-06, "loss": 0.6907, "step": 4080 }, { "epoch": 2.0243266724587317, "grad_norm": 0.13159681156179226, "learning_rate": 2.4579759451992157e-06, "loss": 0.7164, "step": 4081 }, { "epoch": 2.024823135161971, "grad_norm": 0.13167520844662808, "learning_rate": 2.4569987368959186e-06, "loss": 0.7421, "step": 4082 }, { "epoch": 2.0253195978652103, "grad_norm": 0.1363803013638871, "learning_rate": 2.4560215351646752e-06, "loss": 0.7367, "step": 4083 }, { "epoch": 2.0258160605684497, "grad_norm": 0.12838138545946867, "learning_rate": 2.4550443401548348e-06, "loss": 0.7207, "step": 4084 }, { "epoch": 2.0263125232716894, "grad_norm": 0.1316322999014349, "learning_rate": 2.4540671520157474e-06, "loss": 0.7001, "step": 4085 }, { "epoch": 2.0268089859749288, "grad_norm": 0.13525722121983028, "learning_rate": 2.453089970896758e-06, "loss": 0.7593, "step": 4086 }, { "epoch": 2.027305448678168, "grad_norm": 0.1305004559151505, "learning_rate": 2.452112796947215e-06, "loss": 0.6767, "step": 4087 }, { "epoch": 2.0278019113814074, "grad_norm": 0.13465746531659445, "learning_rate": 2.451135630316463e-06, "loss": 0.7024, "step": 4088 }, { "epoch": 2.0282983740846467, "grad_norm": 0.12955552855480887, "learning_rate": 2.4501584711538467e-06, "loss": 0.6726, "step": 4089 }, { "epoch": 2.0287948367878865, "grad_norm": 0.1320046748116342, "learning_rate": 2.4491813196087087e-06, "loss": 0.7145, "step": 4090 }, { "epoch": 2.029291299491126, "grad_norm": 0.1426680668898575, "learning_rate": 2.4482041758303914e-06, "loss": 0.7211, "step": 4091 }, { "epoch": 2.029787762194365, "grad_norm": 0.13091969364968287, "learning_rate": 2.447227039968235e-06, "loss": 0.6937, "step": 4092 }, { "epoch": 2.0302842248976045, "grad_norm": 0.13053571724955956, "learning_rate": 2.4462499121715794e-06, "loss": 0.7057, "step": 4093 }, { "epoch": 2.030780687600844, "grad_norm": 0.13244995057683245, "learning_rate": 2.4452727925897633e-06, "loss": 0.715, "step": 4094 }, { "epoch": 2.0312771503040836, "grad_norm": 0.12980740024168078, "learning_rate": 2.4442956813721235e-06, "loss": 0.6907, "step": 4095 }, { "epoch": 2.031773613007323, "grad_norm": 0.1320072156478787, "learning_rate": 2.443318578667996e-06, "loss": 0.7048, "step": 4096 }, { "epoch": 2.0322700757105623, "grad_norm": 0.13288927327614064, "learning_rate": 2.442341484626714e-06, "loss": 0.6985, "step": 4097 }, { "epoch": 2.0327665384138016, "grad_norm": 0.13044804570837723, "learning_rate": 2.4413643993976114e-06, "loss": 0.7046, "step": 4098 }, { "epoch": 2.033263001117041, "grad_norm": 0.12860176814635504, "learning_rate": 2.4403873231300206e-06, "loss": 0.6514, "step": 4099 }, { "epoch": 2.0337594638202807, "grad_norm": 0.12841049842685956, "learning_rate": 2.4394102559732717e-06, "loss": 0.7143, "step": 4100 }, { "epoch": 2.03425592652352, "grad_norm": 0.1383324791180656, "learning_rate": 2.438433198076694e-06, "loss": 0.7236, "step": 4101 }, { "epoch": 2.0347523892267594, "grad_norm": 0.1334274425685486, "learning_rate": 2.437456149589614e-06, "loss": 0.717, "step": 4102 }, { "epoch": 2.0352488519299987, "grad_norm": 0.1328619959886049, "learning_rate": 2.4364791106613596e-06, "loss": 0.7392, "step": 4103 }, { "epoch": 2.035745314633238, "grad_norm": 0.12837963819339782, "learning_rate": 2.435502081441254e-06, "loss": 0.6838, "step": 4104 }, { "epoch": 2.0362417773364774, "grad_norm": 0.1314972595674822, "learning_rate": 2.434525062078622e-06, "loss": 0.7104, "step": 4105 }, { "epoch": 2.036738240039717, "grad_norm": 0.1376872036291365, "learning_rate": 2.4335480527227833e-06, "loss": 0.7023, "step": 4106 }, { "epoch": 2.0372347027429565, "grad_norm": 0.12746910204547873, "learning_rate": 2.43257105352306e-06, "loss": 0.7065, "step": 4107 }, { "epoch": 2.037731165446196, "grad_norm": 0.12941269428669738, "learning_rate": 2.4315940646287693e-06, "loss": 0.6786, "step": 4108 }, { "epoch": 2.038227628149435, "grad_norm": 0.1334515747366998, "learning_rate": 2.4306170861892293e-06, "loss": 0.7258, "step": 4109 }, { "epoch": 2.0387240908526745, "grad_norm": 0.13098506524365233, "learning_rate": 2.429640118353756e-06, "loss": 0.6948, "step": 4110 }, { "epoch": 2.0392205535559143, "grad_norm": 0.1314973323181186, "learning_rate": 2.4286631612716623e-06, "loss": 0.6943, "step": 4111 }, { "epoch": 2.0397170162591536, "grad_norm": 0.12965431662554677, "learning_rate": 2.427686215092261e-06, "loss": 0.6807, "step": 4112 }, { "epoch": 2.040213478962393, "grad_norm": 0.13024307058284337, "learning_rate": 2.426709279964861e-06, "loss": 0.7308, "step": 4113 }, { "epoch": 2.0407099416656322, "grad_norm": 0.13033943720971095, "learning_rate": 2.425732356038773e-06, "loss": 0.6952, "step": 4114 }, { "epoch": 2.0412064043688716, "grad_norm": 0.12922563196874387, "learning_rate": 2.424755443463303e-06, "loss": 0.7066, "step": 4115 }, { "epoch": 2.0417028670721113, "grad_norm": 0.13643925819128516, "learning_rate": 2.4237785423877576e-06, "loss": 0.7388, "step": 4116 }, { "epoch": 2.0421993297753507, "grad_norm": 0.1285865306223446, "learning_rate": 2.42280165296144e-06, "loss": 0.753, "step": 4117 }, { "epoch": 2.04269579247859, "grad_norm": 0.12885979273302448, "learning_rate": 2.421824775333652e-06, "loss": 0.6723, "step": 4118 }, { "epoch": 2.0431922551818293, "grad_norm": 0.13298912364217827, "learning_rate": 2.420847909653693e-06, "loss": 0.7522, "step": 4119 }, { "epoch": 2.0436887178850687, "grad_norm": 0.12918140250053928, "learning_rate": 2.4198710560708623e-06, "loss": 0.6887, "step": 4120 }, { "epoch": 2.0441851805883084, "grad_norm": 0.1298629305623038, "learning_rate": 2.4188942147344557e-06, "loss": 0.6918, "step": 4121 }, { "epoch": 2.044681643291548, "grad_norm": 0.1329165561864215, "learning_rate": 2.4179173857937686e-06, "loss": 0.7004, "step": 4122 }, { "epoch": 2.045178105994787, "grad_norm": 0.13353203604650155, "learning_rate": 2.4169405693980926e-06, "loss": 0.6686, "step": 4123 }, { "epoch": 2.0456745686980264, "grad_norm": 0.12807440054179015, "learning_rate": 2.4159637656967185e-06, "loss": 0.6834, "step": 4124 }, { "epoch": 2.0461710314012658, "grad_norm": 0.13435476970975527, "learning_rate": 2.4149869748389355e-06, "loss": 0.713, "step": 4125 }, { "epoch": 2.0466674941045055, "grad_norm": 0.13449883152953462, "learning_rate": 2.4140101969740305e-06, "loss": 0.7422, "step": 4126 }, { "epoch": 2.047163956807745, "grad_norm": 0.12884574289334033, "learning_rate": 2.413033432251289e-06, "loss": 0.7106, "step": 4127 }, { "epoch": 2.047660419510984, "grad_norm": 0.13241382070645977, "learning_rate": 2.412056680819992e-06, "loss": 0.7156, "step": 4128 }, { "epoch": 2.0481568822142235, "grad_norm": 0.13077843297811131, "learning_rate": 2.4110799428294214e-06, "loss": 0.669, "step": 4129 }, { "epoch": 2.048653344917463, "grad_norm": 0.12749052274478226, "learning_rate": 2.4101032184288558e-06, "loss": 0.6908, "step": 4130 }, { "epoch": 2.0491498076207026, "grad_norm": 0.12836437459659322, "learning_rate": 2.4091265077675716e-06, "loss": 0.7328, "step": 4131 }, { "epoch": 2.049646270323942, "grad_norm": 0.13191584290268923, "learning_rate": 2.4081498109948435e-06, "loss": 0.7269, "step": 4132 }, { "epoch": 2.0501427330271813, "grad_norm": 0.1336022422875839, "learning_rate": 2.407173128259945e-06, "loss": 0.746, "step": 4133 }, { "epoch": 2.0506391957304206, "grad_norm": 0.12850600170285384, "learning_rate": 2.406196459712145e-06, "loss": 0.6999, "step": 4134 }, { "epoch": 2.05113565843366, "grad_norm": 0.1280500632235552, "learning_rate": 2.4052198055007117e-06, "loss": 0.7095, "step": 4135 }, { "epoch": 2.0516321211368997, "grad_norm": 0.13308799322438067, "learning_rate": 2.404243165774912e-06, "loss": 0.7411, "step": 4136 }, { "epoch": 2.052128583840139, "grad_norm": 0.12920055964205324, "learning_rate": 2.4032665406840084e-06, "loss": 0.6944, "step": 4137 }, { "epoch": 2.0526250465433784, "grad_norm": 0.1349853449657708, "learning_rate": 2.402289930377264e-06, "loss": 0.7414, "step": 4138 }, { "epoch": 2.0531215092466177, "grad_norm": 0.12839001848947515, "learning_rate": 2.4013133350039366e-06, "loss": 0.6611, "step": 4139 }, { "epoch": 2.053617971949857, "grad_norm": 0.12960386307340557, "learning_rate": 2.4003367547132833e-06, "loss": 0.6837, "step": 4140 }, { "epoch": 2.054114434653097, "grad_norm": 0.12992168154334086, "learning_rate": 2.3993601896545593e-06, "loss": 0.7583, "step": 4141 }, { "epoch": 2.054610897356336, "grad_norm": 0.13162250535061173, "learning_rate": 2.398383639977017e-06, "loss": 0.6762, "step": 4142 }, { "epoch": 2.0551073600595755, "grad_norm": 0.1322234940484418, "learning_rate": 2.3974071058299063e-06, "loss": 0.6959, "step": 4143 }, { "epoch": 2.055603822762815, "grad_norm": 0.13363673998654405, "learning_rate": 2.3964305873624748e-06, "loss": 0.6872, "step": 4144 }, { "epoch": 2.056100285466054, "grad_norm": 0.1315605491232442, "learning_rate": 2.3954540847239663e-06, "loss": 0.7403, "step": 4145 }, { "epoch": 2.056596748169294, "grad_norm": 0.13173975211915448, "learning_rate": 2.394477598063625e-06, "loss": 0.7021, "step": 4146 }, { "epoch": 2.0570932108725333, "grad_norm": 0.12934541651577286, "learning_rate": 2.3935011275306907e-06, "loss": 0.6995, "step": 4147 }, { "epoch": 2.0575896735757726, "grad_norm": 0.1337787954167388, "learning_rate": 2.3925246732744014e-06, "loss": 0.6788, "step": 4148 }, { "epoch": 2.058086136279012, "grad_norm": 0.1409884703911019, "learning_rate": 2.3915482354439935e-06, "loss": 0.6618, "step": 4149 }, { "epoch": 2.0585825989822513, "grad_norm": 0.13863027742696146, "learning_rate": 2.390571814188698e-06, "loss": 0.6907, "step": 4150 }, { "epoch": 2.059079061685491, "grad_norm": 0.1264859190593734, "learning_rate": 2.3895954096577466e-06, "loss": 0.715, "step": 4151 }, { "epoch": 2.0595755243887304, "grad_norm": 0.12838817948703266, "learning_rate": 2.388619022000366e-06, "loss": 0.6928, "step": 4152 }, { "epoch": 2.0600719870919697, "grad_norm": 0.1382264776368354, "learning_rate": 2.3876426513657823e-06, "loss": 0.7077, "step": 4153 }, { "epoch": 2.060568449795209, "grad_norm": 0.13252726490660227, "learning_rate": 2.3866662979032183e-06, "loss": 0.6868, "step": 4154 }, { "epoch": 2.0610649124984484, "grad_norm": 0.13082652966537198, "learning_rate": 2.385689961761893e-06, "loss": 0.684, "step": 4155 }, { "epoch": 2.061561375201688, "grad_norm": 0.1330519426707675, "learning_rate": 2.384713643091025e-06, "loss": 0.6788, "step": 4156 }, { "epoch": 2.0620578379049275, "grad_norm": 0.12951338154381573, "learning_rate": 2.3837373420398274e-06, "loss": 0.7167, "step": 4157 }, { "epoch": 2.062554300608167, "grad_norm": 0.12959226185363074, "learning_rate": 2.382761058757513e-06, "loss": 0.6929, "step": 4158 }, { "epoch": 2.063050763311406, "grad_norm": 0.13289167957939557, "learning_rate": 2.381784793393292e-06, "loss": 0.7145, "step": 4159 }, { "epoch": 2.0635472260146455, "grad_norm": 0.13354521709268397, "learning_rate": 2.3808085460963686e-06, "loss": 0.6988, "step": 4160 }, { "epoch": 2.0640436887178852, "grad_norm": 0.13397190629908656, "learning_rate": 2.3798323170159487e-06, "loss": 0.7283, "step": 4161 }, { "epoch": 2.0645401514211246, "grad_norm": 0.13299819686134604, "learning_rate": 2.378856106301232e-06, "loss": 0.7049, "step": 4162 }, { "epoch": 2.065036614124364, "grad_norm": 0.1288123646274204, "learning_rate": 2.377879914101417e-06, "loss": 0.7245, "step": 4163 }, { "epoch": 2.0655330768276032, "grad_norm": 0.1393615916081668, "learning_rate": 2.376903740565699e-06, "loss": 0.6912, "step": 4164 }, { "epoch": 2.0660295395308426, "grad_norm": 0.12457000357374992, "learning_rate": 2.375927585843271e-06, "loss": 0.6801, "step": 4165 }, { "epoch": 2.0665260022340823, "grad_norm": 0.12940223030790443, "learning_rate": 2.3749514500833218e-06, "loss": 0.7121, "step": 4166 }, { "epoch": 2.0670224649373217, "grad_norm": 0.1382124046186019, "learning_rate": 2.373975333435038e-06, "loss": 0.7042, "step": 4167 }, { "epoch": 2.067518927640561, "grad_norm": 0.13543223125625606, "learning_rate": 2.3729992360476047e-06, "loss": 0.6908, "step": 4168 }, { "epoch": 2.0680153903438003, "grad_norm": 0.1367570277824597, "learning_rate": 2.372023158070201e-06, "loss": 0.7385, "step": 4169 }, { "epoch": 2.0685118530470397, "grad_norm": 0.12658280910237, "learning_rate": 2.3710470996520067e-06, "loss": 0.6485, "step": 4170 }, { "epoch": 2.0690083157502794, "grad_norm": 0.1318360238221834, "learning_rate": 2.370071060942195e-06, "loss": 0.7073, "step": 4171 }, { "epoch": 2.0695047784535188, "grad_norm": 0.1355974621244026, "learning_rate": 2.369095042089938e-06, "loss": 0.7383, "step": 4172 }, { "epoch": 2.070001241156758, "grad_norm": 0.1354835108761855, "learning_rate": 2.368119043244405e-06, "loss": 0.7249, "step": 4173 }, { "epoch": 2.0704977038599974, "grad_norm": 0.1316834160537684, "learning_rate": 2.3671430645547622e-06, "loss": 0.718, "step": 4174 }, { "epoch": 2.0709941665632368, "grad_norm": 0.1337398224105994, "learning_rate": 2.3661671061701725e-06, "loss": 0.7617, "step": 4175 }, { "epoch": 2.0714906292664765, "grad_norm": 0.13071098977935075, "learning_rate": 2.3651911682397937e-06, "loss": 0.7205, "step": 4176 }, { "epoch": 2.071987091969716, "grad_norm": 0.13108332634784578, "learning_rate": 2.3642152509127837e-06, "loss": 0.7222, "step": 4177 }, { "epoch": 2.072483554672955, "grad_norm": 0.12917550227439295, "learning_rate": 2.363239354338295e-06, "loss": 0.6863, "step": 4178 }, { "epoch": 2.0729800173761945, "grad_norm": 0.13058903950875422, "learning_rate": 2.3622634786654787e-06, "loss": 0.741, "step": 4179 }, { "epoch": 2.073476480079434, "grad_norm": 0.1317101832126491, "learning_rate": 2.361287624043481e-06, "loss": 0.69, "step": 4180 }, { "epoch": 2.0739729427826736, "grad_norm": 0.13179285865877882, "learning_rate": 2.3603117906214463e-06, "loss": 0.6874, "step": 4181 }, { "epoch": 2.074469405485913, "grad_norm": 0.13074448118330892, "learning_rate": 2.3593359785485143e-06, "loss": 0.7324, "step": 4182 }, { "epoch": 2.0749658681891523, "grad_norm": 0.13408000673376183, "learning_rate": 2.358360187973822e-06, "loss": 0.7242, "step": 4183 }, { "epoch": 2.0754623308923916, "grad_norm": 0.13011307891286955, "learning_rate": 2.3573844190465046e-06, "loss": 0.6941, "step": 4184 }, { "epoch": 2.075958793595631, "grad_norm": 0.12572911313821467, "learning_rate": 2.356408671915692e-06, "loss": 0.6861, "step": 4185 }, { "epoch": 2.0764552562988707, "grad_norm": 0.13279434671974327, "learning_rate": 2.355432946730512e-06, "loss": 0.7343, "step": 4186 }, { "epoch": 2.07695171900211, "grad_norm": 0.1375180906665279, "learning_rate": 2.3544572436400876e-06, "loss": 0.7285, "step": 4187 }, { "epoch": 2.0774481817053494, "grad_norm": 0.13344354731066305, "learning_rate": 2.3534815627935397e-06, "loss": 0.7002, "step": 4188 }, { "epoch": 2.0779446444085887, "grad_norm": 0.12763100455109364, "learning_rate": 2.352505904339986e-06, "loss": 0.6759, "step": 4189 }, { "epoch": 2.078441107111828, "grad_norm": 0.12628191599075542, "learning_rate": 2.3515302684285398e-06, "loss": 0.6725, "step": 4190 }, { "epoch": 2.078937569815068, "grad_norm": 0.13183736319992664, "learning_rate": 2.350554655208313e-06, "loss": 0.6876, "step": 4191 }, { "epoch": 2.079434032518307, "grad_norm": 0.1326348427538915, "learning_rate": 2.349579064828409e-06, "loss": 0.7015, "step": 4192 }, { "epoch": 2.0799304952215465, "grad_norm": 0.12900392788556156, "learning_rate": 2.3486034974379344e-06, "loss": 0.6592, "step": 4193 }, { "epoch": 2.080426957924786, "grad_norm": 0.12916628410416808, "learning_rate": 2.347627953185987e-06, "loss": 0.6965, "step": 4194 }, { "epoch": 2.080923420628025, "grad_norm": 0.13911006307022702, "learning_rate": 2.346652432221664e-06, "loss": 0.7477, "step": 4195 }, { "epoch": 2.081419883331265, "grad_norm": 0.12991109186278407, "learning_rate": 2.3456769346940578e-06, "loss": 0.7108, "step": 4196 }, { "epoch": 2.0819163460345043, "grad_norm": 0.1310175516830794, "learning_rate": 2.344701460752258e-06, "loss": 0.72, "step": 4197 }, { "epoch": 2.0824128087377436, "grad_norm": 0.1302438861387863, "learning_rate": 2.34372601054535e-06, "loss": 0.7045, "step": 4198 }, { "epoch": 2.082909271440983, "grad_norm": 0.131287589350185, "learning_rate": 2.3427505842224153e-06, "loss": 0.6859, "step": 4199 }, { "epoch": 2.0834057341442223, "grad_norm": 0.13636020184165526, "learning_rate": 2.3417751819325327e-06, "loss": 0.7343, "step": 4200 }, { "epoch": 2.083902196847462, "grad_norm": 0.13036105078338428, "learning_rate": 2.3407998038247767e-06, "loss": 0.6583, "step": 4201 }, { "epoch": 2.0843986595507014, "grad_norm": 0.13703412523752168, "learning_rate": 2.339824450048218e-06, "loss": 0.6875, "step": 4202 }, { "epoch": 2.0848951222539407, "grad_norm": 0.1273783968662836, "learning_rate": 2.3388491207519237e-06, "loss": 0.6739, "step": 4203 }, { "epoch": 2.08539158495718, "grad_norm": 0.1293141936432381, "learning_rate": 2.337873816084958e-06, "loss": 0.6807, "step": 4204 }, { "epoch": 2.0858880476604194, "grad_norm": 0.13247637384139838, "learning_rate": 2.3368985361963793e-06, "loss": 0.7367, "step": 4205 }, { "epoch": 2.086384510363659, "grad_norm": 0.12888079453701196, "learning_rate": 2.3359232812352444e-06, "loss": 0.6811, "step": 4206 }, { "epoch": 2.0868809730668985, "grad_norm": 0.14305169239978224, "learning_rate": 2.334948051350606e-06, "loss": 0.6632, "step": 4207 }, { "epoch": 2.087377435770138, "grad_norm": 0.13761847185157933, "learning_rate": 2.333972846691511e-06, "loss": 0.7505, "step": 4208 }, { "epoch": 2.087873898473377, "grad_norm": 0.13472353385622, "learning_rate": 2.3329976674070045e-06, "loss": 0.7507, "step": 4209 }, { "epoch": 2.0883703611766165, "grad_norm": 0.1332250195890825, "learning_rate": 2.332022513646126e-06, "loss": 0.686, "step": 4210 }, { "epoch": 2.0888668238798562, "grad_norm": 0.13094514564456525, "learning_rate": 2.331047385557914e-06, "loss": 0.6997, "step": 4211 }, { "epoch": 2.0893632865830956, "grad_norm": 0.13537482547078766, "learning_rate": 2.3300722832913995e-06, "loss": 0.6934, "step": 4212 }, { "epoch": 2.089859749286335, "grad_norm": 0.13110966344634223, "learning_rate": 2.329097206995612e-06, "loss": 0.701, "step": 4213 }, { "epoch": 2.090356211989574, "grad_norm": 0.13289972655864118, "learning_rate": 2.3281221568195765e-06, "loss": 0.6511, "step": 4214 }, { "epoch": 2.0908526746928136, "grad_norm": 0.13256649712938065, "learning_rate": 2.3271471329123134e-06, "loss": 0.7757, "step": 4215 }, { "epoch": 2.0913491373960533, "grad_norm": 0.12718273334445213, "learning_rate": 2.3261721354228394e-06, "loss": 0.6746, "step": 4216 }, { "epoch": 2.0918456000992927, "grad_norm": 0.1279366898208473, "learning_rate": 2.325197164500167e-06, "loss": 0.7287, "step": 4217 }, { "epoch": 2.092342062802532, "grad_norm": 0.1267772446319626, "learning_rate": 2.324222220293306e-06, "loss": 0.7235, "step": 4218 }, { "epoch": 2.0928385255057713, "grad_norm": 0.12657392759783875, "learning_rate": 2.3232473029512603e-06, "loss": 0.67, "step": 4219 }, { "epoch": 2.0933349882090106, "grad_norm": 0.13143006626346396, "learning_rate": 2.3222724126230296e-06, "loss": 0.6967, "step": 4220 }, { "epoch": 2.0938314509122504, "grad_norm": 0.13250048306785056, "learning_rate": 2.3212975494576113e-06, "loss": 0.6659, "step": 4221 }, { "epoch": 2.0943279136154898, "grad_norm": 0.1304492685665225, "learning_rate": 2.3203227136039973e-06, "loss": 0.754, "step": 4222 }, { "epoch": 2.094824376318729, "grad_norm": 0.14113302529355157, "learning_rate": 2.3193479052111763e-06, "loss": 0.7788, "step": 4223 }, { "epoch": 2.0953208390219684, "grad_norm": 0.12947346258330397, "learning_rate": 2.3183731244281307e-06, "loss": 0.6974, "step": 4224 }, { "epoch": 2.0958173017252077, "grad_norm": 0.12969243044770545, "learning_rate": 2.3173983714038413e-06, "loss": 0.6881, "step": 4225 }, { "epoch": 2.0963137644284475, "grad_norm": 0.12805474797723002, "learning_rate": 2.3164236462872826e-06, "loss": 0.6631, "step": 4226 }, { "epoch": 2.096810227131687, "grad_norm": 0.12991846472801588, "learning_rate": 2.315448949227426e-06, "loss": 0.7117, "step": 4227 }, { "epoch": 2.097306689834926, "grad_norm": 0.12737529203692569, "learning_rate": 2.3144742803732386e-06, "loss": 0.6619, "step": 4228 }, { "epoch": 2.0978031525381655, "grad_norm": 0.1348388296052507, "learning_rate": 2.3134996398736827e-06, "loss": 0.6702, "step": 4229 }, { "epoch": 2.098299615241405, "grad_norm": 0.12949314939302115, "learning_rate": 2.3125250278777173e-06, "loss": 0.6692, "step": 4230 }, { "epoch": 2.0987960779446446, "grad_norm": 0.1319653430303982, "learning_rate": 2.3115504445342947e-06, "loss": 0.7186, "step": 4231 }, { "epoch": 2.099292540647884, "grad_norm": 0.12754394160183852, "learning_rate": 2.3105758899923652e-06, "loss": 0.6983, "step": 4232 }, { "epoch": 2.0997890033511233, "grad_norm": 0.13105593757086795, "learning_rate": 2.3096013644008738e-06, "loss": 0.7449, "step": 4233 }, { "epoch": 2.1002854660543626, "grad_norm": 0.1352001782140455, "learning_rate": 2.308626867908761e-06, "loss": 0.7227, "step": 4234 }, { "epoch": 2.100781928757602, "grad_norm": 0.1291811312002174, "learning_rate": 2.3076524006649633e-06, "loss": 0.6668, "step": 4235 }, { "epoch": 2.1012783914608413, "grad_norm": 0.13013537752074916, "learning_rate": 2.3066779628184115e-06, "loss": 0.6691, "step": 4236 }, { "epoch": 2.101774854164081, "grad_norm": 0.13054657602863368, "learning_rate": 2.3057035545180337e-06, "loss": 0.6978, "step": 4237 }, { "epoch": 2.1022713168673204, "grad_norm": 0.12839328024574606, "learning_rate": 2.3047291759127523e-06, "loss": 0.6554, "step": 4238 }, { "epoch": 2.1027677795705597, "grad_norm": 0.12996764440677216, "learning_rate": 2.3037548271514863e-06, "loss": 0.7042, "step": 4239 }, { "epoch": 2.103264242273799, "grad_norm": 0.13523019374688494, "learning_rate": 2.3027805083831475e-06, "loss": 0.7477, "step": 4240 }, { "epoch": 2.103760704977039, "grad_norm": 0.13283035054495967, "learning_rate": 2.3018062197566464e-06, "loss": 0.7262, "step": 4241 }, { "epoch": 2.104257167680278, "grad_norm": 0.12953670758307892, "learning_rate": 2.3008319614208863e-06, "loss": 0.723, "step": 4242 }, { "epoch": 2.1047536303835175, "grad_norm": 0.13144160332075827, "learning_rate": 2.299857733524767e-06, "loss": 0.7333, "step": 4243 }, { "epoch": 2.105250093086757, "grad_norm": 0.12885131945661563, "learning_rate": 2.2988835362171845e-06, "loss": 0.736, "step": 4244 }, { "epoch": 2.105746555789996, "grad_norm": 0.13082507084952819, "learning_rate": 2.297909369647029e-06, "loss": 0.7058, "step": 4245 }, { "epoch": 2.1062430184932355, "grad_norm": 0.1308644150778887, "learning_rate": 2.2969352339631864e-06, "loss": 0.7137, "step": 4246 }, { "epoch": 2.1067394811964752, "grad_norm": 0.13130767461578424, "learning_rate": 2.295961129314537e-06, "loss": 0.6985, "step": 4247 }, { "epoch": 2.1072359438997146, "grad_norm": 0.13689384112058248, "learning_rate": 2.294987055849957e-06, "loss": 0.7249, "step": 4248 }, { "epoch": 2.107732406602954, "grad_norm": 0.13528534527442918, "learning_rate": 2.2940130137183187e-06, "loss": 0.7141, "step": 4249 }, { "epoch": 2.1082288693061932, "grad_norm": 0.1367594631782608, "learning_rate": 2.2930390030684886e-06, "loss": 0.7424, "step": 4250 }, { "epoch": 2.108725332009433, "grad_norm": 0.13326510053921575, "learning_rate": 2.292065024049329e-06, "loss": 0.732, "step": 4251 }, { "epoch": 2.1092217947126723, "grad_norm": 0.13486667743727, "learning_rate": 2.291091076809696e-06, "loss": 0.7336, "step": 4252 }, { "epoch": 2.1097182574159117, "grad_norm": 0.1342722797696259, "learning_rate": 2.2901171614984422e-06, "loss": 0.7149, "step": 4253 }, { "epoch": 2.110214720119151, "grad_norm": 0.12916009534662892, "learning_rate": 2.2891432782644148e-06, "loss": 0.6769, "step": 4254 }, { "epoch": 2.1107111828223903, "grad_norm": 0.12895780049088768, "learning_rate": 2.288169427256458e-06, "loss": 0.6941, "step": 4255 }, { "epoch": 2.1112076455256297, "grad_norm": 0.13336065989780396, "learning_rate": 2.2871956086234066e-06, "loss": 0.7312, "step": 4256 }, { "epoch": 2.1117041082288694, "grad_norm": 0.1311287040733774, "learning_rate": 2.2862218225140948e-06, "loss": 0.6849, "step": 4257 }, { "epoch": 2.112200570932109, "grad_norm": 0.1326013584545713, "learning_rate": 2.2852480690773493e-06, "loss": 0.7127, "step": 4258 }, { "epoch": 2.112697033635348, "grad_norm": 0.1293700176479527, "learning_rate": 2.284274348461993e-06, "loss": 0.6787, "step": 4259 }, { "epoch": 2.1131934963385874, "grad_norm": 0.1270172759070305, "learning_rate": 2.283300660816844e-06, "loss": 0.7109, "step": 4260 }, { "epoch": 2.1136899590418268, "grad_norm": 0.12987078013128955, "learning_rate": 2.282327006290714e-06, "loss": 0.6754, "step": 4261 }, { "epoch": 2.1141864217450665, "grad_norm": 0.1311168878775037, "learning_rate": 2.281353385032412e-06, "loss": 0.6938, "step": 4262 }, { "epoch": 2.114682884448306, "grad_norm": 0.13308422505081596, "learning_rate": 2.2803797971907384e-06, "loss": 0.7151, "step": 4263 }, { "epoch": 2.115179347151545, "grad_norm": 0.1305774573301847, "learning_rate": 2.2794062429144916e-06, "loss": 0.7384, "step": 4264 }, { "epoch": 2.1156758098547845, "grad_norm": 0.13262558018414852, "learning_rate": 2.278432722352464e-06, "loss": 0.7158, "step": 4265 }, { "epoch": 2.116172272558024, "grad_norm": 0.1321143377532212, "learning_rate": 2.2774592356534417e-06, "loss": 0.644, "step": 4266 }, { "epoch": 2.1166687352612636, "grad_norm": 0.13040322905026952, "learning_rate": 2.276485782966208e-06, "loss": 0.6894, "step": 4267 }, { "epoch": 2.117165197964503, "grad_norm": 0.14208837528045287, "learning_rate": 2.2755123644395377e-06, "loss": 0.727, "step": 4268 }, { "epoch": 2.1176616606677423, "grad_norm": 0.14092145083010946, "learning_rate": 2.2745389802222034e-06, "loss": 0.7016, "step": 4269 }, { "epoch": 2.1181581233709816, "grad_norm": 0.12913632924933782, "learning_rate": 2.2735656304629706e-06, "loss": 0.7004, "step": 4270 }, { "epoch": 2.118654586074221, "grad_norm": 0.1334111014407336, "learning_rate": 2.272592315310601e-06, "loss": 0.7373, "step": 4271 }, { "epoch": 2.1191510487774607, "grad_norm": 0.13363016697183605, "learning_rate": 2.2716190349138505e-06, "loss": 0.7042, "step": 4272 }, { "epoch": 2.1196475114807, "grad_norm": 0.13210258534803404, "learning_rate": 2.270645789421468e-06, "loss": 0.7621, "step": 4273 }, { "epoch": 2.1201439741839394, "grad_norm": 0.13777719715311174, "learning_rate": 2.2696725789821994e-06, "loss": 0.7225, "step": 4274 }, { "epoch": 2.1206404368871787, "grad_norm": 0.1308204005740167, "learning_rate": 2.268699403744784e-06, "loss": 0.7062, "step": 4275 }, { "epoch": 2.121136899590418, "grad_norm": 0.12957459770630428, "learning_rate": 2.2677262638579554e-06, "loss": 0.6877, "step": 4276 }, { "epoch": 2.121633362293658, "grad_norm": 0.13016124393616152, "learning_rate": 2.266753159470444e-06, "loss": 0.6826, "step": 4277 }, { "epoch": 2.122129824996897, "grad_norm": 0.12806205702427592, "learning_rate": 2.265780090730972e-06, "loss": 0.645, "step": 4278 }, { "epoch": 2.1226262877001365, "grad_norm": 0.1337185042729098, "learning_rate": 2.2648070577882573e-06, "loss": 0.7274, "step": 4279 }, { "epoch": 2.123122750403376, "grad_norm": 0.13067486282767551, "learning_rate": 2.2638340607910133e-06, "loss": 0.7087, "step": 4280 }, { "epoch": 2.123619213106615, "grad_norm": 0.13386479780042845, "learning_rate": 2.2628610998879463e-06, "loss": 0.7024, "step": 4281 }, { "epoch": 2.124115675809855, "grad_norm": 0.13272729071166328, "learning_rate": 2.261888175227758e-06, "loss": 0.7484, "step": 4282 }, { "epoch": 2.1246121385130943, "grad_norm": 0.1310493554217813, "learning_rate": 2.2609152869591445e-06, "loss": 0.651, "step": 4283 }, { "epoch": 2.1251086012163336, "grad_norm": 0.1316640668680429, "learning_rate": 2.2599424352307958e-06, "loss": 0.755, "step": 4284 }, { "epoch": 2.125605063919573, "grad_norm": 0.12948769043340413, "learning_rate": 2.2589696201913966e-06, "loss": 0.7538, "step": 4285 }, { "epoch": 2.1261015266228123, "grad_norm": 0.1259209524836695, "learning_rate": 2.2579968419896264e-06, "loss": 0.7165, "step": 4286 }, { "epoch": 2.126597989326052, "grad_norm": 0.1274147611602142, "learning_rate": 2.257024100774159e-06, "loss": 0.7053, "step": 4287 }, { "epoch": 2.1270944520292914, "grad_norm": 0.1273913753461934, "learning_rate": 2.2560513966936626e-06, "loss": 0.6987, "step": 4288 }, { "epoch": 2.1275909147325307, "grad_norm": 0.17368980947168566, "learning_rate": 2.255078729896798e-06, "loss": 0.6387, "step": 4289 }, { "epoch": 2.12808737743577, "grad_norm": 0.12393658684142564, "learning_rate": 2.254106100532223e-06, "loss": 0.673, "step": 4290 }, { "epoch": 2.1285838401390094, "grad_norm": 0.12933453592109587, "learning_rate": 2.253133508748587e-06, "loss": 0.7271, "step": 4291 }, { "epoch": 2.129080302842249, "grad_norm": 0.1343441282973702, "learning_rate": 2.252160954694536e-06, "loss": 0.6757, "step": 4292 }, { "epoch": 2.1295767655454885, "grad_norm": 0.13289063203957274, "learning_rate": 2.2511884385187098e-06, "loss": 0.735, "step": 4293 }, { "epoch": 2.130073228248728, "grad_norm": 0.12926554854547287, "learning_rate": 2.250215960369741e-06, "loss": 0.6849, "step": 4294 }, { "epoch": 2.130569690951967, "grad_norm": 0.13121955352587697, "learning_rate": 2.2492435203962584e-06, "loss": 0.6844, "step": 4295 }, { "epoch": 2.1310661536552065, "grad_norm": 0.13785478772610635, "learning_rate": 2.2482711187468825e-06, "loss": 0.6656, "step": 4296 }, { "epoch": 2.1315626163584462, "grad_norm": 0.13675030797419835, "learning_rate": 2.24729875557023e-06, "loss": 0.7755, "step": 4297 }, { "epoch": 2.1320590790616856, "grad_norm": 0.13218540295978803, "learning_rate": 2.246326431014911e-06, "loss": 0.7865, "step": 4298 }, { "epoch": 2.132555541764925, "grad_norm": 0.13058917038281245, "learning_rate": 2.2453541452295304e-06, "loss": 0.6822, "step": 4299 }, { "epoch": 2.1330520044681642, "grad_norm": 0.1356025414078166, "learning_rate": 2.2443818983626845e-06, "loss": 0.7416, "step": 4300 }, { "epoch": 2.1335484671714036, "grad_norm": 0.13259643816943026, "learning_rate": 2.2434096905629675e-06, "loss": 0.7331, "step": 4301 }, { "epoch": 2.1340449298746433, "grad_norm": 0.13178426485776873, "learning_rate": 2.242437521978965e-06, "loss": 0.7416, "step": 4302 }, { "epoch": 2.1345413925778827, "grad_norm": 0.12712500731582294, "learning_rate": 2.2414653927592578e-06, "loss": 0.6676, "step": 4303 }, { "epoch": 2.135037855281122, "grad_norm": 0.132762631536009, "learning_rate": 2.240493303052421e-06, "loss": 0.7412, "step": 4304 }, { "epoch": 2.1355343179843613, "grad_norm": 0.13927370521334112, "learning_rate": 2.239521253007021e-06, "loss": 0.7256, "step": 4305 }, { "epoch": 2.1360307806876007, "grad_norm": 0.1298913459257255, "learning_rate": 2.238549242771621e-06, "loss": 0.726, "step": 4306 }, { "epoch": 2.1365272433908404, "grad_norm": 0.13103490851669664, "learning_rate": 2.237577272494777e-06, "loss": 0.7101, "step": 4307 }, { "epoch": 2.1370237060940798, "grad_norm": 0.13052143851126424, "learning_rate": 2.2366053423250396e-06, "loss": 0.7063, "step": 4308 }, { "epoch": 2.137520168797319, "grad_norm": 0.1325952941973161, "learning_rate": 2.235633452410952e-06, "loss": 0.7102, "step": 4309 }, { "epoch": 2.1380166315005584, "grad_norm": 0.13151130162140853, "learning_rate": 2.2346616029010527e-06, "loss": 0.72, "step": 4310 }, { "epoch": 2.1385130942037978, "grad_norm": 0.13684162828630525, "learning_rate": 2.2336897939438734e-06, "loss": 0.7484, "step": 4311 }, { "epoch": 2.1390095569070375, "grad_norm": 0.13016064599020155, "learning_rate": 2.2327180256879384e-06, "loss": 0.7219, "step": 4312 }, { "epoch": 2.139506019610277, "grad_norm": 0.13271882669694315, "learning_rate": 2.231746298281768e-06, "loss": 0.7285, "step": 4313 }, { "epoch": 2.140002482313516, "grad_norm": 0.12493929654212456, "learning_rate": 2.230774611873875e-06, "loss": 0.6597, "step": 4314 }, { "epoch": 2.1404989450167555, "grad_norm": 0.1270152459435393, "learning_rate": 2.2298029666127654e-06, "loss": 0.6268, "step": 4315 }, { "epoch": 2.140995407719995, "grad_norm": 0.13120756963636904, "learning_rate": 2.2288313626469403e-06, "loss": 0.7737, "step": 4316 }, { "epoch": 2.1414918704232346, "grad_norm": 0.13093118304542808, "learning_rate": 2.2278598001248935e-06, "loss": 0.7182, "step": 4317 }, { "epoch": 2.141988333126474, "grad_norm": 0.13406739346906132, "learning_rate": 2.2268882791951125e-06, "loss": 0.6923, "step": 4318 }, { "epoch": 2.1424847958297133, "grad_norm": 0.1321265210500075, "learning_rate": 2.2259168000060793e-06, "loss": 0.7341, "step": 4319 }, { "epoch": 2.1429812585329526, "grad_norm": 0.1297032816599765, "learning_rate": 2.2249453627062697e-06, "loss": 0.6591, "step": 4320 }, { "epoch": 2.143477721236192, "grad_norm": 0.13306620569232747, "learning_rate": 2.22397396744415e-06, "loss": 0.7009, "step": 4321 }, { "epoch": 2.1439741839394317, "grad_norm": 0.1315841117210727, "learning_rate": 2.223002614368184e-06, "loss": 0.693, "step": 4322 }, { "epoch": 2.144470646642671, "grad_norm": 0.13522931404154925, "learning_rate": 2.2220313036268275e-06, "loss": 0.7885, "step": 4323 }, { "epoch": 2.1449671093459104, "grad_norm": 0.13000494547408423, "learning_rate": 2.2210600353685286e-06, "loss": 0.7126, "step": 4324 }, { "epoch": 2.1454635720491497, "grad_norm": 0.1331174145801834, "learning_rate": 2.2200888097417308e-06, "loss": 0.7385, "step": 4325 }, { "epoch": 2.145960034752389, "grad_norm": 0.13199478030805042, "learning_rate": 2.2191176268948707e-06, "loss": 0.6859, "step": 4326 }, { "epoch": 2.146456497455629, "grad_norm": 0.12924843473690692, "learning_rate": 2.2181464869763777e-06, "loss": 0.7077, "step": 4327 }, { "epoch": 2.146952960158868, "grad_norm": 0.13383047347606078, "learning_rate": 2.217175390134675e-06, "loss": 0.7301, "step": 4328 }, { "epoch": 2.1474494228621075, "grad_norm": 0.1301216163563912, "learning_rate": 2.216204336518179e-06, "loss": 0.7075, "step": 4329 }, { "epoch": 2.147945885565347, "grad_norm": 0.13513976833741134, "learning_rate": 2.2152333262752996e-06, "loss": 0.7069, "step": 4330 }, { "epoch": 2.148442348268586, "grad_norm": 0.12921645151422304, "learning_rate": 2.2142623595544407e-06, "loss": 0.7121, "step": 4331 }, { "epoch": 2.148938810971826, "grad_norm": 0.134789032200641, "learning_rate": 2.2132914365039993e-06, "loss": 0.7279, "step": 4332 }, { "epoch": 2.1494352736750653, "grad_norm": 0.12767095954972132, "learning_rate": 2.2123205572723638e-06, "loss": 0.6819, "step": 4333 }, { "epoch": 2.1499317363783046, "grad_norm": 0.132607654917447, "learning_rate": 2.211349722007919e-06, "loss": 0.7035, "step": 4334 }, { "epoch": 2.150428199081544, "grad_norm": 0.13081966486435517, "learning_rate": 2.210378930859041e-06, "loss": 0.7125, "step": 4335 }, { "epoch": 2.1509246617847833, "grad_norm": 0.13392118968950856, "learning_rate": 2.2094081839741007e-06, "loss": 0.6874, "step": 4336 }, { "epoch": 2.151421124488023, "grad_norm": 0.13563928410684517, "learning_rate": 2.208437481501459e-06, "loss": 0.7077, "step": 4337 }, { "epoch": 2.1519175871912624, "grad_norm": 0.13102290237210468, "learning_rate": 2.207466823589474e-06, "loss": 0.7314, "step": 4338 }, { "epoch": 2.1524140498945017, "grad_norm": 0.1308681684431675, "learning_rate": 2.206496210386494e-06, "loss": 0.6915, "step": 4339 }, { "epoch": 2.152910512597741, "grad_norm": 0.13474477124109288, "learning_rate": 2.2055256420408625e-06, "loss": 0.7038, "step": 4340 }, { "epoch": 2.1534069753009804, "grad_norm": 0.13030093914916455, "learning_rate": 2.2045551187009147e-06, "loss": 0.7066, "step": 4341 }, { "epoch": 2.15390343800422, "grad_norm": 0.12838709690946015, "learning_rate": 2.20358464051498e-06, "loss": 0.6839, "step": 4342 }, { "epoch": 2.1543999007074595, "grad_norm": 0.13054218481985586, "learning_rate": 2.2026142076313816e-06, "loss": 0.7189, "step": 4343 }, { "epoch": 2.154896363410699, "grad_norm": 0.1329355796495672, "learning_rate": 2.201643820198432e-06, "loss": 0.7484, "step": 4344 }, { "epoch": 2.155392826113938, "grad_norm": 0.12685915768729691, "learning_rate": 2.2006734783644407e-06, "loss": 0.6744, "step": 4345 }, { "epoch": 2.1558892888171775, "grad_norm": 0.12969948887272129, "learning_rate": 2.1997031822777093e-06, "loss": 0.6559, "step": 4346 }, { "epoch": 2.1563857515204172, "grad_norm": 0.12690777845885462, "learning_rate": 2.1987329320865314e-06, "loss": 0.6719, "step": 4347 }, { "epoch": 2.1568822142236566, "grad_norm": 0.13746314892957628, "learning_rate": 2.197762727939195e-06, "loss": 0.7068, "step": 4348 }, { "epoch": 2.157378676926896, "grad_norm": 0.13974129132766783, "learning_rate": 2.196792569983979e-06, "loss": 0.7702, "step": 4349 }, { "epoch": 2.157875139630135, "grad_norm": 0.13205901471380596, "learning_rate": 2.195822458369157e-06, "loss": 0.6789, "step": 4350 }, { "epoch": 2.1583716023333746, "grad_norm": 0.13151573181044662, "learning_rate": 2.194852393242995e-06, "loss": 0.6997, "step": 4351 }, { "epoch": 2.1588680650366143, "grad_norm": 0.1311550041985863, "learning_rate": 2.1938823747537535e-06, "loss": 0.7186, "step": 4352 }, { "epoch": 2.1593645277398537, "grad_norm": 0.12785154545568428, "learning_rate": 2.192912403049682e-06, "loss": 0.7337, "step": 4353 }, { "epoch": 2.159860990443093, "grad_norm": 0.1384165497626903, "learning_rate": 2.1919424782790256e-06, "loss": 0.7423, "step": 4354 }, { "epoch": 2.1603574531463323, "grad_norm": 0.1326394817355734, "learning_rate": 2.1909726005900224e-06, "loss": 0.7139, "step": 4355 }, { "epoch": 2.1608539158495716, "grad_norm": 0.1332787692360546, "learning_rate": 2.1900027701309016e-06, "loss": 0.7198, "step": 4356 }, { "epoch": 2.1613503785528114, "grad_norm": 0.13058676686648546, "learning_rate": 2.1890329870498873e-06, "loss": 0.6919, "step": 4357 }, { "epoch": 2.1618468412560508, "grad_norm": 0.12920400653725772, "learning_rate": 2.188063251495195e-06, "loss": 0.7204, "step": 4358 }, { "epoch": 2.16234330395929, "grad_norm": 0.12727242356432697, "learning_rate": 2.187093563615034e-06, "loss": 0.7072, "step": 4359 }, { "epoch": 2.1628397666625294, "grad_norm": 0.12536367743791044, "learning_rate": 2.1861239235576033e-06, "loss": 0.6552, "step": 4360 }, { "epoch": 2.1633362293657687, "grad_norm": 0.12786174220800478, "learning_rate": 2.185154331471099e-06, "loss": 0.6844, "step": 4361 }, { "epoch": 2.1638326920690085, "grad_norm": 0.13320070492782934, "learning_rate": 2.1841847875037065e-06, "loss": 0.7398, "step": 4362 }, { "epoch": 2.164329154772248, "grad_norm": 0.1388558108961887, "learning_rate": 2.1832152918036058e-06, "loss": 0.7204, "step": 4363 }, { "epoch": 2.164825617475487, "grad_norm": 0.12693695099985525, "learning_rate": 2.182245844518969e-06, "loss": 0.677, "step": 4364 }, { "epoch": 2.1653220801787265, "grad_norm": 0.12924014986291152, "learning_rate": 2.1812764457979594e-06, "loss": 0.6683, "step": 4365 }, { "epoch": 2.165818542881966, "grad_norm": 0.13159270443065568, "learning_rate": 2.1803070957887348e-06, "loss": 0.7281, "step": 4366 }, { "epoch": 2.166315005585205, "grad_norm": 0.13127772161024706, "learning_rate": 2.1793377946394448e-06, "loss": 0.6727, "step": 4367 }, { "epoch": 2.166811468288445, "grad_norm": 0.12854307859962671, "learning_rate": 2.1783685424982326e-06, "loss": 0.6921, "step": 4368 }, { "epoch": 2.1673079309916843, "grad_norm": 0.13019316986612073, "learning_rate": 2.1773993395132303e-06, "loss": 0.6875, "step": 4369 }, { "epoch": 2.1678043936949236, "grad_norm": 0.1353274833386413, "learning_rate": 2.176430185832567e-06, "loss": 0.7027, "step": 4370 }, { "epoch": 2.168300856398163, "grad_norm": 0.13265862920248359, "learning_rate": 2.175461081604362e-06, "loss": 0.6899, "step": 4371 }, { "epoch": 2.1687973191014027, "grad_norm": 0.13334516546543757, "learning_rate": 2.1744920269767266e-06, "loss": 0.6968, "step": 4372 }, { "epoch": 2.169293781804642, "grad_norm": 0.13704956210912356, "learning_rate": 2.1735230220977653e-06, "loss": 0.7129, "step": 4373 }, { "epoch": 2.1697902445078814, "grad_norm": 0.13014552774663893, "learning_rate": 2.172554067115576e-06, "loss": 0.7324, "step": 4374 }, { "epoch": 2.1702867072111207, "grad_norm": 0.1281661360302656, "learning_rate": 2.1715851621782473e-06, "loss": 0.6973, "step": 4375 }, { "epoch": 2.17078316991436, "grad_norm": 0.13330711985579916, "learning_rate": 2.170616307433861e-06, "loss": 0.7038, "step": 4376 }, { "epoch": 2.1712796326175994, "grad_norm": 0.13056351010723308, "learning_rate": 2.1696475030304902e-06, "loss": 0.6866, "step": 4377 }, { "epoch": 2.171776095320839, "grad_norm": 0.13268943046062295, "learning_rate": 2.1686787491162023e-06, "loss": 0.6891, "step": 4378 }, { "epoch": 2.1722725580240785, "grad_norm": 0.13924146096251458, "learning_rate": 2.1677100458390547e-06, "loss": 0.6916, "step": 4379 }, { "epoch": 2.172769020727318, "grad_norm": 0.13972180628069247, "learning_rate": 2.1667413933470998e-06, "loss": 0.7463, "step": 4380 }, { "epoch": 2.173265483430557, "grad_norm": 0.13661608957760332, "learning_rate": 2.165772791788379e-06, "loss": 0.7453, "step": 4381 }, { "epoch": 2.173761946133797, "grad_norm": 0.1282782445633921, "learning_rate": 2.1648042413109276e-06, "loss": 0.7236, "step": 4382 }, { "epoch": 2.1742584088370362, "grad_norm": 0.12716164432242413, "learning_rate": 2.163835742062774e-06, "loss": 0.687, "step": 4383 }, { "epoch": 2.1747548715402756, "grad_norm": 0.13109851282826201, "learning_rate": 2.162867294191938e-06, "loss": 0.7189, "step": 4384 }, { "epoch": 2.175251334243515, "grad_norm": 0.14106981155374845, "learning_rate": 2.1618988978464296e-06, "loss": 0.7263, "step": 4385 }, { "epoch": 2.1757477969467542, "grad_norm": 0.1287290868986218, "learning_rate": 2.1609305531742534e-06, "loss": 0.6981, "step": 4386 }, { "epoch": 2.1762442596499936, "grad_norm": 0.1294869956830014, "learning_rate": 2.159962260323406e-06, "loss": 0.7166, "step": 4387 }, { "epoch": 2.1767407223532333, "grad_norm": 0.13558488604187074, "learning_rate": 2.158994019441875e-06, "loss": 0.6741, "step": 4388 }, { "epoch": 2.1772371850564727, "grad_norm": 0.1337077290344641, "learning_rate": 2.15802583067764e-06, "loss": 0.712, "step": 4389 }, { "epoch": 2.177733647759712, "grad_norm": 0.13200934226771094, "learning_rate": 2.157057694178674e-06, "loss": 0.6919, "step": 4390 }, { "epoch": 2.1782301104629513, "grad_norm": 0.12792142983023513, "learning_rate": 2.1560896100929413e-06, "loss": 0.672, "step": 4391 }, { "epoch": 2.178726573166191, "grad_norm": 0.14201553304959288, "learning_rate": 2.155121578568397e-06, "loss": 0.7635, "step": 4392 }, { "epoch": 2.1792230358694304, "grad_norm": 0.12863093955613852, "learning_rate": 2.1541535997529894e-06, "loss": 0.6829, "step": 4393 }, { "epoch": 2.17971949857267, "grad_norm": 0.13051647735684252, "learning_rate": 2.153185673794659e-06, "loss": 0.6927, "step": 4394 }, { "epoch": 2.180215961275909, "grad_norm": 0.1378899333922816, "learning_rate": 2.1522178008413376e-06, "loss": 0.7242, "step": 4395 }, { "epoch": 2.1807124239791484, "grad_norm": 0.12915706282038747, "learning_rate": 2.15124998104095e-06, "loss": 0.7223, "step": 4396 }, { "epoch": 2.1812088866823878, "grad_norm": 0.13756383241184975, "learning_rate": 2.15028221454141e-06, "loss": 0.7269, "step": 4397 }, { "epoch": 2.1817053493856275, "grad_norm": 0.13566981965904096, "learning_rate": 2.1493145014906264e-06, "loss": 0.7178, "step": 4398 }, { "epoch": 2.182201812088867, "grad_norm": 0.13120681413105775, "learning_rate": 2.1483468420364984e-06, "loss": 0.6638, "step": 4399 }, { "epoch": 2.182698274792106, "grad_norm": 0.13010531035199235, "learning_rate": 2.1473792363269183e-06, "loss": 0.7361, "step": 4400 }, { "epoch": 2.1831947374953455, "grad_norm": 0.13102941992333803, "learning_rate": 2.1464116845097672e-06, "loss": 0.6625, "step": 4401 }, { "epoch": 2.1836912001985853, "grad_norm": 0.13143047623874493, "learning_rate": 2.1454441867329205e-06, "loss": 0.7456, "step": 4402 }, { "epoch": 2.1841876629018246, "grad_norm": 0.12849792172954375, "learning_rate": 2.1444767431442455e-06, "loss": 0.6448, "step": 4403 }, { "epoch": 2.184684125605064, "grad_norm": 0.1339908605256051, "learning_rate": 2.1435093538916e-06, "loss": 0.7366, "step": 4404 }, { "epoch": 2.1851805883083033, "grad_norm": 0.1355852126115786, "learning_rate": 2.1425420191228328e-06, "loss": 0.7442, "step": 4405 }, { "epoch": 2.1856770510115426, "grad_norm": 0.13537532518922524, "learning_rate": 2.1415747389857875e-06, "loss": 0.7584, "step": 4406 }, { "epoch": 2.186173513714782, "grad_norm": 0.1376705768549547, "learning_rate": 2.140607513628296e-06, "loss": 0.6996, "step": 4407 }, { "epoch": 2.1866699764180217, "grad_norm": 0.13019400686207325, "learning_rate": 2.1396403431981843e-06, "loss": 0.6652, "step": 4408 }, { "epoch": 2.187166439121261, "grad_norm": 0.1322767474800185, "learning_rate": 2.1386732278432674e-06, "loss": 0.6934, "step": 4409 }, { "epoch": 2.1876629018245004, "grad_norm": 0.13168416201103167, "learning_rate": 2.1377061677113547e-06, "loss": 0.6779, "step": 4410 }, { "epoch": 2.1881593645277397, "grad_norm": 0.12841437119394158, "learning_rate": 2.136739162950245e-06, "loss": 0.6991, "step": 4411 }, { "epoch": 2.188655827230979, "grad_norm": 0.13341291386169005, "learning_rate": 2.13577221370773e-06, "loss": 0.6849, "step": 4412 }, { "epoch": 2.189152289934219, "grad_norm": 0.13556040065635502, "learning_rate": 2.1348053201315926e-06, "loss": 0.7101, "step": 4413 }, { "epoch": 2.189648752637458, "grad_norm": 0.1347896034578669, "learning_rate": 2.1338384823696056e-06, "loss": 0.7173, "step": 4414 }, { "epoch": 2.1901452153406975, "grad_norm": 0.13265640243012952, "learning_rate": 2.1328717005695363e-06, "loss": 0.6666, "step": 4415 }, { "epoch": 2.190641678043937, "grad_norm": 0.1283246868922253, "learning_rate": 2.1319049748791418e-06, "loss": 0.7144, "step": 4416 }, { "epoch": 2.191138140747176, "grad_norm": 0.13569707723504326, "learning_rate": 2.1309383054461692e-06, "loss": 0.7276, "step": 4417 }, { "epoch": 2.191634603450416, "grad_norm": 0.1350932272158915, "learning_rate": 2.1299716924183586e-06, "loss": 0.7084, "step": 4418 }, { "epoch": 2.1921310661536553, "grad_norm": 0.1293789781280897, "learning_rate": 2.1290051359434426e-06, "loss": 0.7082, "step": 4419 }, { "epoch": 2.1926275288568946, "grad_norm": 0.1334911228325809, "learning_rate": 2.128038636169143e-06, "loss": 0.694, "step": 4420 }, { "epoch": 2.193123991560134, "grad_norm": 0.13202766897134072, "learning_rate": 2.1270721932431736e-06, "loss": 0.7453, "step": 4421 }, { "epoch": 2.1936204542633733, "grad_norm": 0.132585729157581, "learning_rate": 2.1261058073132403e-06, "loss": 0.7075, "step": 4422 }, { "epoch": 2.194116916966613, "grad_norm": 0.13273752133950062, "learning_rate": 2.1251394785270388e-06, "loss": 0.7006, "step": 4423 }, { "epoch": 2.1946133796698524, "grad_norm": 0.13508600941080354, "learning_rate": 2.1241732070322586e-06, "loss": 0.7252, "step": 4424 }, { "epoch": 2.1951098423730917, "grad_norm": 0.12963329955825967, "learning_rate": 2.123206992976577e-06, "loss": 0.7375, "step": 4425 }, { "epoch": 2.195606305076331, "grad_norm": 0.12468851759067956, "learning_rate": 2.122240836507665e-06, "loss": 0.6814, "step": 4426 }, { "epoch": 2.1961027677795704, "grad_norm": 0.13146278221760682, "learning_rate": 2.1212747377731845e-06, "loss": 0.721, "step": 4427 }, { "epoch": 2.19659923048281, "grad_norm": 0.14207414385746342, "learning_rate": 2.120308696920789e-06, "loss": 0.749, "step": 4428 }, { "epoch": 2.1970956931860495, "grad_norm": 0.1299724476761059, "learning_rate": 2.11934271409812e-06, "loss": 0.6992, "step": 4429 }, { "epoch": 2.197592155889289, "grad_norm": 0.13750736020293242, "learning_rate": 2.1183767894528135e-06, "loss": 0.7027, "step": 4430 }, { "epoch": 2.198088618592528, "grad_norm": 0.1327599147789671, "learning_rate": 2.1174109231324965e-06, "loss": 0.661, "step": 4431 }, { "epoch": 2.1985850812957675, "grad_norm": 0.13320281277199114, "learning_rate": 2.1164451152847865e-06, "loss": 0.7928, "step": 4432 }, { "epoch": 2.1990815439990072, "grad_norm": 0.12945660859813715, "learning_rate": 2.1154793660572897e-06, "loss": 0.7051, "step": 4433 }, { "epoch": 2.1995780067022466, "grad_norm": 0.1404344217727835, "learning_rate": 2.1145136755976063e-06, "loss": 0.7047, "step": 4434 }, { "epoch": 2.200074469405486, "grad_norm": 0.13253286798252542, "learning_rate": 2.1135480440533275e-06, "loss": 0.687, "step": 4435 }, { "epoch": 2.2005709321087252, "grad_norm": 0.1315732384032246, "learning_rate": 2.1125824715720335e-06, "loss": 0.7066, "step": 4436 }, { "epoch": 2.2010673948119646, "grad_norm": 0.12983170486233375, "learning_rate": 2.1116169583012965e-06, "loss": 0.7092, "step": 4437 }, { "epoch": 2.2015638575152043, "grad_norm": 0.12962669211533243, "learning_rate": 2.1106515043886804e-06, "loss": 0.7187, "step": 4438 }, { "epoch": 2.2020603202184437, "grad_norm": 0.13258204247552738, "learning_rate": 2.1096861099817394e-06, "loss": 0.7111, "step": 4439 }, { "epoch": 2.202556782921683, "grad_norm": 0.13733375443222062, "learning_rate": 2.1087207752280186e-06, "loss": 0.7446, "step": 4440 }, { "epoch": 2.2030532456249223, "grad_norm": 0.1335139785372661, "learning_rate": 2.107755500275053e-06, "loss": 0.733, "step": 4441 }, { "epoch": 2.2035497083281617, "grad_norm": 0.12985134152635452, "learning_rate": 2.10679028527037e-06, "loss": 0.7306, "step": 4442 }, { "epoch": 2.2040461710314014, "grad_norm": 0.12811473834247125, "learning_rate": 2.1058251303614875e-06, "loss": 0.7068, "step": 4443 }, { "epoch": 2.2045426337346408, "grad_norm": 0.12645032456670896, "learning_rate": 2.1048600356959133e-06, "loss": 0.6633, "step": 4444 }, { "epoch": 2.20503909643788, "grad_norm": 0.13171507130184576, "learning_rate": 2.103895001421148e-06, "loss": 0.6973, "step": 4445 }, { "epoch": 2.2055355591411194, "grad_norm": 0.1302019612259144, "learning_rate": 2.10293002768468e-06, "loss": 0.68, "step": 4446 }, { "epoch": 2.2060320218443588, "grad_norm": 0.12865346316964513, "learning_rate": 2.101965114633991e-06, "loss": 0.7241, "step": 4447 }, { "epoch": 2.2065284845475985, "grad_norm": 0.12938944539941274, "learning_rate": 2.1010002624165528e-06, "loss": 0.7068, "step": 4448 }, { "epoch": 2.207024947250838, "grad_norm": 0.1315311127735872, "learning_rate": 2.1000354711798258e-06, "loss": 0.7197, "step": 4449 }, { "epoch": 2.207521409954077, "grad_norm": 0.1304635101926576, "learning_rate": 2.0990707410712647e-06, "loss": 0.7177, "step": 4450 }, { "epoch": 2.2080178726573165, "grad_norm": 0.12566986860796162, "learning_rate": 2.098106072238313e-06, "loss": 0.7147, "step": 4451 }, { "epoch": 2.208514335360556, "grad_norm": 0.1291854407145579, "learning_rate": 2.097141464828403e-06, "loss": 0.6987, "step": 4452 }, { "epoch": 2.2090107980637956, "grad_norm": 0.12932256073625428, "learning_rate": 2.0961769189889612e-06, "loss": 0.6682, "step": 4453 }, { "epoch": 2.209507260767035, "grad_norm": 0.13534326258562565, "learning_rate": 2.0952124348674027e-06, "loss": 0.7585, "step": 4454 }, { "epoch": 2.2100037234702743, "grad_norm": 0.13139570296537265, "learning_rate": 2.094248012611133e-06, "loss": 0.7238, "step": 4455 }, { "epoch": 2.2105001861735136, "grad_norm": 0.12736205425045874, "learning_rate": 2.0932836523675495e-06, "loss": 0.7331, "step": 4456 }, { "epoch": 2.210996648876753, "grad_norm": 0.1285738140080393, "learning_rate": 2.0923193542840376e-06, "loss": 0.6831, "step": 4457 }, { "epoch": 2.2114931115799927, "grad_norm": 0.1373192184817093, "learning_rate": 2.0913551185079763e-06, "loss": 0.7511, "step": 4458 }, { "epoch": 2.211989574283232, "grad_norm": 0.1315083921749297, "learning_rate": 2.0903909451867327e-06, "loss": 0.7186, "step": 4459 }, { "epoch": 2.2124860369864714, "grad_norm": 0.13197214005113778, "learning_rate": 2.089426834467666e-06, "loss": 0.7278, "step": 4460 }, { "epoch": 2.2129824996897107, "grad_norm": 0.13117095945392646, "learning_rate": 2.0884627864981247e-06, "loss": 0.7158, "step": 4461 }, { "epoch": 2.21347896239295, "grad_norm": 0.14366920819680415, "learning_rate": 2.0874988014254474e-06, "loss": 0.697, "step": 4462 }, { "epoch": 2.21397542509619, "grad_norm": 0.13103101512448223, "learning_rate": 2.0865348793969644e-06, "loss": 0.7099, "step": 4463 }, { "epoch": 2.214471887799429, "grad_norm": 0.13241402045221123, "learning_rate": 2.085571020559997e-06, "loss": 0.6921, "step": 4464 }, { "epoch": 2.2149683505026685, "grad_norm": 0.12588765448059275, "learning_rate": 2.084607225061853e-06, "loss": 0.6612, "step": 4465 }, { "epoch": 2.215464813205908, "grad_norm": 0.1310167292666887, "learning_rate": 2.0836434930498343e-06, "loss": 0.6417, "step": 4466 }, { "epoch": 2.215961275909147, "grad_norm": 0.13194158154041208, "learning_rate": 2.082679824671232e-06, "loss": 0.7063, "step": 4467 }, { "epoch": 2.216457738612387, "grad_norm": 0.13066331982217858, "learning_rate": 2.0817162200733275e-06, "loss": 0.6948, "step": 4468 }, { "epoch": 2.2169542013156263, "grad_norm": 0.1318788924378777, "learning_rate": 2.080752679403392e-06, "loss": 0.7012, "step": 4469 }, { "epoch": 2.2174506640188656, "grad_norm": 0.13209070722048075, "learning_rate": 2.0797892028086873e-06, "loss": 0.7201, "step": 4470 }, { "epoch": 2.217947126722105, "grad_norm": 0.1274600384445204, "learning_rate": 2.078825790436465e-06, "loss": 0.7131, "step": 4471 }, { "epoch": 2.2184435894253443, "grad_norm": 0.13371229036895962, "learning_rate": 2.0778624424339684e-06, "loss": 0.7382, "step": 4472 }, { "epoch": 2.218940052128584, "grad_norm": 0.13862248032289534, "learning_rate": 2.0768991589484284e-06, "loss": 0.7566, "step": 4473 }, { "epoch": 2.2194365148318234, "grad_norm": 0.13523476510615928, "learning_rate": 2.0759359401270683e-06, "loss": 0.7046, "step": 4474 }, { "epoch": 2.2199329775350627, "grad_norm": 0.13349052506342798, "learning_rate": 2.0749727861171e-06, "loss": 0.7179, "step": 4475 }, { "epoch": 2.220429440238302, "grad_norm": 0.12706375105277193, "learning_rate": 2.074009697065727e-06, "loss": 0.7001, "step": 4476 }, { "epoch": 2.2209259029415414, "grad_norm": 0.13175995019806763, "learning_rate": 2.073046673120142e-06, "loss": 0.7124, "step": 4477 }, { "epoch": 2.221422365644781, "grad_norm": 0.12839478228159643, "learning_rate": 2.0720837144275264e-06, "loss": 0.6965, "step": 4478 }, { "epoch": 2.2219188283480205, "grad_norm": 0.13013639445767175, "learning_rate": 2.0711208211350543e-06, "loss": 0.6632, "step": 4479 }, { "epoch": 2.22241529105126, "grad_norm": 0.13013755734088792, "learning_rate": 2.070157993389889e-06, "loss": 0.6993, "step": 4480 }, { "epoch": 2.222911753754499, "grad_norm": 0.12451994734244691, "learning_rate": 2.069195231339182e-06, "loss": 0.6995, "step": 4481 }, { "epoch": 2.2234082164577385, "grad_norm": 0.13477779261267323, "learning_rate": 2.0682325351300754e-06, "loss": 0.6804, "step": 4482 }, { "epoch": 2.2239046791609782, "grad_norm": 0.12668543781608235, "learning_rate": 2.0672699049097034e-06, "loss": 0.7061, "step": 4483 }, { "epoch": 2.2244011418642176, "grad_norm": 0.13314313496622696, "learning_rate": 2.0663073408251888e-06, "loss": 0.7014, "step": 4484 }, { "epoch": 2.224897604567457, "grad_norm": 0.12970701923177527, "learning_rate": 2.065344843023643e-06, "loss": 0.68, "step": 4485 }, { "epoch": 2.225394067270696, "grad_norm": 0.12387800818585266, "learning_rate": 2.0643824116521683e-06, "loss": 0.6338, "step": 4486 }, { "epoch": 2.2258905299739355, "grad_norm": 0.13553410886475062, "learning_rate": 2.0634200468578577e-06, "loss": 0.6881, "step": 4487 }, { "epoch": 2.2263869926771753, "grad_norm": 0.1316309439657593, "learning_rate": 2.0624577487877934e-06, "loss": 0.7049, "step": 4488 }, { "epoch": 2.2268834553804147, "grad_norm": 0.12922645687790163, "learning_rate": 2.0614955175890464e-06, "loss": 0.7002, "step": 4489 }, { "epoch": 2.227379918083654, "grad_norm": 0.13080644734923877, "learning_rate": 2.0605333534086783e-06, "loss": 0.7158, "step": 4490 }, { "epoch": 2.2278763807868933, "grad_norm": 0.13245492757085336, "learning_rate": 2.0595712563937412e-06, "loss": 0.749, "step": 4491 }, { "epoch": 2.2283728434901326, "grad_norm": 0.13270571932773106, "learning_rate": 2.0586092266912753e-06, "loss": 0.7214, "step": 4492 }, { "epoch": 2.2288693061933724, "grad_norm": 0.13643011762485555, "learning_rate": 2.0576472644483133e-06, "loss": 0.7247, "step": 4493 }, { "epoch": 2.2293657688966118, "grad_norm": 0.13005640680659797, "learning_rate": 2.056685369811873e-06, "loss": 0.7014, "step": 4494 }, { "epoch": 2.229862231599851, "grad_norm": 0.13129470666359272, "learning_rate": 2.055723542928966e-06, "loss": 0.7518, "step": 4495 }, { "epoch": 2.2303586943030904, "grad_norm": 0.13311828367670825, "learning_rate": 2.0547617839465925e-06, "loss": 0.7229, "step": 4496 }, { "epoch": 2.2308551570063297, "grad_norm": 0.12371038963722301, "learning_rate": 2.0538000930117424e-06, "loss": 0.6783, "step": 4497 }, { "epoch": 2.2313516197095695, "grad_norm": 0.1292453879856836, "learning_rate": 2.0528384702713924e-06, "loss": 0.6828, "step": 4498 }, { "epoch": 2.231848082412809, "grad_norm": 0.13392931788982132, "learning_rate": 2.0518769158725126e-06, "loss": 0.7158, "step": 4499 }, { "epoch": 2.232344545116048, "grad_norm": 0.13236176803456814, "learning_rate": 2.0509154299620622e-06, "loss": 0.7292, "step": 4500 }, { "epoch": 2.2328410078192875, "grad_norm": 0.12728431676565705, "learning_rate": 2.0499540126869864e-06, "loss": 0.709, "step": 4501 }, { "epoch": 2.233337470522527, "grad_norm": 0.13394951954145282, "learning_rate": 2.0489926641942245e-06, "loss": 0.7129, "step": 4502 }, { "epoch": 2.2338339332257666, "grad_norm": 0.12837119042814552, "learning_rate": 2.0480313846307025e-06, "loss": 0.6862, "step": 4503 }, { "epoch": 2.234330395929006, "grad_norm": 0.13885619880171285, "learning_rate": 2.047070174143337e-06, "loss": 0.7047, "step": 4504 }, { "epoch": 2.2348268586322453, "grad_norm": 0.13053786492237074, "learning_rate": 2.0461090328790325e-06, "loss": 0.741, "step": 4505 }, { "epoch": 2.2353233213354846, "grad_norm": 0.12630369808853714, "learning_rate": 2.0451479609846847e-06, "loss": 0.6303, "step": 4506 }, { "epoch": 2.235819784038724, "grad_norm": 0.1334775735012497, "learning_rate": 2.0441869586071784e-06, "loss": 0.7161, "step": 4507 }, { "epoch": 2.2363162467419633, "grad_norm": 0.1243537069117441, "learning_rate": 2.043226025893387e-06, "loss": 0.7083, "step": 4508 }, { "epoch": 2.236812709445203, "grad_norm": 0.1362055506738069, "learning_rate": 2.0422651629901743e-06, "loss": 0.7073, "step": 4509 }, { "epoch": 2.2373091721484424, "grad_norm": 0.13381191349240082, "learning_rate": 2.041304370044391e-06, "loss": 0.715, "step": 4510 }, { "epoch": 2.2378056348516817, "grad_norm": 0.13008180169114836, "learning_rate": 2.0403436472028807e-06, "loss": 0.6912, "step": 4511 }, { "epoch": 2.238302097554921, "grad_norm": 0.13665438512065606, "learning_rate": 2.0393829946124737e-06, "loss": 0.7122, "step": 4512 }, { "epoch": 2.238798560258161, "grad_norm": 0.1306034467966443, "learning_rate": 2.0384224124199918e-06, "loss": 0.6518, "step": 4513 }, { "epoch": 2.2392950229614, "grad_norm": 0.14766441708462488, "learning_rate": 2.0374619007722423e-06, "loss": 0.75, "step": 4514 }, { "epoch": 2.2397914856646395, "grad_norm": 0.14199023614242443, "learning_rate": 2.036501459816025e-06, "loss": 0.7057, "step": 4515 }, { "epoch": 2.240287948367879, "grad_norm": 0.13183647650911845, "learning_rate": 2.0355410896981285e-06, "loss": 0.7167, "step": 4516 }, { "epoch": 2.240784411071118, "grad_norm": 0.12935480831259677, "learning_rate": 2.034580790565329e-06, "loss": 0.6945, "step": 4517 }, { "epoch": 2.2412808737743575, "grad_norm": 0.123741363188653, "learning_rate": 2.033620562564393e-06, "loss": 0.6593, "step": 4518 }, { "epoch": 2.2417773364775972, "grad_norm": 0.1324876830108556, "learning_rate": 2.032660405842076e-06, "loss": 0.7795, "step": 4519 }, { "epoch": 2.2422737991808366, "grad_norm": 0.13082736167618114, "learning_rate": 2.031700320545123e-06, "loss": 0.6666, "step": 4520 }, { "epoch": 2.242770261884076, "grad_norm": 0.13454831662707822, "learning_rate": 2.0307403068202677e-06, "loss": 0.6773, "step": 4521 }, { "epoch": 2.2432667245873152, "grad_norm": 0.13082635983605484, "learning_rate": 2.0297803648142324e-06, "loss": 0.6761, "step": 4522 }, { "epoch": 2.243763187290555, "grad_norm": 0.1300274112305257, "learning_rate": 2.0288204946737283e-06, "loss": 0.6994, "step": 4523 }, { "epoch": 2.2442596499937943, "grad_norm": 0.13137498359275115, "learning_rate": 2.0278606965454573e-06, "loss": 0.7014, "step": 4524 }, { "epoch": 2.2447561126970337, "grad_norm": 0.1380783519062254, "learning_rate": 2.026900970576109e-06, "loss": 0.7264, "step": 4525 }, { "epoch": 2.245252575400273, "grad_norm": 0.1334871254288439, "learning_rate": 2.0259413169123615e-06, "loss": 0.7035, "step": 4526 }, { "epoch": 2.2457490381035123, "grad_norm": 0.13467702526258202, "learning_rate": 2.0249817357008825e-06, "loss": 0.7435, "step": 4527 }, { "epoch": 2.2462455008067517, "grad_norm": 0.13343539696190834, "learning_rate": 2.024022227088329e-06, "loss": 0.766, "step": 4528 }, { "epoch": 2.2467419635099914, "grad_norm": 0.1336433964166616, "learning_rate": 2.0230627912213475e-06, "loss": 0.7278, "step": 4529 }, { "epoch": 2.2472384262132308, "grad_norm": 0.13069507120960716, "learning_rate": 2.02210342824657e-06, "loss": 0.6983, "step": 4530 }, { "epoch": 2.24773488891647, "grad_norm": 0.13107224337831874, "learning_rate": 2.0211441383106208e-06, "loss": 0.677, "step": 4531 }, { "epoch": 2.2482313516197094, "grad_norm": 0.13776105234734956, "learning_rate": 2.020184921560113e-06, "loss": 0.7045, "step": 4532 }, { "epoch": 2.248727814322949, "grad_norm": 0.12525227813051776, "learning_rate": 2.019225778141646e-06, "loss": 0.6817, "step": 4533 }, { "epoch": 2.2492242770261885, "grad_norm": 0.13625075248045249, "learning_rate": 2.0182667082018104e-06, "loss": 0.7373, "step": 4534 }, { "epoch": 2.249720739729428, "grad_norm": 0.13175367979094055, "learning_rate": 2.0173077118871847e-06, "loss": 0.7571, "step": 4535 }, { "epoch": 2.250217202432667, "grad_norm": 0.1313325217650813, "learning_rate": 2.016348789344335e-06, "loss": 0.6996, "step": 4536 }, { "epoch": 2.250217202432667, "eval_loss": 0.7270435690879822, "eval_runtime": 141.8878, "eval_samples_per_second": 213.923, "eval_steps_per_second": 26.746, "step": 4536 }, { "epoch": 2.2507136651359065, "grad_norm": 0.130395751964868, "learning_rate": 2.0153899407198193e-06, "loss": 0.7365, "step": 4537 }, { "epoch": 2.251210127839146, "grad_norm": 0.133483489650425, "learning_rate": 2.01443116616018e-06, "loss": 0.7063, "step": 4538 }, { "epoch": 2.2517065905423856, "grad_norm": 0.12958543122285368, "learning_rate": 2.0134724658119525e-06, "loss": 0.7304, "step": 4539 }, { "epoch": 2.252203053245625, "grad_norm": 0.13021997063309637, "learning_rate": 2.012513839821657e-06, "loss": 0.6835, "step": 4540 }, { "epoch": 2.2526995159488643, "grad_norm": 0.1327109634903606, "learning_rate": 2.011555288335805e-06, "loss": 0.7004, "step": 4541 }, { "epoch": 2.2531959786521036, "grad_norm": 0.13418960633883067, "learning_rate": 2.0105968115008957e-06, "loss": 0.6589, "step": 4542 }, { "epoch": 2.2536924413553434, "grad_norm": 0.134302800194372, "learning_rate": 2.0096384094634165e-06, "loss": 0.7305, "step": 4543 }, { "epoch": 2.2541889040585827, "grad_norm": 0.1313733378397713, "learning_rate": 2.0086800823698437e-06, "loss": 0.7205, "step": 4544 }, { "epoch": 2.254685366761822, "grad_norm": 0.13297045310339728, "learning_rate": 2.007721830366644e-06, "loss": 0.7046, "step": 4545 }, { "epoch": 2.2551818294650614, "grad_norm": 0.1336572863632231, "learning_rate": 2.0067636536002687e-06, "loss": 0.6846, "step": 4546 }, { "epoch": 2.2556782921683007, "grad_norm": 0.13982170058768661, "learning_rate": 2.00580555221716e-06, "loss": 0.7349, "step": 4547 }, { "epoch": 2.25617475487154, "grad_norm": 0.13758235182829331, "learning_rate": 2.0048475263637495e-06, "loss": 0.7076, "step": 4548 }, { "epoch": 2.25667121757478, "grad_norm": 0.13023416929452136, "learning_rate": 2.003889576186455e-06, "loss": 0.7703, "step": 4549 }, { "epoch": 2.257167680278019, "grad_norm": 0.12888207678478222, "learning_rate": 2.002931701831684e-06, "loss": 0.7119, "step": 4550 }, { "epoch": 2.2576641429812585, "grad_norm": 0.12816010732415842, "learning_rate": 2.0019739034458328e-06, "loss": 0.725, "step": 4551 }, { "epoch": 2.258160605684498, "grad_norm": 0.14248392567472656, "learning_rate": 2.0010161811752856e-06, "loss": 0.7778, "step": 4552 }, { "epoch": 2.2586570683877376, "grad_norm": 0.13798620466349681, "learning_rate": 2.000058535166414e-06, "loss": 0.6932, "step": 4553 }, { "epoch": 2.259153531090977, "grad_norm": 0.1329997986135555, "learning_rate": 1.9991009655655796e-06, "loss": 0.6758, "step": 4554 }, { "epoch": 2.2596499937942163, "grad_norm": 0.13463323485659834, "learning_rate": 1.9981434725191314e-06, "loss": 0.7121, "step": 4555 }, { "epoch": 2.2601464564974556, "grad_norm": 0.12854539155868666, "learning_rate": 1.9971860561734062e-06, "loss": 0.6769, "step": 4556 }, { "epoch": 2.260642919200695, "grad_norm": 0.1282085935483695, "learning_rate": 1.9962287166747304e-06, "loss": 0.6794, "step": 4557 }, { "epoch": 2.2611393819039343, "grad_norm": 0.1294147737679884, "learning_rate": 1.9952714541694186e-06, "loss": 0.7308, "step": 4558 }, { "epoch": 2.261635844607174, "grad_norm": 0.13473387061029163, "learning_rate": 1.994314268803772e-06, "loss": 0.7255, "step": 4559 }, { "epoch": 2.2621323073104134, "grad_norm": 0.1325041645843652, "learning_rate": 1.993357160724081e-06, "loss": 0.697, "step": 4560 }, { "epoch": 2.2626287700136527, "grad_norm": 0.1342544420441968, "learning_rate": 1.9924001300766256e-06, "loss": 0.7283, "step": 4561 }, { "epoch": 2.263125232716892, "grad_norm": 0.13379243253594394, "learning_rate": 1.9914431770076707e-06, "loss": 0.7161, "step": 4562 }, { "epoch": 2.263621695420132, "grad_norm": 0.1319578992058116, "learning_rate": 1.9904863016634724e-06, "loss": 0.7133, "step": 4563 }, { "epoch": 2.264118158123371, "grad_norm": 0.13825130015364384, "learning_rate": 1.9895295041902733e-06, "loss": 0.7064, "step": 4564 }, { "epoch": 2.2646146208266105, "grad_norm": 0.1261519713290881, "learning_rate": 1.988572784734305e-06, "loss": 0.7139, "step": 4565 }, { "epoch": 2.26511108352985, "grad_norm": 0.13132799813077256, "learning_rate": 1.9876161434417857e-06, "loss": 0.6944, "step": 4566 }, { "epoch": 2.265607546233089, "grad_norm": 0.13312127226666143, "learning_rate": 1.986659580458924e-06, "loss": 0.7534, "step": 4567 }, { "epoch": 2.2661040089363285, "grad_norm": 0.13300727555740405, "learning_rate": 1.9857030959319143e-06, "loss": 0.6843, "step": 4568 }, { "epoch": 2.2666004716395682, "grad_norm": 0.12803205729134937, "learning_rate": 1.984746690006941e-06, "loss": 0.6661, "step": 4569 }, { "epoch": 2.2670969343428076, "grad_norm": 0.13134108543944287, "learning_rate": 1.983790362830174e-06, "loss": 0.715, "step": 4570 }, { "epoch": 2.267593397046047, "grad_norm": 0.1310214195071098, "learning_rate": 1.982834114547773e-06, "loss": 0.6808, "step": 4571 }, { "epoch": 2.2680898597492862, "grad_norm": 0.12868216227851317, "learning_rate": 1.981877945305886e-06, "loss": 0.6859, "step": 4572 }, { "epoch": 2.2685863224525256, "grad_norm": 0.1308307615447908, "learning_rate": 1.980921855250647e-06, "loss": 0.706, "step": 4573 }, { "epoch": 2.2690827851557653, "grad_norm": 0.13070485382531827, "learning_rate": 1.979965844528181e-06, "loss": 0.7233, "step": 4574 }, { "epoch": 2.2695792478590047, "grad_norm": 0.13488405502043913, "learning_rate": 1.979009913284596e-06, "loss": 0.6707, "step": 4575 }, { "epoch": 2.270075710562244, "grad_norm": 0.1357162099459169, "learning_rate": 1.978054061665993e-06, "loss": 0.7506, "step": 4576 }, { "epoch": 2.2705721732654833, "grad_norm": 0.1324174585525689, "learning_rate": 1.977098289818459e-06, "loss": 0.7365, "step": 4577 }, { "epoch": 2.2710686359687227, "grad_norm": 0.13116743586662247, "learning_rate": 1.976142597888066e-06, "loss": 0.6689, "step": 4578 }, { "epoch": 2.2715650986719624, "grad_norm": 0.13835608552718603, "learning_rate": 1.9751869860208774e-06, "loss": 0.706, "step": 4579 }, { "epoch": 2.2720615613752018, "grad_norm": 0.13412470397613815, "learning_rate": 1.974231454362944e-06, "loss": 0.7028, "step": 4580 }, { "epoch": 2.272558024078441, "grad_norm": 0.12973612445352137, "learning_rate": 1.973276003060302e-06, "loss": 0.7142, "step": 4581 }, { "epoch": 2.2730544867816804, "grad_norm": 0.12782910807584713, "learning_rate": 1.972320632258978e-06, "loss": 0.6797, "step": 4582 }, { "epoch": 2.2735509494849198, "grad_norm": 0.12805462086856045, "learning_rate": 1.9713653421049844e-06, "loss": 0.6942, "step": 4583 }, { "epoch": 2.2740474121881595, "grad_norm": 0.13882829796209556, "learning_rate": 1.970410132744322e-06, "loss": 0.6987, "step": 4584 }, { "epoch": 2.274543874891399, "grad_norm": 0.13480442463376907, "learning_rate": 1.9694550043229802e-06, "loss": 0.7136, "step": 4585 }, { "epoch": 2.275040337594638, "grad_norm": 0.12813305978151937, "learning_rate": 1.968499956986934e-06, "loss": 0.6925, "step": 4586 }, { "epoch": 2.2755368002978775, "grad_norm": 0.13565940409117114, "learning_rate": 1.9675449908821473e-06, "loss": 0.7073, "step": 4587 }, { "epoch": 2.276033263001117, "grad_norm": 0.13297564484436258, "learning_rate": 1.9665901061545715e-06, "loss": 0.7146, "step": 4588 }, { "epoch": 2.2765297257043566, "grad_norm": 0.1292833330312642, "learning_rate": 1.965635302950145e-06, "loss": 0.6681, "step": 4589 }, { "epoch": 2.277026188407596, "grad_norm": 0.13161230553838985, "learning_rate": 1.9646805814147956e-06, "loss": 0.7131, "step": 4590 }, { "epoch": 2.2775226511108353, "grad_norm": 0.13259185019815198, "learning_rate": 1.9637259416944352e-06, "loss": 0.7156, "step": 4591 }, { "epoch": 2.2780191138140746, "grad_norm": 0.13109212966194678, "learning_rate": 1.9627713839349665e-06, "loss": 0.7343, "step": 4592 }, { "epoch": 2.278515576517314, "grad_norm": 0.13887226544683762, "learning_rate": 1.961816908282279e-06, "loss": 0.7579, "step": 4593 }, { "epoch": 2.2790120392205537, "grad_norm": 0.13379712126091625, "learning_rate": 1.960862514882247e-06, "loss": 0.741, "step": 4594 }, { "epoch": 2.279508501923793, "grad_norm": 0.12731411318156655, "learning_rate": 1.959908203880735e-06, "loss": 0.6717, "step": 4595 }, { "epoch": 2.2800049646270324, "grad_norm": 0.13191309058764128, "learning_rate": 1.9589539754235938e-06, "loss": 0.6887, "step": 4596 }, { "epoch": 2.2805014273302717, "grad_norm": 0.13477829801291258, "learning_rate": 1.957999829656664e-06, "loss": 0.7024, "step": 4597 }, { "epoch": 2.280997890033511, "grad_norm": 0.1408048819606926, "learning_rate": 1.9570457667257686e-06, "loss": 0.736, "step": 4598 }, { "epoch": 2.281494352736751, "grad_norm": 0.13409166712691484, "learning_rate": 1.9560917867767223e-06, "loss": 0.7182, "step": 4599 }, { "epoch": 2.28199081543999, "grad_norm": 0.1323033246480032, "learning_rate": 1.9551378899553255e-06, "loss": 0.6757, "step": 4600 }, { "epoch": 2.2824872781432295, "grad_norm": 0.1279575053978906, "learning_rate": 1.9541840764073666e-06, "loss": 0.6639, "step": 4601 }, { "epoch": 2.282983740846469, "grad_norm": 0.1284865860394091, "learning_rate": 1.9532303462786196e-06, "loss": 0.7076, "step": 4602 }, { "epoch": 2.283480203549708, "grad_norm": 0.13236730912378264, "learning_rate": 1.9522766997148473e-06, "loss": 0.7177, "step": 4603 }, { "epoch": 2.283976666252948, "grad_norm": 0.13334477268216768, "learning_rate": 1.9513231368617996e-06, "loss": 0.7152, "step": 4604 }, { "epoch": 2.2844731289561873, "grad_norm": 0.129554016821733, "learning_rate": 1.950369657865213e-06, "loss": 0.7274, "step": 4605 }, { "epoch": 2.2849695916594266, "grad_norm": 0.12809936242000544, "learning_rate": 1.949416262870812e-06, "loss": 0.6868, "step": 4606 }, { "epoch": 2.285466054362666, "grad_norm": 0.12619361467512472, "learning_rate": 1.948462952024307e-06, "loss": 0.6536, "step": 4607 }, { "epoch": 2.2859625170659053, "grad_norm": 0.13019492596083582, "learning_rate": 1.9475097254713963e-06, "loss": 0.6921, "step": 4608 }, { "epoch": 2.286458979769145, "grad_norm": 0.12712933900402623, "learning_rate": 1.9465565833577667e-06, "loss": 0.7071, "step": 4609 }, { "epoch": 2.2869554424723844, "grad_norm": 0.13073678098550098, "learning_rate": 1.9456035258290886e-06, "loss": 0.7274, "step": 4610 }, { "epoch": 2.2874519051756237, "grad_norm": 0.13476232848198935, "learning_rate": 1.944650553031022e-06, "loss": 0.7651, "step": 4611 }, { "epoch": 2.287948367878863, "grad_norm": 0.12940407399602424, "learning_rate": 1.9436976651092143e-06, "loss": 0.7052, "step": 4612 }, { "epoch": 2.2884448305821024, "grad_norm": 0.12521819355840508, "learning_rate": 1.9427448622092997e-06, "loss": 0.6643, "step": 4613 }, { "epoch": 2.288941293285342, "grad_norm": 0.1364514191356369, "learning_rate": 1.941792144476897e-06, "loss": 0.6871, "step": 4614 }, { "epoch": 2.2894377559885815, "grad_norm": 0.12501502708363635, "learning_rate": 1.940839512057615e-06, "loss": 0.6713, "step": 4615 }, { "epoch": 2.289934218691821, "grad_norm": 0.13288530127304785, "learning_rate": 1.9398869650970483e-06, "loss": 0.7355, "step": 4616 }, { "epoch": 2.29043068139506, "grad_norm": 0.1356585375859948, "learning_rate": 1.9389345037407787e-06, "loss": 0.7225, "step": 4617 }, { "epoch": 2.2909271440982995, "grad_norm": 0.13053848389346934, "learning_rate": 1.9379821281343736e-06, "loss": 0.7053, "step": 4618 }, { "epoch": 2.2914236068015392, "grad_norm": 0.13397921868792434, "learning_rate": 1.937029838423389e-06, "loss": 0.7573, "step": 4619 }, { "epoch": 2.2919200695047786, "grad_norm": 0.13057634121314804, "learning_rate": 1.9360776347533667e-06, "loss": 0.7348, "step": 4620 }, { "epoch": 2.292416532208018, "grad_norm": 0.12556161832630441, "learning_rate": 1.9351255172698368e-06, "loss": 0.6724, "step": 4621 }, { "epoch": 2.292912994911257, "grad_norm": 0.13237700928498475, "learning_rate": 1.9341734861183146e-06, "loss": 0.7015, "step": 4622 }, { "epoch": 2.2934094576144965, "grad_norm": 0.12996320795796523, "learning_rate": 1.9332215414443023e-06, "loss": 0.6838, "step": 4623 }, { "epoch": 2.2939059203177363, "grad_norm": 0.1264053811415136, "learning_rate": 1.9322696833932896e-06, "loss": 0.672, "step": 4624 }, { "epoch": 2.2944023830209757, "grad_norm": 0.13696458123198366, "learning_rate": 1.931317912110754e-06, "loss": 0.7194, "step": 4625 }, { "epoch": 2.294898845724215, "grad_norm": 0.12935669893699142, "learning_rate": 1.930366227742157e-06, "loss": 0.7196, "step": 4626 }, { "epoch": 2.2953953084274543, "grad_norm": 0.13011349995194715, "learning_rate": 1.9294146304329482e-06, "loss": 0.7146, "step": 4627 }, { "epoch": 2.2958917711306936, "grad_norm": 0.132257641760658, "learning_rate": 1.9284631203285644e-06, "loss": 0.7539, "step": 4628 }, { "epoch": 2.296388233833933, "grad_norm": 0.12618209390437157, "learning_rate": 1.927511697574429e-06, "loss": 0.6542, "step": 4629 }, { "epoch": 2.2968846965371728, "grad_norm": 0.13076964633199217, "learning_rate": 1.9265603623159517e-06, "loss": 0.736, "step": 4630 }, { "epoch": 2.297381159240412, "grad_norm": 0.1278597050601475, "learning_rate": 1.9256091146985282e-06, "loss": 0.7204, "step": 4631 }, { "epoch": 2.2978776219436514, "grad_norm": 0.12737458954653222, "learning_rate": 1.924657954867542e-06, "loss": 0.6605, "step": 4632 }, { "epoch": 2.2983740846468907, "grad_norm": 0.12824874951751916, "learning_rate": 1.923706882968362e-06, "loss": 0.6611, "step": 4633 }, { "epoch": 2.2988705473501305, "grad_norm": 0.13788032532644687, "learning_rate": 1.922755899146346e-06, "loss": 0.7261, "step": 4634 }, { "epoch": 2.29936701005337, "grad_norm": 0.13925532124868073, "learning_rate": 1.9218050035468343e-06, "loss": 0.6746, "step": 4635 }, { "epoch": 2.299863472756609, "grad_norm": 0.13354164221145085, "learning_rate": 1.9208541963151576e-06, "loss": 0.6896, "step": 4636 }, { "epoch": 2.3003599354598485, "grad_norm": 0.13161140898185, "learning_rate": 1.919903477596631e-06, "loss": 0.6969, "step": 4637 }, { "epoch": 2.300856398163088, "grad_norm": 0.13142493984817077, "learning_rate": 1.918952847536557e-06, "loss": 0.6899, "step": 4638 }, { "epoch": 2.301352860866327, "grad_norm": 0.12827685841442193, "learning_rate": 1.9180023062802237e-06, "loss": 0.6423, "step": 4639 }, { "epoch": 2.301849323569567, "grad_norm": 0.13137125453834245, "learning_rate": 1.9170518539729063e-06, "loss": 0.6769, "step": 4640 }, { "epoch": 2.3023457862728063, "grad_norm": 0.1256889202189166, "learning_rate": 1.9161014907598668e-06, "loss": 0.6618, "step": 4641 }, { "epoch": 2.3028422489760456, "grad_norm": 0.12999406754723933, "learning_rate": 1.915151216786352e-06, "loss": 0.7263, "step": 4642 }, { "epoch": 2.303338711679285, "grad_norm": 0.12802875535724298, "learning_rate": 1.9142010321975956e-06, "loss": 0.6686, "step": 4643 }, { "epoch": 2.3038351743825247, "grad_norm": 0.13208418783479883, "learning_rate": 1.9132509371388187e-06, "loss": 0.7076, "step": 4644 }, { "epoch": 2.304331637085764, "grad_norm": 0.13301405076047154, "learning_rate": 1.9123009317552294e-06, "loss": 0.7435, "step": 4645 }, { "epoch": 2.3048280997890034, "grad_norm": 0.13096465156829554, "learning_rate": 1.911351016192019e-06, "loss": 0.6746, "step": 4646 }, { "epoch": 2.3053245624922427, "grad_norm": 0.1282318000984796, "learning_rate": 1.9104011905943675e-06, "loss": 0.6925, "step": 4647 }, { "epoch": 2.305821025195482, "grad_norm": 0.1374326520419337, "learning_rate": 1.9094514551074405e-06, "loss": 0.721, "step": 4648 }, { "epoch": 2.3063174878987214, "grad_norm": 0.13296090301658972, "learning_rate": 1.9085018098763895e-06, "loss": 0.7127, "step": 4649 }, { "epoch": 2.306813950601961, "grad_norm": 0.12863266739621676, "learning_rate": 1.9075522550463538e-06, "loss": 0.6811, "step": 4650 }, { "epoch": 2.3073104133052005, "grad_norm": 0.1292808442724112, "learning_rate": 1.9066027907624563e-06, "loss": 0.6921, "step": 4651 }, { "epoch": 2.30780687600844, "grad_norm": 0.13198793172057285, "learning_rate": 1.9056534171698076e-06, "loss": 0.6872, "step": 4652 }, { "epoch": 2.308303338711679, "grad_norm": 0.1308224331561388, "learning_rate": 1.9047041344135045e-06, "loss": 0.6865, "step": 4653 }, { "epoch": 2.308799801414919, "grad_norm": 0.130383003378401, "learning_rate": 1.9037549426386304e-06, "loss": 0.7286, "step": 4654 }, { "epoch": 2.3092962641181582, "grad_norm": 0.13067038581766985, "learning_rate": 1.9028058419902524e-06, "loss": 0.6786, "step": 4655 }, { "epoch": 2.3097927268213976, "grad_norm": 0.1312758611395035, "learning_rate": 1.9018568326134262e-06, "loss": 0.6942, "step": 4656 }, { "epoch": 2.310289189524637, "grad_norm": 0.13012405788871972, "learning_rate": 1.900907914653194e-06, "loss": 0.6997, "step": 4657 }, { "epoch": 2.3107856522278762, "grad_norm": 0.13238886769700214, "learning_rate": 1.8999590882545798e-06, "loss": 0.7042, "step": 4658 }, { "epoch": 2.3112821149311156, "grad_norm": 0.13003113363469201, "learning_rate": 1.8990103535625983e-06, "loss": 0.7026, "step": 4659 }, { "epoch": 2.3117785776343553, "grad_norm": 0.12888802864649773, "learning_rate": 1.8980617107222482e-06, "loss": 0.6841, "step": 4660 }, { "epoch": 2.3122750403375947, "grad_norm": 0.13431432633019263, "learning_rate": 1.8971131598785148e-06, "loss": 0.7537, "step": 4661 }, { "epoch": 2.312771503040834, "grad_norm": 0.12849233342145183, "learning_rate": 1.8961647011763676e-06, "loss": 0.6768, "step": 4662 }, { "epoch": 2.3132679657440733, "grad_norm": 0.13345147046633807, "learning_rate": 1.8952163347607642e-06, "loss": 0.7059, "step": 4663 }, { "epoch": 2.313764428447313, "grad_norm": 0.13053349945942708, "learning_rate": 1.894268060776647e-06, "loss": 0.7291, "step": 4664 }, { "epoch": 2.3142608911505524, "grad_norm": 0.1343964907723944, "learning_rate": 1.8933198793689444e-06, "loss": 0.7698, "step": 4665 }, { "epoch": 2.3147573538537918, "grad_norm": 0.14226824478362488, "learning_rate": 1.8923717906825718e-06, "loss": 0.7851, "step": 4666 }, { "epoch": 2.315253816557031, "grad_norm": 0.12925409867242452, "learning_rate": 1.8914237948624275e-06, "loss": 0.655, "step": 4667 }, { "epoch": 2.3157502792602704, "grad_norm": 0.12816123194872267, "learning_rate": 1.8904758920533988e-06, "loss": 0.6711, "step": 4668 }, { "epoch": 2.3162467419635098, "grad_norm": 0.1294204482164869, "learning_rate": 1.8895280824003568e-06, "loss": 0.6621, "step": 4669 }, { "epoch": 2.3167432046667495, "grad_norm": 0.12776210547198066, "learning_rate": 1.888580366048159e-06, "loss": 0.7061, "step": 4670 }, { "epoch": 2.317239667369989, "grad_norm": 0.12704629242381982, "learning_rate": 1.8876327431416498e-06, "loss": 0.7007, "step": 4671 }, { "epoch": 2.317736130073228, "grad_norm": 0.1316157139453224, "learning_rate": 1.8866852138256565e-06, "loss": 0.7049, "step": 4672 }, { "epoch": 2.3182325927764675, "grad_norm": 0.13375751981024753, "learning_rate": 1.8857377782449956e-06, "loss": 0.7209, "step": 4673 }, { "epoch": 2.3187290554797073, "grad_norm": 0.13344576229065264, "learning_rate": 1.8847904365444653e-06, "loss": 0.7304, "step": 4674 }, { "epoch": 2.3192255181829466, "grad_norm": 0.13003917367562956, "learning_rate": 1.8838431888688528e-06, "loss": 0.6972, "step": 4675 }, { "epoch": 2.319721980886186, "grad_norm": 0.14062745708919816, "learning_rate": 1.8828960353629294e-06, "loss": 0.738, "step": 4676 }, { "epoch": 2.3202184435894253, "grad_norm": 0.12901976125458806, "learning_rate": 1.881948976171453e-06, "loss": 0.6849, "step": 4677 }, { "epoch": 2.3207149062926646, "grad_norm": 0.1277269250608724, "learning_rate": 1.8810020114391653e-06, "loss": 0.6756, "step": 4678 }, { "epoch": 2.321211368995904, "grad_norm": 0.1291372202878994, "learning_rate": 1.8800551413107955e-06, "loss": 0.6776, "step": 4679 }, { "epoch": 2.3217078316991437, "grad_norm": 0.12757692002915727, "learning_rate": 1.8791083659310568e-06, "loss": 0.6778, "step": 4680 }, { "epoch": 2.322204294402383, "grad_norm": 0.1396567920307466, "learning_rate": 1.8781616854446496e-06, "loss": 0.7325, "step": 4681 }, { "epoch": 2.3227007571056224, "grad_norm": 0.13455039781184047, "learning_rate": 1.8772150999962588e-06, "loss": 0.7038, "step": 4682 }, { "epoch": 2.3231972198088617, "grad_norm": 0.13252407557746912, "learning_rate": 1.8762686097305537e-06, "loss": 0.6989, "step": 4683 }, { "epoch": 2.3236936825121015, "grad_norm": 0.12477814641502248, "learning_rate": 1.875322214792191e-06, "loss": 0.6746, "step": 4684 }, { "epoch": 2.324190145215341, "grad_norm": 0.1295558006823391, "learning_rate": 1.8743759153258118e-06, "loss": 0.7017, "step": 4685 }, { "epoch": 2.32468660791858, "grad_norm": 0.13196900667132647, "learning_rate": 1.8734297114760427e-06, "loss": 0.7403, "step": 4686 }, { "epoch": 2.3251830706218195, "grad_norm": 0.13187534712299806, "learning_rate": 1.8724836033874966e-06, "loss": 0.7402, "step": 4687 }, { "epoch": 2.325679533325059, "grad_norm": 0.12450825043027855, "learning_rate": 1.8715375912047695e-06, "loss": 0.6719, "step": 4688 }, { "epoch": 2.326175996028298, "grad_norm": 0.1264511706987313, "learning_rate": 1.8705916750724462e-06, "loss": 0.7006, "step": 4689 }, { "epoch": 2.326672458731538, "grad_norm": 0.13114832252468836, "learning_rate": 1.8696458551350927e-06, "loss": 0.7447, "step": 4690 }, { "epoch": 2.3271689214347773, "grad_norm": 0.1354210750717035, "learning_rate": 1.868700131537263e-06, "loss": 0.7688, "step": 4691 }, { "epoch": 2.3276653841380166, "grad_norm": 0.1301186261261761, "learning_rate": 1.8677545044234962e-06, "loss": 0.7138, "step": 4692 }, { "epoch": 2.328161846841256, "grad_norm": 0.13060257839716047, "learning_rate": 1.8668089739383165e-06, "loss": 0.7208, "step": 4693 }, { "epoch": 2.3286583095444957, "grad_norm": 0.12633300107009157, "learning_rate": 1.865863540226232e-06, "loss": 0.6773, "step": 4694 }, { "epoch": 2.329154772247735, "grad_norm": 0.12498631011162595, "learning_rate": 1.8649182034317382e-06, "loss": 0.693, "step": 4695 }, { "epoch": 2.3296512349509744, "grad_norm": 0.13096791761906182, "learning_rate": 1.863972963699314e-06, "loss": 0.7156, "step": 4696 }, { "epoch": 2.3301476976542137, "grad_norm": 0.1302613925721332, "learning_rate": 1.8630278211734243e-06, "loss": 0.7099, "step": 4697 }, { "epoch": 2.330644160357453, "grad_norm": 0.13213767324984668, "learning_rate": 1.86208277599852e-06, "loss": 0.7447, "step": 4698 }, { "epoch": 2.3311406230606924, "grad_norm": 0.1307800597646881, "learning_rate": 1.861137828319034e-06, "loss": 0.7238, "step": 4699 }, { "epoch": 2.331637085763932, "grad_norm": 0.12668332112462782, "learning_rate": 1.8601929782793882e-06, "loss": 0.663, "step": 4700 }, { "epoch": 2.3321335484671715, "grad_norm": 0.1299102574958521, "learning_rate": 1.8592482260239869e-06, "loss": 0.7101, "step": 4701 }, { "epoch": 2.332630011170411, "grad_norm": 0.1263931699115363, "learning_rate": 1.8583035716972203e-06, "loss": 0.6929, "step": 4702 }, { "epoch": 2.33312647387365, "grad_norm": 0.12904444206327081, "learning_rate": 1.8573590154434648e-06, "loss": 0.7177, "step": 4703 }, { "epoch": 2.33362293657689, "grad_norm": 0.12940234345889146, "learning_rate": 1.856414557407079e-06, "loss": 0.6601, "step": 4704 }, { "epoch": 2.3341193992801292, "grad_norm": 0.12912909817222648, "learning_rate": 1.8554701977324104e-06, "loss": 0.7058, "step": 4705 }, { "epoch": 2.3346158619833686, "grad_norm": 0.13166188626901182, "learning_rate": 1.8545259365637869e-06, "loss": 0.6971, "step": 4706 }, { "epoch": 2.335112324686608, "grad_norm": 0.13412107252812872, "learning_rate": 1.8535817740455243e-06, "loss": 0.7281, "step": 4707 }, { "epoch": 2.3356087873898472, "grad_norm": 0.14219099357241585, "learning_rate": 1.8526377103219228e-06, "loss": 0.7635, "step": 4708 }, { "epoch": 2.3361052500930866, "grad_norm": 0.12785200595648355, "learning_rate": 1.851693745537268e-06, "loss": 0.6974, "step": 4709 }, { "epoch": 2.3366017127963263, "grad_norm": 0.1281849252246767, "learning_rate": 1.8507498798358298e-06, "loss": 0.7275, "step": 4710 }, { "epoch": 2.3370981754995657, "grad_norm": 0.13346008136791995, "learning_rate": 1.849806113361862e-06, "loss": 0.7424, "step": 4711 }, { "epoch": 2.337594638202805, "grad_norm": 0.12589143302285735, "learning_rate": 1.8488624462596045e-06, "loss": 0.7088, "step": 4712 }, { "epoch": 2.3380911009060443, "grad_norm": 0.12714926877182972, "learning_rate": 1.8479188786732821e-06, "loss": 0.7187, "step": 4713 }, { "epoch": 2.3385875636092837, "grad_norm": 0.1279972258903848, "learning_rate": 1.8469754107471045e-06, "loss": 0.6718, "step": 4714 }, { "epoch": 2.3390840263125234, "grad_norm": 0.1312317942984953, "learning_rate": 1.846032042625264e-06, "loss": 0.7033, "step": 4715 }, { "epoch": 2.3395804890157628, "grad_norm": 0.12873451062526245, "learning_rate": 1.8450887744519402e-06, "loss": 0.702, "step": 4716 }, { "epoch": 2.340076951719002, "grad_norm": 0.12882507141223737, "learning_rate": 1.844145606371297e-06, "loss": 0.7278, "step": 4717 }, { "epoch": 2.3405734144222414, "grad_norm": 0.12746855121924552, "learning_rate": 1.8432025385274816e-06, "loss": 0.6702, "step": 4718 }, { "epoch": 2.3410698771254808, "grad_norm": 0.1279698618848009, "learning_rate": 1.8422595710646279e-06, "loss": 0.6851, "step": 4719 }, { "epoch": 2.3415663398287205, "grad_norm": 0.13439282233196687, "learning_rate": 1.841316704126852e-06, "loss": 0.7056, "step": 4720 }, { "epoch": 2.34206280253196, "grad_norm": 0.13075250402139188, "learning_rate": 1.840373937858257e-06, "loss": 0.7028, "step": 4721 }, { "epoch": 2.342559265235199, "grad_norm": 0.13576682060101108, "learning_rate": 1.8394312724029302e-06, "loss": 0.715, "step": 4722 }, { "epoch": 2.3430557279384385, "grad_norm": 0.13095475394049344, "learning_rate": 1.838488707904941e-06, "loss": 0.7263, "step": 4723 }, { "epoch": 2.343552190641678, "grad_norm": 0.12252662103848144, "learning_rate": 1.8375462445083464e-06, "loss": 0.6212, "step": 4724 }, { "epoch": 2.3440486533449176, "grad_norm": 0.1321014567093405, "learning_rate": 1.8366038823571864e-06, "loss": 0.6869, "step": 4725 }, { "epoch": 2.344545116048157, "grad_norm": 0.12839430799368412, "learning_rate": 1.835661621595487e-06, "loss": 0.7022, "step": 4726 }, { "epoch": 2.3450415787513963, "grad_norm": 0.13572608728421834, "learning_rate": 1.834719462367256e-06, "loss": 0.7124, "step": 4727 }, { "epoch": 2.3455380414546356, "grad_norm": 0.1327198212125845, "learning_rate": 1.8337774048164886e-06, "loss": 0.7145, "step": 4728 }, { "epoch": 2.346034504157875, "grad_norm": 0.13233453890145816, "learning_rate": 1.8328354490871624e-06, "loss": 0.723, "step": 4729 }, { "epoch": 2.3465309668611147, "grad_norm": 0.1275819850450933, "learning_rate": 1.8318935953232417e-06, "loss": 0.6562, "step": 4730 }, { "epoch": 2.347027429564354, "grad_norm": 0.12862371806632422, "learning_rate": 1.8309518436686718e-06, "loss": 0.6557, "step": 4731 }, { "epoch": 2.3475238922675934, "grad_norm": 0.13020123965864694, "learning_rate": 1.8300101942673854e-06, "loss": 0.7, "step": 4732 }, { "epoch": 2.3480203549708327, "grad_norm": 0.13526821353378768, "learning_rate": 1.829068647263298e-06, "loss": 0.7129, "step": 4733 }, { "epoch": 2.348516817674072, "grad_norm": 0.12778442163510337, "learning_rate": 1.8281272028003105e-06, "loss": 0.6919, "step": 4734 }, { "epoch": 2.349013280377312, "grad_norm": 0.12730442338186904, "learning_rate": 1.827185861022308e-06, "loss": 0.6737, "step": 4735 }, { "epoch": 2.349509743080551, "grad_norm": 0.13135300464883143, "learning_rate": 1.8262446220731583e-06, "loss": 0.6902, "step": 4736 }, { "epoch": 2.3500062057837905, "grad_norm": 0.13633030408853347, "learning_rate": 1.8253034860967156e-06, "loss": 0.7665, "step": 4737 }, { "epoch": 2.35050266848703, "grad_norm": 0.12967623071202264, "learning_rate": 1.8243624532368176e-06, "loss": 0.6925, "step": 4738 }, { "epoch": 2.350999131190269, "grad_norm": 0.12823110926047526, "learning_rate": 1.823421523637285e-06, "loss": 0.6446, "step": 4739 }, { "epoch": 2.351495593893509, "grad_norm": 0.13140489998321872, "learning_rate": 1.822480697441924e-06, "loss": 0.736, "step": 4740 }, { "epoch": 2.3519920565967483, "grad_norm": 0.12898773935905025, "learning_rate": 1.8215399747945256e-06, "loss": 0.7298, "step": 4741 }, { "epoch": 2.3524885192999876, "grad_norm": 0.139264930199382, "learning_rate": 1.820599355838864e-06, "loss": 0.716, "step": 4742 }, { "epoch": 2.352984982003227, "grad_norm": 0.12812055474218081, "learning_rate": 1.8196588407186972e-06, "loss": 0.7316, "step": 4743 }, { "epoch": 2.3534814447064663, "grad_norm": 0.1305997310787121, "learning_rate": 1.8187184295777676e-06, "loss": 0.6861, "step": 4744 }, { "epoch": 2.353977907409706, "grad_norm": 0.1353100470985911, "learning_rate": 1.8177781225598033e-06, "loss": 0.7478, "step": 4745 }, { "epoch": 2.3544743701129454, "grad_norm": 0.13303661646904097, "learning_rate": 1.816837919808514e-06, "loss": 0.7119, "step": 4746 }, { "epoch": 2.3549708328161847, "grad_norm": 0.13363770660484806, "learning_rate": 1.8158978214675953e-06, "loss": 0.673, "step": 4747 }, { "epoch": 2.355467295519424, "grad_norm": 0.13089630550011988, "learning_rate": 1.8149578276807257e-06, "loss": 0.6883, "step": 4748 }, { "epoch": 2.3559637582226634, "grad_norm": 0.13168358638427374, "learning_rate": 1.814017938591568e-06, "loss": 0.6873, "step": 4749 }, { "epoch": 2.356460220925903, "grad_norm": 0.13484692782487573, "learning_rate": 1.8130781543437698e-06, "loss": 0.7583, "step": 4750 }, { "epoch": 2.3569566836291425, "grad_norm": 0.1306126749166303, "learning_rate": 1.8121384750809623e-06, "loss": 0.6905, "step": 4751 }, { "epoch": 2.357453146332382, "grad_norm": 0.12784958982539274, "learning_rate": 1.811198900946759e-06, "loss": 0.6999, "step": 4752 }, { "epoch": 2.357949609035621, "grad_norm": 0.13515705011245394, "learning_rate": 1.81025943208476e-06, "loss": 0.7064, "step": 4753 }, { "epoch": 2.3584460717388605, "grad_norm": 0.13046415278522383, "learning_rate": 1.8093200686385484e-06, "loss": 0.7217, "step": 4754 }, { "epoch": 2.3589425344421002, "grad_norm": 0.12991942766461034, "learning_rate": 1.8083808107516892e-06, "loss": 0.71, "step": 4755 }, { "epoch": 2.3594389971453396, "grad_norm": 0.13073051110349773, "learning_rate": 1.8074416585677335e-06, "loss": 0.6924, "step": 4756 }, { "epoch": 2.359935459848579, "grad_norm": 0.13719409511910485, "learning_rate": 1.8065026122302165e-06, "loss": 0.7551, "step": 4757 }, { "epoch": 2.360431922551818, "grad_norm": 0.12947943146773364, "learning_rate": 1.8055636718826557e-06, "loss": 0.6755, "step": 4758 }, { "epoch": 2.3609283852550575, "grad_norm": 0.12841550971350973, "learning_rate": 1.8046248376685532e-06, "loss": 0.703, "step": 4759 }, { "epoch": 2.3614248479582973, "grad_norm": 0.1311479945588234, "learning_rate": 1.8036861097313946e-06, "loss": 0.7219, "step": 4760 }, { "epoch": 2.3619213106615367, "grad_norm": 0.12849688100852175, "learning_rate": 1.8027474882146498e-06, "loss": 0.6653, "step": 4761 }, { "epoch": 2.362417773364776, "grad_norm": 0.13113427313547274, "learning_rate": 1.8018089732617716e-06, "loss": 0.7302, "step": 4762 }, { "epoch": 2.3629142360680153, "grad_norm": 0.12964668689183, "learning_rate": 1.8008705650161979e-06, "loss": 0.7078, "step": 4763 }, { "epoch": 2.3634106987712546, "grad_norm": 0.13260160889826755, "learning_rate": 1.799932263621348e-06, "loss": 0.7268, "step": 4764 }, { "epoch": 2.3639071614744944, "grad_norm": 0.13006599121273307, "learning_rate": 1.7989940692206267e-06, "loss": 0.7147, "step": 4765 }, { "epoch": 2.3644036241777338, "grad_norm": 0.13478322856677002, "learning_rate": 1.7980559819574222e-06, "loss": 0.7086, "step": 4766 }, { "epoch": 2.364900086880973, "grad_norm": 0.1278201414701354, "learning_rate": 1.7971180019751068e-06, "loss": 0.6734, "step": 4767 }, { "epoch": 2.3653965495842124, "grad_norm": 0.12854256872198694, "learning_rate": 1.7961801294170344e-06, "loss": 0.7037, "step": 4768 }, { "epoch": 2.3658930122874517, "grad_norm": 0.1326157442892243, "learning_rate": 1.7952423644265441e-06, "loss": 0.7093, "step": 4769 }, { "epoch": 2.366389474990691, "grad_norm": 0.13759009456859725, "learning_rate": 1.7943047071469597e-06, "loss": 0.7401, "step": 4770 }, { "epoch": 2.366885937693931, "grad_norm": 0.13497019322133594, "learning_rate": 1.7933671577215846e-06, "loss": 0.7085, "step": 4771 }, { "epoch": 2.36738240039717, "grad_norm": 0.12878040498696178, "learning_rate": 1.7924297162937095e-06, "loss": 0.6994, "step": 4772 }, { "epoch": 2.3678788631004095, "grad_norm": 0.12913566113987018, "learning_rate": 1.7914923830066074e-06, "loss": 0.7142, "step": 4773 }, { "epoch": 2.368375325803649, "grad_norm": 0.13029016007788516, "learning_rate": 1.790555158003535e-06, "loss": 0.6907, "step": 4774 }, { "epoch": 2.3688717885068886, "grad_norm": 0.1317576623479519, "learning_rate": 1.7896180414277308e-06, "loss": 0.7187, "step": 4775 }, { "epoch": 2.369368251210128, "grad_norm": 0.13402471815823666, "learning_rate": 1.7886810334224192e-06, "loss": 0.6727, "step": 4776 }, { "epoch": 2.3698647139133673, "grad_norm": 0.12910099553816476, "learning_rate": 1.7877441341308065e-06, "loss": 0.6883, "step": 4777 }, { "epoch": 2.3703611766166066, "grad_norm": 0.12857345243673593, "learning_rate": 1.7868073436960826e-06, "loss": 0.7037, "step": 4778 }, { "epoch": 2.370857639319846, "grad_norm": 0.13050987325503174, "learning_rate": 1.7858706622614217e-06, "loss": 0.7294, "step": 4779 }, { "epoch": 2.3713541020230853, "grad_norm": 0.12825871824045287, "learning_rate": 1.784934089969979e-06, "loss": 0.6861, "step": 4780 }, { "epoch": 2.371850564726325, "grad_norm": 0.13089580339165302, "learning_rate": 1.783997626964896e-06, "loss": 0.7541, "step": 4781 }, { "epoch": 2.3723470274295644, "grad_norm": 0.1334628464244765, "learning_rate": 1.783061273389295e-06, "loss": 0.7078, "step": 4782 }, { "epoch": 2.3728434901328037, "grad_norm": 0.1340782941366848, "learning_rate": 1.7821250293862836e-06, "loss": 0.7718, "step": 4783 }, { "epoch": 2.373339952836043, "grad_norm": 0.13228953216571118, "learning_rate": 1.7811888950989515e-06, "loss": 0.7161, "step": 4784 }, { "epoch": 2.373836415539283, "grad_norm": 0.1351301578501817, "learning_rate": 1.780252870670371e-06, "loss": 0.6803, "step": 4785 }, { "epoch": 2.374332878242522, "grad_norm": 0.12686143379738357, "learning_rate": 1.7793169562435996e-06, "loss": 0.6786, "step": 4786 }, { "epoch": 2.3748293409457615, "grad_norm": 0.12977295444856893, "learning_rate": 1.7783811519616756e-06, "loss": 0.7315, "step": 4787 }, { "epoch": 2.375325803649001, "grad_norm": 0.13124111123654278, "learning_rate": 1.777445457967622e-06, "loss": 0.7298, "step": 4788 }, { "epoch": 2.37582226635224, "grad_norm": 0.13452592431133958, "learning_rate": 1.7765098744044452e-06, "loss": 0.6963, "step": 4789 }, { "epoch": 2.3763187290554795, "grad_norm": 0.13429719254562902, "learning_rate": 1.7755744014151338e-06, "loss": 0.6756, "step": 4790 }, { "epoch": 2.3768151917587192, "grad_norm": 0.12721035894910274, "learning_rate": 1.7746390391426598e-06, "loss": 0.6617, "step": 4791 }, { "epoch": 2.3773116544619586, "grad_norm": 0.12936319272862273, "learning_rate": 1.7737037877299784e-06, "loss": 0.683, "step": 4792 }, { "epoch": 2.377808117165198, "grad_norm": 0.13813116770259098, "learning_rate": 1.7727686473200276e-06, "loss": 0.7706, "step": 4793 }, { "epoch": 2.3783045798684372, "grad_norm": 0.13184854493613757, "learning_rate": 1.771833618055729e-06, "loss": 0.7047, "step": 4794 }, { "epoch": 2.378801042571677, "grad_norm": 0.1326136257727315, "learning_rate": 1.7708987000799866e-06, "loss": 0.6991, "step": 4795 }, { "epoch": 2.3792975052749163, "grad_norm": 0.13583743329061757, "learning_rate": 1.7699638935356876e-06, "loss": 0.7719, "step": 4796 }, { "epoch": 2.3797939679781557, "grad_norm": 0.13008368245130267, "learning_rate": 1.7690291985657021e-06, "loss": 0.6925, "step": 4797 }, { "epoch": 2.380290430681395, "grad_norm": 0.12928279258704853, "learning_rate": 1.7680946153128833e-06, "loss": 0.6964, "step": 4798 }, { "epoch": 2.3807868933846343, "grad_norm": 0.126896632250804, "learning_rate": 1.7671601439200675e-06, "loss": 0.7119, "step": 4799 }, { "epoch": 2.3812833560878737, "grad_norm": 0.1343279347693961, "learning_rate": 1.766225784530074e-06, "loss": 0.7165, "step": 4800 }, { "epoch": 2.3817798187911134, "grad_norm": 0.12647325466363368, "learning_rate": 1.7652915372857035e-06, "loss": 0.6949, "step": 4801 }, { "epoch": 2.3822762814943528, "grad_norm": 0.1300158284756839, "learning_rate": 1.7643574023297424e-06, "loss": 0.7007, "step": 4802 }, { "epoch": 2.382772744197592, "grad_norm": 0.1303069401818913, "learning_rate": 1.7634233798049563e-06, "loss": 0.6973, "step": 4803 }, { "epoch": 2.3832692069008314, "grad_norm": 0.1250185249073528, "learning_rate": 1.7624894698540964e-06, "loss": 0.6759, "step": 4804 }, { "epoch": 2.383765669604071, "grad_norm": 0.12683133498041957, "learning_rate": 1.7615556726198963e-06, "loss": 0.6577, "step": 4805 }, { "epoch": 2.3842621323073105, "grad_norm": 0.13065274070747337, "learning_rate": 1.7606219882450713e-06, "loss": 0.7088, "step": 4806 }, { "epoch": 2.38475859501055, "grad_norm": 0.13340077639503192, "learning_rate": 1.7596884168723209e-06, "loss": 0.7117, "step": 4807 }, { "epoch": 2.385255057713789, "grad_norm": 0.12846892823423328, "learning_rate": 1.7587549586443253e-06, "loss": 0.7508, "step": 4808 }, { "epoch": 2.3857515204170285, "grad_norm": 0.133668088002894, "learning_rate": 1.7578216137037496e-06, "loss": 0.7036, "step": 4809 }, { "epoch": 2.386247983120268, "grad_norm": 0.1297358762173912, "learning_rate": 1.75688838219324e-06, "loss": 0.6972, "step": 4810 }, { "epoch": 2.3867444458235076, "grad_norm": 0.12907419259927128, "learning_rate": 1.755955264255427e-06, "loss": 0.6749, "step": 4811 }, { "epoch": 2.387240908526747, "grad_norm": 0.14320560575199254, "learning_rate": 1.7550222600329214e-06, "loss": 0.7493, "step": 4812 }, { "epoch": 2.3877373712299863, "grad_norm": 0.13107914192089937, "learning_rate": 1.7540893696683187e-06, "loss": 0.6801, "step": 4813 }, { "epoch": 2.3882338339332256, "grad_norm": 0.1299399190627306, "learning_rate": 1.753156593304196e-06, "loss": 0.6749, "step": 4814 }, { "epoch": 2.3887302966364654, "grad_norm": 0.12936932954561534, "learning_rate": 1.7522239310831134e-06, "loss": 0.7269, "step": 4815 }, { "epoch": 2.3892267593397047, "grad_norm": 0.13265135552744697, "learning_rate": 1.7512913831476135e-06, "loss": 0.7383, "step": 4816 }, { "epoch": 2.389723222042944, "grad_norm": 0.13764593191752025, "learning_rate": 1.750358949640221e-06, "loss": 0.7176, "step": 4817 }, { "epoch": 2.3902196847461834, "grad_norm": 0.12796750916884678, "learning_rate": 1.749426630703444e-06, "loss": 0.6821, "step": 4818 }, { "epoch": 2.3907161474494227, "grad_norm": 0.13180747695825185, "learning_rate": 1.7484944264797713e-06, "loss": 0.7342, "step": 4819 }, { "epoch": 2.391212610152662, "grad_norm": 0.14126658198697672, "learning_rate": 1.7475623371116759e-06, "loss": 0.7135, "step": 4820 }, { "epoch": 2.391709072855902, "grad_norm": 0.1305260276629293, "learning_rate": 1.7466303627416129e-06, "loss": 0.7204, "step": 4821 }, { "epoch": 2.392205535559141, "grad_norm": 0.12966913916891723, "learning_rate": 1.7456985035120194e-06, "loss": 0.6982, "step": 4822 }, { "epoch": 2.3927019982623805, "grad_norm": 0.1254585082749373, "learning_rate": 1.7447667595653162e-06, "loss": 0.6747, "step": 4823 }, { "epoch": 2.39319846096562, "grad_norm": 0.15094163056194543, "learning_rate": 1.7438351310439036e-06, "loss": 0.7751, "step": 4824 }, { "epoch": 2.3936949236688596, "grad_norm": 0.12776045500839703, "learning_rate": 1.7429036180901674e-06, "loss": 0.7159, "step": 4825 }, { "epoch": 2.394191386372099, "grad_norm": 0.13208737547487745, "learning_rate": 1.741972220846474e-06, "loss": 0.67, "step": 4826 }, { "epoch": 2.3946878490753383, "grad_norm": 0.12814416225344802, "learning_rate": 1.741040939455173e-06, "loss": 0.6881, "step": 4827 }, { "epoch": 2.3951843117785776, "grad_norm": 0.1329316175712077, "learning_rate": 1.7401097740585947e-06, "loss": 0.7058, "step": 4828 }, { "epoch": 2.395680774481817, "grad_norm": 0.1314293899989232, "learning_rate": 1.7391787247990537e-06, "loss": 0.6882, "step": 4829 }, { "epoch": 2.3961772371850563, "grad_norm": 0.13031161109059494, "learning_rate": 1.7382477918188462e-06, "loss": 0.7541, "step": 4830 }, { "epoch": 2.396673699888296, "grad_norm": 0.12930814709648858, "learning_rate": 1.7373169752602493e-06, "loss": 0.7058, "step": 4831 }, { "epoch": 2.3971701625915354, "grad_norm": 0.1291944574432241, "learning_rate": 1.7363862752655248e-06, "loss": 0.6674, "step": 4832 }, { "epoch": 2.3976666252947747, "grad_norm": 0.1304229911398871, "learning_rate": 1.735455691976914e-06, "loss": 0.6796, "step": 4833 }, { "epoch": 2.398163087998014, "grad_norm": 0.13562249887328157, "learning_rate": 1.7345252255366434e-06, "loss": 0.7102, "step": 4834 }, { "epoch": 2.398659550701254, "grad_norm": 0.13155960598330008, "learning_rate": 1.7335948760869175e-06, "loss": 0.6952, "step": 4835 }, { "epoch": 2.399156013404493, "grad_norm": 0.12956566494989916, "learning_rate": 1.7326646437699262e-06, "loss": 0.7399, "step": 4836 }, { "epoch": 2.3996524761077325, "grad_norm": 0.129991409065886, "learning_rate": 1.7317345287278408e-06, "loss": 0.6942, "step": 4837 }, { "epoch": 2.400148938810972, "grad_norm": 0.13550941541419204, "learning_rate": 1.7308045311028148e-06, "loss": 0.7386, "step": 4838 }, { "epoch": 2.400645401514211, "grad_norm": 0.12897072490235445, "learning_rate": 1.7298746510369836e-06, "loss": 0.6888, "step": 4839 }, { "epoch": 2.4011418642174505, "grad_norm": 0.12791367362789313, "learning_rate": 1.728944888672463e-06, "loss": 0.6921, "step": 4840 }, { "epoch": 2.4016383269206902, "grad_norm": 0.12980510973949744, "learning_rate": 1.7280152441513536e-06, "loss": 0.6914, "step": 4841 }, { "epoch": 2.4021347896239296, "grad_norm": 0.12698243358765066, "learning_rate": 1.7270857176157363e-06, "loss": 0.7243, "step": 4842 }, { "epoch": 2.402631252327169, "grad_norm": 0.12943217000030752, "learning_rate": 1.7261563092076742e-06, "loss": 0.6894, "step": 4843 }, { "epoch": 2.4031277150304082, "grad_norm": 0.14555273425787743, "learning_rate": 1.7252270190692133e-06, "loss": 0.7761, "step": 4844 }, { "epoch": 2.403624177733648, "grad_norm": 0.13293572550265437, "learning_rate": 1.724297847342379e-06, "loss": 0.7029, "step": 4845 }, { "epoch": 2.4041206404368873, "grad_norm": 0.12653010232213735, "learning_rate": 1.7233687941691819e-06, "loss": 0.6353, "step": 4846 }, { "epoch": 2.4046171031401267, "grad_norm": 0.1314378535643785, "learning_rate": 1.722439859691612e-06, "loss": 0.698, "step": 4847 }, { "epoch": 2.405113565843366, "grad_norm": 0.12696993067028972, "learning_rate": 1.7215110440516427e-06, "loss": 0.6477, "step": 4848 }, { "epoch": 2.4056100285466053, "grad_norm": 0.1293761765904413, "learning_rate": 1.720582347391228e-06, "loss": 0.6863, "step": 4849 }, { "epoch": 2.4061064912498447, "grad_norm": 0.12977487418429212, "learning_rate": 1.7196537698523052e-06, "loss": 0.6806, "step": 4850 }, { "epoch": 2.4066029539530844, "grad_norm": 0.13188762511672084, "learning_rate": 1.718725311576791e-06, "loss": 0.7124, "step": 4851 }, { "epoch": 2.4070994166563238, "grad_norm": 0.1290870421059128, "learning_rate": 1.7177969727065861e-06, "loss": 0.7406, "step": 4852 }, { "epoch": 2.407595879359563, "grad_norm": 0.1324826867302518, "learning_rate": 1.716868753383572e-06, "loss": 0.7023, "step": 4853 }, { "epoch": 2.4080923420628024, "grad_norm": 0.13190718413667324, "learning_rate": 1.7159406537496127e-06, "loss": 0.698, "step": 4854 }, { "epoch": 2.4085888047660418, "grad_norm": 0.12929265660157932, "learning_rate": 1.7150126739465534e-06, "loss": 0.6906, "step": 4855 }, { "epoch": 2.4090852674692815, "grad_norm": 0.12998790137705218, "learning_rate": 1.7140848141162201e-06, "loss": 0.663, "step": 4856 }, { "epoch": 2.409581730172521, "grad_norm": 0.13078521035118826, "learning_rate": 1.7131570744004216e-06, "loss": 0.6851, "step": 4857 }, { "epoch": 2.41007819287576, "grad_norm": 0.12843707540780416, "learning_rate": 1.7122294549409486e-06, "loss": 0.6911, "step": 4858 }, { "epoch": 2.4105746555789995, "grad_norm": 0.13192234560191793, "learning_rate": 1.7113019558795722e-06, "loss": 0.7878, "step": 4859 }, { "epoch": 2.411071118282239, "grad_norm": 0.1319340861365238, "learning_rate": 1.7103745773580465e-06, "loss": 0.7045, "step": 4860 }, { "epoch": 2.4115675809854786, "grad_norm": 0.13307582097518272, "learning_rate": 1.7094473195181054e-06, "loss": 0.7248, "step": 4861 }, { "epoch": 2.412064043688718, "grad_norm": 0.13305909648819517, "learning_rate": 1.7085201825014663e-06, "loss": 0.7069, "step": 4862 }, { "epoch": 2.4125605063919573, "grad_norm": 0.12564507225345678, "learning_rate": 1.7075931664498265e-06, "loss": 0.6419, "step": 4863 }, { "epoch": 2.4130569690951966, "grad_norm": 0.1353169682104955, "learning_rate": 1.7066662715048668e-06, "loss": 0.74, "step": 4864 }, { "epoch": 2.413553431798436, "grad_norm": 0.12985183063208408, "learning_rate": 1.7057394978082465e-06, "loss": 0.6654, "step": 4865 }, { "epoch": 2.4140498945016757, "grad_norm": 0.13177829235442678, "learning_rate": 1.7048128455016105e-06, "loss": 0.7362, "step": 4866 }, { "epoch": 2.414546357204915, "grad_norm": 0.13043257360310376, "learning_rate": 1.7038863147265796e-06, "loss": 0.7091, "step": 4867 }, { "epoch": 2.4150428199081544, "grad_norm": 0.12897077992222986, "learning_rate": 1.702959905624761e-06, "loss": 0.7194, "step": 4868 }, { "epoch": 2.4155392826113937, "grad_norm": 0.129946311887381, "learning_rate": 1.7020336183377414e-06, "loss": 0.7313, "step": 4869 }, { "epoch": 2.416035745314633, "grad_norm": 0.13239014442468938, "learning_rate": 1.7011074530070888e-06, "loss": 0.7355, "step": 4870 }, { "epoch": 2.416532208017873, "grad_norm": 0.13601224242524987, "learning_rate": 1.700181409774353e-06, "loss": 0.7279, "step": 4871 }, { "epoch": 2.417028670721112, "grad_norm": 0.13833920681618717, "learning_rate": 1.6992554887810642e-06, "loss": 0.7629, "step": 4872 }, { "epoch": 2.4175251334243515, "grad_norm": 0.13419638193226116, "learning_rate": 1.6983296901687351e-06, "loss": 0.7244, "step": 4873 }, { "epoch": 2.418021596127591, "grad_norm": 0.12689180227839011, "learning_rate": 1.697404014078859e-06, "loss": 0.6595, "step": 4874 }, { "epoch": 2.41851805883083, "grad_norm": 0.13100363032562112, "learning_rate": 1.6964784606529106e-06, "loss": 0.6761, "step": 4875 }, { "epoch": 2.41901452153407, "grad_norm": 0.13288360240949995, "learning_rate": 1.6955530300323467e-06, "loss": 0.7103, "step": 4876 }, { "epoch": 2.4195109842373093, "grad_norm": 0.13073699217986937, "learning_rate": 1.6946277223586033e-06, "loss": 0.688, "step": 4877 }, { "epoch": 2.4200074469405486, "grad_norm": 0.13046075666698376, "learning_rate": 1.6937025377730992e-06, "loss": 0.6997, "step": 4878 }, { "epoch": 2.420503909643788, "grad_norm": 0.1375868918604195, "learning_rate": 1.6927774764172344e-06, "loss": 0.7279, "step": 4879 }, { "epoch": 2.4210003723470273, "grad_norm": 0.1290611075411292, "learning_rate": 1.6918525384323892e-06, "loss": 0.6923, "step": 4880 }, { "epoch": 2.421496835050267, "grad_norm": 0.12719530065201395, "learning_rate": 1.6909277239599266e-06, "loss": 0.7026, "step": 4881 }, { "epoch": 2.4219932977535064, "grad_norm": 0.12404152026479529, "learning_rate": 1.690003033141189e-06, "loss": 0.6793, "step": 4882 }, { "epoch": 2.4224897604567457, "grad_norm": 0.1281425281209562, "learning_rate": 1.6890784661175003e-06, "loss": 0.6684, "step": 4883 }, { "epoch": 2.422986223159985, "grad_norm": 0.13530606073217435, "learning_rate": 1.6881540230301654e-06, "loss": 0.7593, "step": 4884 }, { "epoch": 2.4234826858632244, "grad_norm": 0.13469127543994983, "learning_rate": 1.687229704020471e-06, "loss": 0.7071, "step": 4885 }, { "epoch": 2.423979148566464, "grad_norm": 0.13832168876093476, "learning_rate": 1.6863055092296849e-06, "loss": 0.7108, "step": 4886 }, { "epoch": 2.4244756112697035, "grad_norm": 0.13226395995413887, "learning_rate": 1.6853814387990553e-06, "loss": 0.7019, "step": 4887 }, { "epoch": 2.424972073972943, "grad_norm": 0.13201504974645803, "learning_rate": 1.684457492869811e-06, "loss": 0.7436, "step": 4888 }, { "epoch": 2.425468536676182, "grad_norm": 0.12931429802413458, "learning_rate": 1.683533671583163e-06, "loss": 0.7012, "step": 4889 }, { "epoch": 2.4259649993794214, "grad_norm": 0.13156592559267707, "learning_rate": 1.6826099750803015e-06, "loss": 0.7484, "step": 4890 }, { "epoch": 2.4264614620826612, "grad_norm": 0.12950517255649976, "learning_rate": 1.6816864035024e-06, "loss": 0.7049, "step": 4891 }, { "epoch": 2.4269579247859006, "grad_norm": 0.12953295412900734, "learning_rate": 1.6807629569906113e-06, "loss": 0.7068, "step": 4892 }, { "epoch": 2.42745438748914, "grad_norm": 0.13090036376760572, "learning_rate": 1.679839635686069e-06, "loss": 0.705, "step": 4893 }, { "epoch": 2.427950850192379, "grad_norm": 0.12975357279758154, "learning_rate": 1.678916439729888e-06, "loss": 0.6817, "step": 4894 }, { "epoch": 2.4284473128956185, "grad_norm": 0.13799080774336994, "learning_rate": 1.6779933692631639e-06, "loss": 0.7595, "step": 4895 }, { "epoch": 2.4289437755988583, "grad_norm": 0.1350060724395047, "learning_rate": 1.6770704244269737e-06, "loss": 0.7284, "step": 4896 }, { "epoch": 2.4294402383020977, "grad_norm": 0.1303139802763453, "learning_rate": 1.6761476053623748e-06, "loss": 0.7219, "step": 4897 }, { "epoch": 2.429936701005337, "grad_norm": 0.1273561658716899, "learning_rate": 1.675224912210405e-06, "loss": 0.7254, "step": 4898 }, { "epoch": 2.4304331637085763, "grad_norm": 0.13457224415207078, "learning_rate": 1.6743023451120831e-06, "loss": 0.7421, "step": 4899 }, { "epoch": 2.4309296264118156, "grad_norm": 0.13264396885754928, "learning_rate": 1.673379904208408e-06, "loss": 0.7332, "step": 4900 }, { "epoch": 2.4314260891150554, "grad_norm": 0.1349589055432028, "learning_rate": 1.672457589640361e-06, "loss": 0.7287, "step": 4901 }, { "epoch": 2.4319225518182948, "grad_norm": 0.13262022984457753, "learning_rate": 1.6715354015489028e-06, "loss": 0.7355, "step": 4902 }, { "epoch": 2.432419014521534, "grad_norm": 0.13029145762137417, "learning_rate": 1.6706133400749752e-06, "loss": 0.7185, "step": 4903 }, { "epoch": 2.4329154772247734, "grad_norm": 0.12961039350323814, "learning_rate": 1.6696914053595004e-06, "loss": 0.7257, "step": 4904 }, { "epoch": 2.4334119399280127, "grad_norm": 0.13202507745720699, "learning_rate": 1.668769597543381e-06, "loss": 0.6851, "step": 4905 }, { "epoch": 2.4339084026312525, "grad_norm": 0.13315852105173265, "learning_rate": 1.6678479167675005e-06, "loss": 0.7019, "step": 4906 }, { "epoch": 2.434404865334492, "grad_norm": 0.1407425970112104, "learning_rate": 1.6669263631727239e-06, "loss": 0.7146, "step": 4907 }, { "epoch": 2.434901328037731, "grad_norm": 0.13143176907968773, "learning_rate": 1.6660049368998958e-06, "loss": 0.7233, "step": 4908 }, { "epoch": 2.4353977907409705, "grad_norm": 0.12643932478055675, "learning_rate": 1.6650836380898402e-06, "loss": 0.6687, "step": 4909 }, { "epoch": 2.43589425344421, "grad_norm": 0.12854404310842657, "learning_rate": 1.664162466883364e-06, "loss": 0.7138, "step": 4910 }, { "epoch": 2.436390716147449, "grad_norm": 0.1334591427590283, "learning_rate": 1.663241423421253e-06, "loss": 0.6816, "step": 4911 }, { "epoch": 2.436887178850689, "grad_norm": 0.1293428646101715, "learning_rate": 1.6623205078442739e-06, "loss": 0.7063, "step": 4912 }, { "epoch": 2.4373836415539283, "grad_norm": 0.13844369579181617, "learning_rate": 1.6613997202931745e-06, "loss": 0.7307, "step": 4913 }, { "epoch": 2.4378801042571676, "grad_norm": 0.13252378360013214, "learning_rate": 1.6604790609086818e-06, "loss": 0.6987, "step": 4914 }, { "epoch": 2.438376566960407, "grad_norm": 0.13126753203447522, "learning_rate": 1.6595585298315043e-06, "loss": 0.6512, "step": 4915 }, { "epoch": 2.4388730296636467, "grad_norm": 0.13183951523709572, "learning_rate": 1.6586381272023295e-06, "loss": 0.7062, "step": 4916 }, { "epoch": 2.439369492366886, "grad_norm": 0.13000918489742638, "learning_rate": 1.6577178531618266e-06, "loss": 0.6794, "step": 4917 }, { "epoch": 2.4398659550701254, "grad_norm": 0.13272390835266695, "learning_rate": 1.6567977078506447e-06, "loss": 0.6733, "step": 4918 }, { "epoch": 2.4403624177733647, "grad_norm": 0.13196802532399263, "learning_rate": 1.6558776914094138e-06, "loss": 0.6988, "step": 4919 }, { "epoch": 2.440858880476604, "grad_norm": 0.13711894757545962, "learning_rate": 1.6549578039787436e-06, "loss": 0.7248, "step": 4920 }, { "epoch": 2.4413553431798434, "grad_norm": 0.13100192331000934, "learning_rate": 1.6540380456992234e-06, "loss": 0.7116, "step": 4921 }, { "epoch": 2.441851805883083, "grad_norm": 0.13106084542326454, "learning_rate": 1.653118416711424e-06, "loss": 0.6839, "step": 4922 }, { "epoch": 2.4423482685863225, "grad_norm": 0.13151486257585226, "learning_rate": 1.6521989171558958e-06, "loss": 0.7221, "step": 4923 }, { "epoch": 2.442844731289562, "grad_norm": 0.13197614090048165, "learning_rate": 1.6512795471731702e-06, "loss": 0.7504, "step": 4924 }, { "epoch": 2.443341193992801, "grad_norm": 0.12679261823216995, "learning_rate": 1.6503603069037572e-06, "loss": 0.6952, "step": 4925 }, { "epoch": 2.443837656696041, "grad_norm": 0.12936331957688477, "learning_rate": 1.6494411964881482e-06, "loss": 0.6724, "step": 4926 }, { "epoch": 2.4443341193992802, "grad_norm": 0.1342595649174414, "learning_rate": 1.6485222160668147e-06, "loss": 0.721, "step": 4927 }, { "epoch": 2.4448305821025196, "grad_norm": 0.13228023991573945, "learning_rate": 1.6476033657802081e-06, "loss": 0.7077, "step": 4928 }, { "epoch": 2.445327044805759, "grad_norm": 0.13098966056504466, "learning_rate": 1.6466846457687603e-06, "loss": 0.6618, "step": 4929 }, { "epoch": 2.4458235075089982, "grad_norm": 0.1292930748407539, "learning_rate": 1.6457660561728827e-06, "loss": 0.6647, "step": 4930 }, { "epoch": 2.4463199702122376, "grad_norm": 0.13636772124500665, "learning_rate": 1.6448475971329667e-06, "loss": 0.7434, "step": 4931 }, { "epoch": 2.4468164329154773, "grad_norm": 0.13511041258014103, "learning_rate": 1.6439292687893838e-06, "loss": 0.723, "step": 4932 }, { "epoch": 2.4473128956187167, "grad_norm": 0.13305991113837243, "learning_rate": 1.6430110712824857e-06, "loss": 0.7419, "step": 4933 }, { "epoch": 2.447809358321956, "grad_norm": 0.1284621762342019, "learning_rate": 1.6420930047526048e-06, "loss": 0.703, "step": 4934 }, { "epoch": 2.4483058210251953, "grad_norm": 0.13208141055635828, "learning_rate": 1.6411750693400527e-06, "loss": 0.7114, "step": 4935 }, { "epoch": 2.448802283728435, "grad_norm": 0.1334427283256501, "learning_rate": 1.6402572651851217e-06, "loss": 0.7029, "step": 4936 }, { "epoch": 2.4492987464316744, "grad_norm": 0.13331062356506063, "learning_rate": 1.639339592428082e-06, "loss": 0.7153, "step": 4937 }, { "epoch": 2.4497952091349138, "grad_norm": 0.13759074892380946, "learning_rate": 1.638422051209186e-06, "loss": 0.7888, "step": 4938 }, { "epoch": 2.450291671838153, "grad_norm": 0.1283084309158128, "learning_rate": 1.6375046416686652e-06, "loss": 0.6724, "step": 4939 }, { "epoch": 2.4507881345413924, "grad_norm": 0.13133438114077706, "learning_rate": 1.6365873639467314e-06, "loss": 0.7411, "step": 4940 }, { "epoch": 2.4512845972446318, "grad_norm": 0.13217904256740817, "learning_rate": 1.635670218183575e-06, "loss": 0.685, "step": 4941 }, { "epoch": 2.4517810599478715, "grad_norm": 0.13190452013229165, "learning_rate": 1.6347532045193664e-06, "loss": 0.7293, "step": 4942 }, { "epoch": 2.452277522651111, "grad_norm": 0.13055191375790906, "learning_rate": 1.6338363230942583e-06, "loss": 0.7185, "step": 4943 }, { "epoch": 2.45277398535435, "grad_norm": 0.13277302356989085, "learning_rate": 1.6329195740483797e-06, "loss": 0.746, "step": 4944 }, { "epoch": 2.4532704480575895, "grad_norm": 0.13175605359588108, "learning_rate": 1.6320029575218424e-06, "loss": 0.7421, "step": 4945 }, { "epoch": 2.4537669107608293, "grad_norm": 0.13186597595104096, "learning_rate": 1.6310864736547352e-06, "loss": 0.707, "step": 4946 }, { "epoch": 2.4542633734640686, "grad_norm": 0.12766107544270525, "learning_rate": 1.6301701225871297e-06, "loss": 0.6824, "step": 4947 }, { "epoch": 2.454759836167308, "grad_norm": 0.13352247929313615, "learning_rate": 1.629253904459073e-06, "loss": 0.716, "step": 4948 }, { "epoch": 2.4552562988705473, "grad_norm": 0.12517152600846954, "learning_rate": 1.6283378194105958e-06, "loss": 0.688, "step": 4949 }, { "epoch": 2.4557527615737866, "grad_norm": 0.1281114060367298, "learning_rate": 1.627421867581707e-06, "loss": 0.6663, "step": 4950 }, { "epoch": 2.456249224277026, "grad_norm": 0.12627705318778099, "learning_rate": 1.6265060491123945e-06, "loss": 0.7013, "step": 4951 }, { "epoch": 2.4567456869802657, "grad_norm": 0.1324199896128721, "learning_rate": 1.6255903641426282e-06, "loss": 0.725, "step": 4952 }, { "epoch": 2.457242149683505, "grad_norm": 0.13004158500704271, "learning_rate": 1.6246748128123537e-06, "loss": 0.6722, "step": 4953 }, { "epoch": 2.4577386123867444, "grad_norm": 0.13174097008700347, "learning_rate": 1.6237593952614994e-06, "loss": 0.6889, "step": 4954 }, { "epoch": 2.4582350750899837, "grad_norm": 0.13318866205780763, "learning_rate": 1.622844111629972e-06, "loss": 0.7379, "step": 4955 }, { "epoch": 2.4587315377932235, "grad_norm": 0.1337606359354983, "learning_rate": 1.6219289620576583e-06, "loss": 0.7546, "step": 4956 }, { "epoch": 2.459228000496463, "grad_norm": 0.12844627082050522, "learning_rate": 1.6210139466844244e-06, "loss": 0.6996, "step": 4957 }, { "epoch": 2.459724463199702, "grad_norm": 0.12849347327276872, "learning_rate": 1.6200990656501146e-06, "loss": 0.7006, "step": 4958 }, { "epoch": 2.4602209259029415, "grad_norm": 0.12716771704033275, "learning_rate": 1.6191843190945544e-06, "loss": 0.7079, "step": 4959 }, { "epoch": 2.460717388606181, "grad_norm": 0.1313359040978842, "learning_rate": 1.618269707157548e-06, "loss": 0.705, "step": 4960 }, { "epoch": 2.46121385130942, "grad_norm": 0.1332264821711248, "learning_rate": 1.6173552299788805e-06, "loss": 0.7429, "step": 4961 }, { "epoch": 2.46171031401266, "grad_norm": 0.12976033946371668, "learning_rate": 1.616440887698313e-06, "loss": 0.6886, "step": 4962 }, { "epoch": 2.4622067767158993, "grad_norm": 0.1275893351046771, "learning_rate": 1.6155266804555903e-06, "loss": 0.7263, "step": 4963 }, { "epoch": 2.4627032394191386, "grad_norm": 0.13229798618783215, "learning_rate": 1.614612608390432e-06, "loss": 0.7365, "step": 4964 }, { "epoch": 2.463199702122378, "grad_norm": 0.12896939980843639, "learning_rate": 1.6136986716425404e-06, "loss": 0.6556, "step": 4965 }, { "epoch": 2.4636961648256177, "grad_norm": 0.1288248754058537, "learning_rate": 1.6127848703515962e-06, "loss": 0.6835, "step": 4966 }, { "epoch": 2.464192627528857, "grad_norm": 0.13426674389004808, "learning_rate": 1.6118712046572587e-06, "loss": 0.7412, "step": 4967 }, { "epoch": 2.4646890902320964, "grad_norm": 0.13038176699779896, "learning_rate": 1.6109576746991684e-06, "loss": 0.6967, "step": 4968 }, { "epoch": 2.4651855529353357, "grad_norm": 0.13301186253518696, "learning_rate": 1.6100442806169423e-06, "loss": 0.7605, "step": 4969 }, { "epoch": 2.465682015638575, "grad_norm": 0.13134857714155998, "learning_rate": 1.6091310225501782e-06, "loss": 0.7556, "step": 4970 }, { "epoch": 2.4661784783418144, "grad_norm": 0.1332594834907538, "learning_rate": 1.6082179006384535e-06, "loss": 0.7556, "step": 4971 }, { "epoch": 2.466674941045054, "grad_norm": 0.1250597053615197, "learning_rate": 1.6073049150213243e-06, "loss": 0.6891, "step": 4972 }, { "epoch": 2.4671714037482935, "grad_norm": 0.1321067771404992, "learning_rate": 1.6063920658383258e-06, "loss": 0.6851, "step": 4973 }, { "epoch": 2.467667866451533, "grad_norm": 0.12621805146652318, "learning_rate": 1.605479353228972e-06, "loss": 0.6641, "step": 4974 }, { "epoch": 2.468164329154772, "grad_norm": 0.12940412380904856, "learning_rate": 1.6045667773327562e-06, "loss": 0.6976, "step": 4975 }, { "epoch": 2.468660791858012, "grad_norm": 0.13197779767436546, "learning_rate": 1.6036543382891512e-06, "loss": 0.7368, "step": 4976 }, { "epoch": 2.4691572545612512, "grad_norm": 0.1322531297862413, "learning_rate": 1.6027420362376092e-06, "loss": 0.7481, "step": 4977 }, { "epoch": 2.4696537172644906, "grad_norm": 0.1269834035874267, "learning_rate": 1.6018298713175602e-06, "loss": 0.6829, "step": 4978 }, { "epoch": 2.47015017996773, "grad_norm": 0.13354786856232534, "learning_rate": 1.6009178436684153e-06, "loss": 0.7016, "step": 4979 }, { "epoch": 2.4706466426709692, "grad_norm": 0.13062340464990232, "learning_rate": 1.6000059534295614e-06, "loss": 0.7193, "step": 4980 }, { "epoch": 2.4711431053742086, "grad_norm": 0.1306702771269536, "learning_rate": 1.599094200740367e-06, "loss": 0.7059, "step": 4981 }, { "epoch": 2.4716395680774483, "grad_norm": 0.13005823715313075, "learning_rate": 1.5981825857401796e-06, "loss": 0.6991, "step": 4982 }, { "epoch": 2.4721360307806877, "grad_norm": 0.12974971867119356, "learning_rate": 1.5972711085683241e-06, "loss": 0.7102, "step": 4983 }, { "epoch": 2.472632493483927, "grad_norm": 0.12960690860093224, "learning_rate": 1.596359769364106e-06, "loss": 0.6814, "step": 4984 }, { "epoch": 2.4731289561871663, "grad_norm": 0.1264988539550879, "learning_rate": 1.5954485682668075e-06, "loss": 0.6807, "step": 4985 }, { "epoch": 2.473625418890406, "grad_norm": 0.13011994137414026, "learning_rate": 1.5945375054156926e-06, "loss": 0.7104, "step": 4986 }, { "epoch": 2.4741218815936454, "grad_norm": 0.14027792550347323, "learning_rate": 1.5936265809500012e-06, "loss": 0.7041, "step": 4987 }, { "epoch": 2.4746183442968848, "grad_norm": 0.13203696744805918, "learning_rate": 1.592715795008955e-06, "loss": 0.7102, "step": 4988 }, { "epoch": 2.475114807000124, "grad_norm": 0.1318975498818065, "learning_rate": 1.5918051477317525e-06, "loss": 0.7065, "step": 4989 }, { "epoch": 2.4756112697033634, "grad_norm": 0.13322263973824197, "learning_rate": 1.5908946392575713e-06, "loss": 0.7667, "step": 4990 }, { "epoch": 2.4761077324066028, "grad_norm": 0.1275784020608823, "learning_rate": 1.5899842697255676e-06, "loss": 0.7045, "step": 4991 }, { "epoch": 2.4766041951098425, "grad_norm": 0.12621490198653326, "learning_rate": 1.5890740392748778e-06, "loss": 0.6955, "step": 4992 }, { "epoch": 2.477100657813082, "grad_norm": 0.13147433530074085, "learning_rate": 1.588163948044615e-06, "loss": 0.6624, "step": 4993 }, { "epoch": 2.477597120516321, "grad_norm": 0.12864338251047736, "learning_rate": 1.587253996173873e-06, "loss": 0.6439, "step": 4994 }, { "epoch": 2.4780935832195605, "grad_norm": 0.12915557397838578, "learning_rate": 1.5863441838017235e-06, "loss": 0.6478, "step": 4995 }, { "epoch": 2.4785900459228, "grad_norm": 0.13124819867045107, "learning_rate": 1.585434511067216e-06, "loss": 0.763, "step": 4996 }, { "epoch": 2.4790865086260396, "grad_norm": 0.13444097971021127, "learning_rate": 1.5845249781093786e-06, "loss": 0.7284, "step": 4997 }, { "epoch": 2.479582971329279, "grad_norm": 0.12597972778964714, "learning_rate": 1.5836155850672202e-06, "loss": 0.6775, "step": 4998 }, { "epoch": 2.4800794340325183, "grad_norm": 0.13275658755510444, "learning_rate": 1.5827063320797266e-06, "loss": 0.7051, "step": 4999 }, { "epoch": 2.4805758967357576, "grad_norm": 0.12509208961914398, "learning_rate": 1.5817972192858624e-06, "loss": 0.7111, "step": 5000 }, { "epoch": 2.481072359438997, "grad_norm": 0.12842493642077443, "learning_rate": 1.5808882468245706e-06, "loss": 0.6731, "step": 5001 }, { "epoch": 2.4815688221422367, "grad_norm": 0.13994016275679041, "learning_rate": 1.5799794148347738e-06, "loss": 0.7332, "step": 5002 }, { "epoch": 2.482065284845476, "grad_norm": 0.13160007453858913, "learning_rate": 1.579070723455372e-06, "loss": 0.7184, "step": 5003 }, { "epoch": 2.4825617475487154, "grad_norm": 0.12476420674542148, "learning_rate": 1.5781621728252439e-06, "loss": 0.7323, "step": 5004 }, { "epoch": 2.4830582102519547, "grad_norm": 0.13342172172903713, "learning_rate": 1.5772537630832477e-06, "loss": 0.6979, "step": 5005 }, { "epoch": 2.483554672955194, "grad_norm": 0.13261519543470868, "learning_rate": 1.5763454943682183e-06, "loss": 0.6798, "step": 5006 }, { "epoch": 2.484051135658434, "grad_norm": 0.13196930690487815, "learning_rate": 1.5754373668189703e-06, "loss": 0.6926, "step": 5007 }, { "epoch": 2.484547598361673, "grad_norm": 0.1284437605438022, "learning_rate": 1.5745293805742968e-06, "loss": 0.7104, "step": 5008 }, { "epoch": 2.4850440610649125, "grad_norm": 0.1271584061825038, "learning_rate": 1.5736215357729683e-06, "loss": 0.7149, "step": 5009 }, { "epoch": 2.485540523768152, "grad_norm": 0.12986340829886878, "learning_rate": 1.5727138325537355e-06, "loss": 0.7077, "step": 5010 }, { "epoch": 2.486036986471391, "grad_norm": 0.13788400838359663, "learning_rate": 1.5718062710553253e-06, "loss": 0.751, "step": 5011 }, { "epoch": 2.486533449174631, "grad_norm": 0.1348639652151765, "learning_rate": 1.5708988514164442e-06, "loss": 0.7636, "step": 5012 }, { "epoch": 2.4870299118778703, "grad_norm": 0.13129936769800069, "learning_rate": 1.569991573775776e-06, "loss": 0.6946, "step": 5013 }, { "epoch": 2.4875263745811096, "grad_norm": 0.131016067710159, "learning_rate": 1.5690844382719844e-06, "loss": 0.7299, "step": 5014 }, { "epoch": 2.488022837284349, "grad_norm": 0.13469638317810959, "learning_rate": 1.5681774450437104e-06, "loss": 0.7123, "step": 5015 }, { "epoch": 2.4885192999875883, "grad_norm": 0.12704079453926578, "learning_rate": 1.5672705942295735e-06, "loss": 0.6537, "step": 5016 }, { "epoch": 2.489015762690828, "grad_norm": 0.12753576550951762, "learning_rate": 1.5663638859681706e-06, "loss": 0.7195, "step": 5017 }, { "epoch": 2.4895122253940674, "grad_norm": 0.13049316211920853, "learning_rate": 1.5654573203980782e-06, "loss": 0.7029, "step": 5018 }, { "epoch": 2.4900086880973067, "grad_norm": 0.12674348845812947, "learning_rate": 1.5645508976578501e-06, "loss": 0.6749, "step": 5019 }, { "epoch": 2.490505150800546, "grad_norm": 0.13119282880613511, "learning_rate": 1.5636446178860188e-06, "loss": 0.7071, "step": 5020 }, { "epoch": 2.4910016135037854, "grad_norm": 0.12857653431579513, "learning_rate": 1.5627384812210945e-06, "loss": 0.7098, "step": 5021 }, { "epoch": 2.491498076207025, "grad_norm": 0.1299396609011369, "learning_rate": 1.561832487801565e-06, "loss": 0.7165, "step": 5022 }, { "epoch": 2.4919945389102645, "grad_norm": 0.13187761577665433, "learning_rate": 1.560926637765897e-06, "loss": 0.6788, "step": 5023 }, { "epoch": 2.492491001613504, "grad_norm": 0.13676658429929037, "learning_rate": 1.560020931252536e-06, "loss": 0.774, "step": 5024 }, { "epoch": 2.492987464316743, "grad_norm": 0.2391281093395671, "learning_rate": 1.5591153683999045e-06, "loss": 0.7162, "step": 5025 }, { "epoch": 2.4934839270199824, "grad_norm": 0.1321280807778348, "learning_rate": 1.5582099493464032e-06, "loss": 0.7193, "step": 5026 }, { "epoch": 2.4939803897232222, "grad_norm": 0.12836015141870605, "learning_rate": 1.5573046742304104e-06, "loss": 0.7098, "step": 5027 }, { "epoch": 2.4944768524264616, "grad_norm": 0.12731919945775716, "learning_rate": 1.5563995431902834e-06, "loss": 0.7006, "step": 5028 }, { "epoch": 2.494973315129701, "grad_norm": 0.129683691750222, "learning_rate": 1.5554945563643564e-06, "loss": 0.7271, "step": 5029 }, { "epoch": 2.49546977783294, "grad_norm": 0.12619265071805652, "learning_rate": 1.5545897138909423e-06, "loss": 0.7269, "step": 5030 }, { "epoch": 2.4959662405361795, "grad_norm": 0.12864219160660795, "learning_rate": 1.5536850159083319e-06, "loss": 0.7173, "step": 5031 }, { "epoch": 2.4964627032394193, "grad_norm": 0.12745661281840148, "learning_rate": 1.5527804625547937e-06, "loss": 0.6913, "step": 5032 }, { "epoch": 2.4969591659426587, "grad_norm": 0.12858809922709444, "learning_rate": 1.5518760539685752e-06, "loss": 0.7134, "step": 5033 }, { "epoch": 2.497455628645898, "grad_norm": 0.12827890445554876, "learning_rate": 1.550971790287899e-06, "loss": 0.6648, "step": 5034 }, { "epoch": 2.4979520913491373, "grad_norm": 0.13300762899595023, "learning_rate": 1.5500676716509683e-06, "loss": 0.7096, "step": 5035 }, { "epoch": 2.4984485540523766, "grad_norm": 0.13329585467541116, "learning_rate": 1.549163698195963e-06, "loss": 0.739, "step": 5036 }, { "epoch": 2.4989450167556164, "grad_norm": 0.13351068526158444, "learning_rate": 1.5482598700610412e-06, "loss": 0.7359, "step": 5037 }, { "epoch": 2.4994414794588558, "grad_norm": 0.13746974435710943, "learning_rate": 1.5473561873843378e-06, "loss": 0.7429, "step": 5038 }, { "epoch": 2.499937942162095, "grad_norm": 0.1311946484491049, "learning_rate": 1.5464526503039668e-06, "loss": 0.7333, "step": 5039 }, { "epoch": 2.5004344048653344, "grad_norm": 0.13003936186444456, "learning_rate": 1.5455492589580195e-06, "loss": 0.6862, "step": 5040 }, { "epoch": 2.5004344048653344, "eval_loss": 0.7251803278923035, "eval_runtime": 135.8229, "eval_samples_per_second": 223.475, "eval_steps_per_second": 27.941, "step": 5040 }, { "epoch": 2.5009308675685737, "grad_norm": 0.12908028509507846, "learning_rate": 1.5446460134845642e-06, "loss": 0.6778, "step": 5041 }, { "epoch": 2.501427330271813, "grad_norm": 0.13166843080491383, "learning_rate": 1.543742914021648e-06, "loss": 0.7092, "step": 5042 }, { "epoch": 2.501923792975053, "grad_norm": 0.13088243606617905, "learning_rate": 1.5428399607072956e-06, "loss": 0.7373, "step": 5043 }, { "epoch": 2.502420255678292, "grad_norm": 0.12629275270854762, "learning_rate": 1.541937153679508e-06, "loss": 0.708, "step": 5044 }, { "epoch": 2.5029167183815315, "grad_norm": 0.13225143684011295, "learning_rate": 1.5410344930762648e-06, "loss": 0.701, "step": 5045 }, { "epoch": 2.503413181084771, "grad_norm": 0.13340939597155863, "learning_rate": 1.5401319790355232e-06, "loss": 0.7098, "step": 5046 }, { "epoch": 2.5039096437880106, "grad_norm": 0.13043769011930573, "learning_rate": 1.5392296116952181e-06, "loss": 0.6952, "step": 5047 }, { "epoch": 2.50440610649125, "grad_norm": 0.13682071863988435, "learning_rate": 1.5383273911932627e-06, "loss": 0.7542, "step": 5048 }, { "epoch": 2.5049025691944893, "grad_norm": 0.13032047556352128, "learning_rate": 1.5374253176675464e-06, "loss": 0.6926, "step": 5049 }, { "epoch": 2.5053990318977286, "grad_norm": 0.13244019431180845, "learning_rate": 1.5365233912559364e-06, "loss": 0.7175, "step": 5050 }, { "epoch": 2.505895494600968, "grad_norm": 0.13817001240133622, "learning_rate": 1.5356216120962774e-06, "loss": 0.7282, "step": 5051 }, { "epoch": 2.5063919573042073, "grad_norm": 0.13140272145786022, "learning_rate": 1.5347199803263927e-06, "loss": 0.6855, "step": 5052 }, { "epoch": 2.506888420007447, "grad_norm": 0.1360933004526906, "learning_rate": 1.5338184960840824e-06, "loss": 0.7409, "step": 5053 }, { "epoch": 2.5073848827106864, "grad_norm": 0.13091513330256985, "learning_rate": 1.5329171595071227e-06, "loss": 0.6919, "step": 5054 }, { "epoch": 2.5078813454139257, "grad_norm": 0.1265533074222129, "learning_rate": 1.5320159707332695e-06, "loss": 0.6243, "step": 5055 }, { "epoch": 2.508377808117165, "grad_norm": 0.1252099518570422, "learning_rate": 1.5311149299002542e-06, "loss": 0.6715, "step": 5056 }, { "epoch": 2.508874270820405, "grad_norm": 0.13609272644415313, "learning_rate": 1.5302140371457875e-06, "loss": 0.732, "step": 5057 }, { "epoch": 2.509370733523644, "grad_norm": 0.13323577694421218, "learning_rate": 1.529313292607556e-06, "loss": 0.6702, "step": 5058 }, { "epoch": 2.5098671962268835, "grad_norm": 0.13041020040639237, "learning_rate": 1.5284126964232244e-06, "loss": 0.7071, "step": 5059 }, { "epoch": 2.510363658930123, "grad_norm": 0.1304162413787588, "learning_rate": 1.5275122487304337e-06, "loss": 0.7312, "step": 5060 }, { "epoch": 2.510860121633362, "grad_norm": 0.13021570150444117, "learning_rate": 1.5266119496668025e-06, "loss": 0.7283, "step": 5061 }, { "epoch": 2.5113565843366015, "grad_norm": 0.1279339703409767, "learning_rate": 1.5257117993699276e-06, "loss": 0.696, "step": 5062 }, { "epoch": 2.5118530470398412, "grad_norm": 0.13240644628046436, "learning_rate": 1.524811797977383e-06, "loss": 0.6864, "step": 5063 }, { "epoch": 2.5123495097430806, "grad_norm": 0.1400762626467057, "learning_rate": 1.523911945626719e-06, "loss": 0.7678, "step": 5064 }, { "epoch": 2.51284597244632, "grad_norm": 0.13014151678535776, "learning_rate": 1.5230122424554644e-06, "loss": 0.7036, "step": 5065 }, { "epoch": 2.5133424351495592, "grad_norm": 0.12985990176866047, "learning_rate": 1.5221126886011228e-06, "loss": 0.7224, "step": 5066 }, { "epoch": 2.513838897852799, "grad_norm": 0.1317976170535492, "learning_rate": 1.5212132842011778e-06, "loss": 0.6924, "step": 5067 }, { "epoch": 2.5143353605560383, "grad_norm": 0.12970897949094737, "learning_rate": 1.5203140293930888e-06, "loss": 0.6995, "step": 5068 }, { "epoch": 2.5148318232592777, "grad_norm": 0.1303319284929338, "learning_rate": 1.519414924314292e-06, "loss": 0.7018, "step": 5069 }, { "epoch": 2.515328285962517, "grad_norm": 0.12850755690334908, "learning_rate": 1.5185159691022023e-06, "loss": 0.7186, "step": 5070 }, { "epoch": 2.5158247486657563, "grad_norm": 0.12718389440933345, "learning_rate": 1.517617163894209e-06, "loss": 0.6397, "step": 5071 }, { "epoch": 2.5163212113689957, "grad_norm": 0.13884217395526113, "learning_rate": 1.5167185088276815e-06, "loss": 0.665, "step": 5072 }, { "epoch": 2.5168176740722354, "grad_norm": 0.12603973119926584, "learning_rate": 1.5158200040399635e-06, "loss": 0.6631, "step": 5073 }, { "epoch": 2.5173141367754748, "grad_norm": 0.1293877404461229, "learning_rate": 1.5149216496683788e-06, "loss": 0.7018, "step": 5074 }, { "epoch": 2.517810599478714, "grad_norm": 0.12959977324157554, "learning_rate": 1.514023445850225e-06, "loss": 0.6999, "step": 5075 }, { "epoch": 2.5183070621819534, "grad_norm": 0.13078603859075733, "learning_rate": 1.513125392722779e-06, "loss": 0.6854, "step": 5076 }, { "epoch": 2.518803524885193, "grad_norm": 0.13366544771971153, "learning_rate": 1.5122274904232925e-06, "loss": 0.7107, "step": 5077 }, { "epoch": 2.5192999875884325, "grad_norm": 0.1332349279647867, "learning_rate": 1.5113297390889963e-06, "loss": 0.7199, "step": 5078 }, { "epoch": 2.519796450291672, "grad_norm": 0.12791951008145022, "learning_rate": 1.5104321388570977e-06, "loss": 0.6795, "step": 5079 }, { "epoch": 2.520292912994911, "grad_norm": 0.1356457836131563, "learning_rate": 1.50953468986478e-06, "loss": 0.6902, "step": 5080 }, { "epoch": 2.5207893756981505, "grad_norm": 0.12588538763738047, "learning_rate": 1.5086373922492049e-06, "loss": 0.6544, "step": 5081 }, { "epoch": 2.52128583840139, "grad_norm": 0.1330568928734858, "learning_rate": 1.5077402461475083e-06, "loss": 0.6666, "step": 5082 }, { "epoch": 2.5217823011046296, "grad_norm": 0.1225633830068647, "learning_rate": 1.506843251696805e-06, "loss": 0.6217, "step": 5083 }, { "epoch": 2.522278763807869, "grad_norm": 0.12995958837521615, "learning_rate": 1.505946409034187e-06, "loss": 0.6922, "step": 5084 }, { "epoch": 2.5227752265111083, "grad_norm": 0.12735849858705797, "learning_rate": 1.5050497182967221e-06, "loss": 0.6844, "step": 5085 }, { "epoch": 2.5232716892143476, "grad_norm": 0.128231127592946, "learning_rate": 1.5041531796214554e-06, "loss": 0.7062, "step": 5086 }, { "epoch": 2.5237681519175874, "grad_norm": 0.12711845362503602, "learning_rate": 1.5032567931454073e-06, "loss": 0.6898, "step": 5087 }, { "epoch": 2.5242646146208267, "grad_norm": 0.13608076036992192, "learning_rate": 1.5023605590055768e-06, "loss": 0.691, "step": 5088 }, { "epoch": 2.524761077324066, "grad_norm": 0.1301368936095168, "learning_rate": 1.5014644773389391e-06, "loss": 0.7156, "step": 5089 }, { "epoch": 2.5252575400273054, "grad_norm": 0.13293944449573997, "learning_rate": 1.5005685482824462e-06, "loss": 0.7482, "step": 5090 }, { "epoch": 2.5257540027305447, "grad_norm": 0.12733134898732534, "learning_rate": 1.499672771973026e-06, "loss": 0.6956, "step": 5091 }, { "epoch": 2.526250465433784, "grad_norm": 0.12354114386385891, "learning_rate": 1.4987771485475836e-06, "loss": 0.6575, "step": 5092 }, { "epoch": 2.526746928137024, "grad_norm": 0.1256909063925675, "learning_rate": 1.497881678143e-06, "loss": 0.6983, "step": 5093 }, { "epoch": 2.527243390840263, "grad_norm": 0.12995383983928524, "learning_rate": 1.4969863608961343e-06, "loss": 0.7182, "step": 5094 }, { "epoch": 2.5277398535435025, "grad_norm": 0.13104868294903146, "learning_rate": 1.4960911969438213e-06, "loss": 0.7503, "step": 5095 }, { "epoch": 2.528236316246742, "grad_norm": 0.13569502975102957, "learning_rate": 1.495196186422872e-06, "loss": 0.6806, "step": 5096 }, { "epoch": 2.5287327789499816, "grad_norm": 0.13339083267038424, "learning_rate": 1.4943013294700758e-06, "loss": 0.7056, "step": 5097 }, { "epoch": 2.529229241653221, "grad_norm": 0.13361409013339606, "learning_rate": 1.4934066262221954e-06, "loss": 0.7391, "step": 5098 }, { "epoch": 2.5297257043564603, "grad_norm": 0.12680487158505377, "learning_rate": 1.492512076815973e-06, "loss": 0.6482, "step": 5099 }, { "epoch": 2.5302221670596996, "grad_norm": 0.12746892302880983, "learning_rate": 1.4916176813881257e-06, "loss": 0.6522, "step": 5100 }, { "epoch": 2.530718629762939, "grad_norm": 0.13139044491664464, "learning_rate": 1.4907234400753473e-06, "loss": 0.712, "step": 5101 }, { "epoch": 2.5312150924661783, "grad_norm": 0.12799076572044088, "learning_rate": 1.4898293530143095e-06, "loss": 0.6722, "step": 5102 }, { "epoch": 2.531711555169418, "grad_norm": 0.12797705381984992, "learning_rate": 1.4889354203416575e-06, "loss": 0.624, "step": 5103 }, { "epoch": 2.5322080178726574, "grad_norm": 0.1355225972612351, "learning_rate": 1.4880416421940155e-06, "loss": 0.7376, "step": 5104 }, { "epoch": 2.5327044805758967, "grad_norm": 0.1294579579051531, "learning_rate": 1.4871480187079828e-06, "loss": 0.7096, "step": 5105 }, { "epoch": 2.533200943279136, "grad_norm": 0.13381112874931977, "learning_rate": 1.4862545500201358e-06, "loss": 0.7277, "step": 5106 }, { "epoch": 2.533697405982376, "grad_norm": 0.1246715734533751, "learning_rate": 1.4853612362670271e-06, "loss": 0.6442, "step": 5107 }, { "epoch": 2.534193868685615, "grad_norm": 0.12830907556578108, "learning_rate": 1.4844680775851846e-06, "loss": 0.6337, "step": 5108 }, { "epoch": 2.5346903313888545, "grad_norm": 0.1278448286390337, "learning_rate": 1.4835750741111139e-06, "loss": 0.7132, "step": 5109 }, { "epoch": 2.535186794092094, "grad_norm": 0.1335432788354155, "learning_rate": 1.4826822259812957e-06, "loss": 0.6988, "step": 5110 }, { "epoch": 2.535683256795333, "grad_norm": 0.134293152215964, "learning_rate": 1.4817895333321875e-06, "loss": 0.7336, "step": 5111 }, { "epoch": 2.5361797194985725, "grad_norm": 0.13777460933447475, "learning_rate": 1.4808969963002234e-06, "loss": 0.7112, "step": 5112 }, { "epoch": 2.5366761822018122, "grad_norm": 0.12897145890973283, "learning_rate": 1.480004615021814e-06, "loss": 0.6908, "step": 5113 }, { "epoch": 2.5371726449050516, "grad_norm": 0.13400133466081912, "learning_rate": 1.4791123896333438e-06, "loss": 0.7064, "step": 5114 }, { "epoch": 2.537669107608291, "grad_norm": 0.13211791906308365, "learning_rate": 1.4782203202711764e-06, "loss": 0.7026, "step": 5115 }, { "epoch": 2.5381655703115302, "grad_norm": 0.13139779720721778, "learning_rate": 1.4773284070716504e-06, "loss": 0.7138, "step": 5116 }, { "epoch": 2.53866203301477, "grad_norm": 0.13308362459519374, "learning_rate": 1.4764366501710798e-06, "loss": 0.7263, "step": 5117 }, { "epoch": 2.5391584957180093, "grad_norm": 0.13141118126729773, "learning_rate": 1.4755450497057563e-06, "loss": 0.678, "step": 5118 }, { "epoch": 2.5396549584212487, "grad_norm": 0.13186502701805516, "learning_rate": 1.4746536058119454e-06, "loss": 0.7602, "step": 5119 }, { "epoch": 2.540151421124488, "grad_norm": 0.12869479855496058, "learning_rate": 1.473762318625891e-06, "loss": 0.7195, "step": 5120 }, { "epoch": 2.5406478838277273, "grad_norm": 0.13445753218214146, "learning_rate": 1.4728711882838115e-06, "loss": 0.7307, "step": 5121 }, { "epoch": 2.5411443465309667, "grad_norm": 0.12705563245150678, "learning_rate": 1.471980214921902e-06, "loss": 0.6385, "step": 5122 }, { "epoch": 2.5416408092342064, "grad_norm": 0.13028590027975537, "learning_rate": 1.4710893986763347e-06, "loss": 0.705, "step": 5123 }, { "epoch": 2.5421372719374458, "grad_norm": 0.12817908251977725, "learning_rate": 1.4701987396832546e-06, "loss": 0.7177, "step": 5124 }, { "epoch": 2.542633734640685, "grad_norm": 0.12496892532709507, "learning_rate": 1.4693082380787858e-06, "loss": 0.6705, "step": 5125 }, { "epoch": 2.5431301973439244, "grad_norm": 0.13059250706457962, "learning_rate": 1.4684178939990264e-06, "loss": 0.691, "step": 5126 }, { "epoch": 2.543626660047164, "grad_norm": 0.1292904883407009, "learning_rate": 1.467527707580052e-06, "loss": 0.7285, "step": 5127 }, { "epoch": 2.5441231227504035, "grad_norm": 0.12910095210438902, "learning_rate": 1.466637678957913e-06, "loss": 0.6785, "step": 5128 }, { "epoch": 2.544619585453643, "grad_norm": 0.13231894500519423, "learning_rate": 1.4657478082686363e-06, "loss": 0.734, "step": 5129 }, { "epoch": 2.545116048156882, "grad_norm": 0.13354137515536624, "learning_rate": 1.4648580956482238e-06, "loss": 0.6792, "step": 5130 }, { "epoch": 2.5456125108601215, "grad_norm": 0.13437657602685885, "learning_rate": 1.4639685412326543e-06, "loss": 0.6869, "step": 5131 }, { "epoch": 2.546108973563361, "grad_norm": 0.13419591499955405, "learning_rate": 1.463079145157882e-06, "loss": 0.7086, "step": 5132 }, { "epoch": 2.5466054362666006, "grad_norm": 0.1317550238055129, "learning_rate": 1.462189907559836e-06, "loss": 0.7011, "step": 5133 }, { "epoch": 2.54710189896984, "grad_norm": 0.12919424179545982, "learning_rate": 1.4613008285744234e-06, "loss": 0.7132, "step": 5134 }, { "epoch": 2.5475983616730793, "grad_norm": 0.13156727464529705, "learning_rate": 1.4604119083375242e-06, "loss": 0.6697, "step": 5135 }, { "epoch": 2.5480948243763186, "grad_norm": 0.13198996730417278, "learning_rate": 1.4595231469849963e-06, "loss": 0.6547, "step": 5136 }, { "epoch": 2.5485912870795584, "grad_norm": 0.13504489395934224, "learning_rate": 1.4586345446526735e-06, "loss": 0.7082, "step": 5137 }, { "epoch": 2.5490877497827977, "grad_norm": 0.13159429183393376, "learning_rate": 1.457746101476362e-06, "loss": 0.7077, "step": 5138 }, { "epoch": 2.549584212486037, "grad_norm": 0.1324983434375749, "learning_rate": 1.4568578175918502e-06, "loss": 0.6928, "step": 5139 }, { "epoch": 2.5500806751892764, "grad_norm": 0.13854603478769614, "learning_rate": 1.455969693134893e-06, "loss": 0.7396, "step": 5140 }, { "epoch": 2.5505771378925157, "grad_norm": 0.12842830146725112, "learning_rate": 1.4550817282412293e-06, "loss": 0.7549, "step": 5141 }, { "epoch": 2.551073600595755, "grad_norm": 0.129204728717719, "learning_rate": 1.454193923046569e-06, "loss": 0.7016, "step": 5142 }, { "epoch": 2.551570063298995, "grad_norm": 0.12750868562867726, "learning_rate": 1.4533062776866002e-06, "loss": 0.6743, "step": 5143 }, { "epoch": 2.552066526002234, "grad_norm": 0.12654769018231987, "learning_rate": 1.452418792296984e-06, "loss": 0.7008, "step": 5144 }, { "epoch": 2.5525629887054735, "grad_norm": 0.13141605016356414, "learning_rate": 1.4515314670133582e-06, "loss": 0.7169, "step": 5145 }, { "epoch": 2.553059451408713, "grad_norm": 0.13379502140501737, "learning_rate": 1.4506443019713374e-06, "loss": 0.6779, "step": 5146 }, { "epoch": 2.5535559141119526, "grad_norm": 0.13256467196234833, "learning_rate": 1.4497572973065091e-06, "loss": 0.7206, "step": 5147 }, { "epoch": 2.554052376815192, "grad_norm": 0.13184270552834443, "learning_rate": 1.4488704531544396e-06, "loss": 0.6414, "step": 5148 }, { "epoch": 2.5545488395184313, "grad_norm": 0.12861101671614525, "learning_rate": 1.4479837696506677e-06, "loss": 0.7362, "step": 5149 }, { "epoch": 2.5550453022216706, "grad_norm": 0.1262895343147098, "learning_rate": 1.4470972469307076e-06, "loss": 0.7061, "step": 5150 }, { "epoch": 2.55554176492491, "grad_norm": 0.12872834020768845, "learning_rate": 1.4462108851300524e-06, "loss": 0.7172, "step": 5151 }, { "epoch": 2.5560382276281493, "grad_norm": 0.13269323839535846, "learning_rate": 1.4453246843841662e-06, "loss": 0.7117, "step": 5152 }, { "epoch": 2.5565346903313886, "grad_norm": 0.12885571431373338, "learning_rate": 1.4444386448284925e-06, "loss": 0.6573, "step": 5153 }, { "epoch": 2.5570311530346284, "grad_norm": 0.13280071499296228, "learning_rate": 1.4435527665984474e-06, "loss": 0.6934, "step": 5154 }, { "epoch": 2.5575276157378677, "grad_norm": 0.13454797554586437, "learning_rate": 1.442667049829422e-06, "loss": 0.7246, "step": 5155 }, { "epoch": 2.558024078441107, "grad_norm": 0.12697947714667507, "learning_rate": 1.4417814946567873e-06, "loss": 0.7333, "step": 5156 }, { "epoch": 2.558520541144347, "grad_norm": 0.12516155676293964, "learning_rate": 1.4408961012158818e-06, "loss": 0.6683, "step": 5157 }, { "epoch": 2.559017003847586, "grad_norm": 0.1302587030682515, "learning_rate": 1.4400108696420265e-06, "loss": 0.6953, "step": 5158 }, { "epoch": 2.5595134665508255, "grad_norm": 0.12649071492393144, "learning_rate": 1.4391258000705143e-06, "loss": 0.6955, "step": 5159 }, { "epoch": 2.560009929254065, "grad_norm": 0.1319214562476525, "learning_rate": 1.4382408926366125e-06, "loss": 0.7035, "step": 5160 }, { "epoch": 2.560506391957304, "grad_norm": 0.12479301266102043, "learning_rate": 1.4373561474755675e-06, "loss": 0.6584, "step": 5161 }, { "epoch": 2.5610028546605434, "grad_norm": 0.12302337495146001, "learning_rate": 1.4364715647225963e-06, "loss": 0.6872, "step": 5162 }, { "epoch": 2.561499317363783, "grad_norm": 0.1313391656279115, "learning_rate": 1.435587144512895e-06, "loss": 0.7152, "step": 5163 }, { "epoch": 2.5619957800670226, "grad_norm": 0.13419103025976656, "learning_rate": 1.434702886981632e-06, "loss": 0.707, "step": 5164 }, { "epoch": 2.562492242770262, "grad_norm": 0.12781224085601264, "learning_rate": 1.4338187922639506e-06, "loss": 0.6512, "step": 5165 }, { "epoch": 2.562988705473501, "grad_norm": 0.1269467241970245, "learning_rate": 1.4329348604949733e-06, "loss": 0.7062, "step": 5166 }, { "epoch": 2.563485168176741, "grad_norm": 0.13011475251943433, "learning_rate": 1.4320510918097927e-06, "loss": 0.7054, "step": 5167 }, { "epoch": 2.5639816308799803, "grad_norm": 0.12562228736537093, "learning_rate": 1.4311674863434803e-06, "loss": 0.682, "step": 5168 }, { "epoch": 2.5644780935832197, "grad_norm": 0.1280244371960763, "learning_rate": 1.43028404423108e-06, "loss": 0.701, "step": 5169 }, { "epoch": 2.564974556286459, "grad_norm": 0.12628433888008067, "learning_rate": 1.4294007656076108e-06, "loss": 0.6687, "step": 5170 }, { "epoch": 2.5654710189896983, "grad_norm": 0.12632914581609597, "learning_rate": 1.42851765060807e-06, "loss": 0.6794, "step": 5171 }, { "epoch": 2.5659674816929376, "grad_norm": 0.12626167379165484, "learning_rate": 1.4276346993674267e-06, "loss": 0.6737, "step": 5172 }, { "epoch": 2.566463944396177, "grad_norm": 0.13881256752440968, "learning_rate": 1.4267519120206251e-06, "loss": 0.7359, "step": 5173 }, { "epoch": 2.5669604070994168, "grad_norm": 0.13129891241921918, "learning_rate": 1.425869288702585e-06, "loss": 0.72, "step": 5174 }, { "epoch": 2.567456869802656, "grad_norm": 0.13010534065898433, "learning_rate": 1.4249868295482021e-06, "loss": 0.6765, "step": 5175 }, { "epoch": 2.5679533325058954, "grad_norm": 0.13578139920958562, "learning_rate": 1.4241045346923463e-06, "loss": 0.7659, "step": 5176 }, { "epoch": 2.568449795209135, "grad_norm": 0.12966367090684447, "learning_rate": 1.4232224042698606e-06, "loss": 0.6878, "step": 5177 }, { "epoch": 2.5689462579123745, "grad_norm": 0.1295589151537278, "learning_rate": 1.4223404384155665e-06, "loss": 0.6967, "step": 5178 }, { "epoch": 2.569442720615614, "grad_norm": 0.12606606232317147, "learning_rate": 1.4214586372642563e-06, "loss": 0.6885, "step": 5179 }, { "epoch": 2.569939183318853, "grad_norm": 0.12952004740829784, "learning_rate": 1.4205770009507013e-06, "loss": 0.7703, "step": 5180 }, { "epoch": 2.5704356460220925, "grad_norm": 0.13061476133482602, "learning_rate": 1.4196955296096449e-06, "loss": 0.7473, "step": 5181 }, { "epoch": 2.570932108725332, "grad_norm": 0.12905289024380742, "learning_rate": 1.418814223375804e-06, "loss": 0.7198, "step": 5182 }, { "epoch": 2.571428571428571, "grad_norm": 0.13521590335808284, "learning_rate": 1.4179330823838749e-06, "loss": 0.6663, "step": 5183 }, { "epoch": 2.571925034131811, "grad_norm": 0.1357147047734266, "learning_rate": 1.4170521067685234e-06, "loss": 0.7672, "step": 5184 }, { "epoch": 2.5724214968350503, "grad_norm": 0.1420237272434203, "learning_rate": 1.4161712966643942e-06, "loss": 0.768, "step": 5185 }, { "epoch": 2.5729179595382896, "grad_norm": 0.14090038663886145, "learning_rate": 1.415290652206105e-06, "loss": 0.7108, "step": 5186 }, { "epoch": 2.573414422241529, "grad_norm": 0.130067394360935, "learning_rate": 1.4144101735282465e-06, "loss": 0.6818, "step": 5187 }, { "epoch": 2.5739108849447687, "grad_norm": 0.13055899063930992, "learning_rate": 1.4135298607653885e-06, "loss": 0.7142, "step": 5188 }, { "epoch": 2.574407347648008, "grad_norm": 0.12598384203770444, "learning_rate": 1.4126497140520696e-06, "loss": 0.6844, "step": 5189 }, { "epoch": 2.5749038103512474, "grad_norm": 0.13099481254462542, "learning_rate": 1.4117697335228082e-06, "loss": 0.6484, "step": 5190 }, { "epoch": 2.5754002730544867, "grad_norm": 0.12722812757473498, "learning_rate": 1.410889919312094e-06, "loss": 0.6863, "step": 5191 }, { "epoch": 2.575896735757726, "grad_norm": 0.13021667807315976, "learning_rate": 1.4100102715543934e-06, "loss": 0.7062, "step": 5192 }, { "epoch": 2.5763931984609654, "grad_norm": 0.13204286958959552, "learning_rate": 1.4091307903841467e-06, "loss": 0.7369, "step": 5193 }, { "epoch": 2.576889661164205, "grad_norm": 0.12825087423564113, "learning_rate": 1.4082514759357668e-06, "loss": 0.6818, "step": 5194 }, { "epoch": 2.5773861238674445, "grad_norm": 0.13195934566675152, "learning_rate": 1.4073723283436447e-06, "loss": 0.7205, "step": 5195 }, { "epoch": 2.577882586570684, "grad_norm": 0.13298051618730106, "learning_rate": 1.4064933477421435e-06, "loss": 0.7092, "step": 5196 }, { "epoch": 2.578379049273923, "grad_norm": 0.13045643130916357, "learning_rate": 1.4056145342656002e-06, "loss": 0.721, "step": 5197 }, { "epoch": 2.578875511977163, "grad_norm": 0.1287770798648141, "learning_rate": 1.4047358880483292e-06, "loss": 0.6623, "step": 5198 }, { "epoch": 2.5793719746804022, "grad_norm": 0.12703038628187402, "learning_rate": 1.403857409224615e-06, "loss": 0.6748, "step": 5199 }, { "epoch": 2.5798684373836416, "grad_norm": 0.13086879765206233, "learning_rate": 1.4029790979287217e-06, "loss": 0.6923, "step": 5200 }, { "epoch": 2.580364900086881, "grad_norm": 0.13490815408873744, "learning_rate": 1.402100954294884e-06, "loss": 0.7387, "step": 5201 }, { "epoch": 2.5808613627901202, "grad_norm": 0.13403369332196752, "learning_rate": 1.4012229784573111e-06, "loss": 0.7194, "step": 5202 }, { "epoch": 2.5813578254933596, "grad_norm": 0.13057102934580292, "learning_rate": 1.400345170550189e-06, "loss": 0.6975, "step": 5203 }, { "epoch": 2.5818542881965993, "grad_norm": 0.13217032680650126, "learning_rate": 1.3994675307076766e-06, "loss": 0.6857, "step": 5204 }, { "epoch": 2.5823507508998387, "grad_norm": 0.12870495240664498, "learning_rate": 1.3985900590639058e-06, "loss": 0.6732, "step": 5205 }, { "epoch": 2.582847213603078, "grad_norm": 0.1248568293868238, "learning_rate": 1.397712755752984e-06, "loss": 0.6753, "step": 5206 }, { "epoch": 2.5833436763063173, "grad_norm": 0.1286211773697058, "learning_rate": 1.3968356209089944e-06, "loss": 0.698, "step": 5207 }, { "epoch": 2.583840139009557, "grad_norm": 0.12476162094736613, "learning_rate": 1.3959586546659926e-06, "loss": 0.6571, "step": 5208 }, { "epoch": 2.5843366017127964, "grad_norm": 0.13256034900710464, "learning_rate": 1.3950818571580071e-06, "loss": 0.7052, "step": 5209 }, { "epoch": 2.5848330644160358, "grad_norm": 0.14170229500379294, "learning_rate": 1.3942052285190453e-06, "loss": 0.7395, "step": 5210 }, { "epoch": 2.585329527119275, "grad_norm": 0.13306092777696724, "learning_rate": 1.3933287688830827e-06, "loss": 0.7503, "step": 5211 }, { "epoch": 2.5858259898225144, "grad_norm": 0.12983382658565437, "learning_rate": 1.3924524783840748e-06, "loss": 0.6702, "step": 5212 }, { "epoch": 2.5863224525257538, "grad_norm": 0.12964608984313164, "learning_rate": 1.3915763571559477e-06, "loss": 0.7302, "step": 5213 }, { "epoch": 2.5868189152289935, "grad_norm": 0.130514060454796, "learning_rate": 1.3907004053326006e-06, "loss": 0.7262, "step": 5214 }, { "epoch": 2.587315377932233, "grad_norm": 0.1339908897788451, "learning_rate": 1.3898246230479119e-06, "loss": 0.7146, "step": 5215 }, { "epoch": 2.587811840635472, "grad_norm": 0.12515165316595245, "learning_rate": 1.3889490104357278e-06, "loss": 0.6528, "step": 5216 }, { "epoch": 2.5883083033387115, "grad_norm": 0.13700532883591948, "learning_rate": 1.3880735676298743e-06, "loss": 0.7071, "step": 5217 }, { "epoch": 2.5888047660419513, "grad_norm": 0.130354785696436, "learning_rate": 1.3871982947641478e-06, "loss": 0.6935, "step": 5218 }, { "epoch": 2.5893012287451906, "grad_norm": 0.13701897641660335, "learning_rate": 1.386323191972318e-06, "loss": 0.7427, "step": 5219 }, { "epoch": 2.58979769144843, "grad_norm": 0.1320735550699063, "learning_rate": 1.3854482593881342e-06, "loss": 0.7064, "step": 5220 }, { "epoch": 2.5902941541516693, "grad_norm": 0.12715230958238605, "learning_rate": 1.3845734971453114e-06, "loss": 0.6718, "step": 5221 }, { "epoch": 2.5907906168549086, "grad_norm": 0.13018988366015433, "learning_rate": 1.3836989053775462e-06, "loss": 0.7509, "step": 5222 }, { "epoch": 2.591287079558148, "grad_norm": 0.13221945422649725, "learning_rate": 1.3828244842185034e-06, "loss": 0.7166, "step": 5223 }, { "epoch": 2.5917835422613877, "grad_norm": 0.1304668288729789, "learning_rate": 1.381950233801827e-06, "loss": 0.6849, "step": 5224 }, { "epoch": 2.592280004964627, "grad_norm": 0.12573646819731626, "learning_rate": 1.3810761542611306e-06, "loss": 0.6589, "step": 5225 }, { "epoch": 2.5927764676678664, "grad_norm": 0.13280192854756245, "learning_rate": 1.380202245730003e-06, "loss": 0.7207, "step": 5226 }, { "epoch": 2.5932729303711057, "grad_norm": 0.13071113755387934, "learning_rate": 1.3793285083420077e-06, "loss": 0.7014, "step": 5227 }, { "epoch": 2.5937693930743455, "grad_norm": 0.12860260319733735, "learning_rate": 1.3784549422306808e-06, "loss": 0.6795, "step": 5228 }, { "epoch": 2.594265855777585, "grad_norm": 0.1270646298734641, "learning_rate": 1.3775815475295343e-06, "loss": 0.6719, "step": 5229 }, { "epoch": 2.594762318480824, "grad_norm": 0.131427270856042, "learning_rate": 1.3767083243720516e-06, "loss": 0.7102, "step": 5230 }, { "epoch": 2.5952587811840635, "grad_norm": 0.1291160109583672, "learning_rate": 1.37583527289169e-06, "loss": 0.6879, "step": 5231 }, { "epoch": 2.595755243887303, "grad_norm": 0.12483862070084152, "learning_rate": 1.374962393221883e-06, "loss": 0.6806, "step": 5232 }, { "epoch": 2.596251706590542, "grad_norm": 0.1265687299366481, "learning_rate": 1.3740896854960361e-06, "loss": 0.6648, "step": 5233 }, { "epoch": 2.596748169293782, "grad_norm": 0.12551276631561484, "learning_rate": 1.3732171498475269e-06, "loss": 0.6796, "step": 5234 }, { "epoch": 2.5972446319970213, "grad_norm": 0.1919668140078233, "learning_rate": 1.3723447864097105e-06, "loss": 0.7173, "step": 5235 }, { "epoch": 2.5977410947002606, "grad_norm": 0.13071121709395, "learning_rate": 1.3714725953159136e-06, "loss": 0.725, "step": 5236 }, { "epoch": 2.5982375574035, "grad_norm": 0.12628300917307875, "learning_rate": 1.3706005766994354e-06, "loss": 0.6583, "step": 5237 }, { "epoch": 2.5987340201067397, "grad_norm": 0.1324112740619143, "learning_rate": 1.3697287306935498e-06, "loss": 0.7062, "step": 5238 }, { "epoch": 2.599230482809979, "grad_norm": 0.13328148315750796, "learning_rate": 1.3688570574315058e-06, "loss": 0.7195, "step": 5239 }, { "epoch": 2.5997269455132184, "grad_norm": 0.1392294677729809, "learning_rate": 1.3679855570465244e-06, "loss": 0.7264, "step": 5240 }, { "epoch": 2.6002234082164577, "grad_norm": 0.1257272610807176, "learning_rate": 1.367114229671799e-06, "loss": 0.6915, "step": 5241 }, { "epoch": 2.600719870919697, "grad_norm": 0.12811010695255046, "learning_rate": 1.3662430754405004e-06, "loss": 0.7125, "step": 5242 }, { "epoch": 2.6012163336229364, "grad_norm": 0.1329892421572713, "learning_rate": 1.365372094485768e-06, "loss": 0.7393, "step": 5243 }, { "epoch": 2.601712796326176, "grad_norm": 0.1293006710207882, "learning_rate": 1.36450128694072e-06, "loss": 0.6771, "step": 5244 }, { "epoch": 2.6022092590294155, "grad_norm": 0.13954401243263015, "learning_rate": 1.3636306529384432e-06, "loss": 0.7291, "step": 5245 }, { "epoch": 2.602705721732655, "grad_norm": 0.12768761269525258, "learning_rate": 1.3627601926120005e-06, "loss": 0.6687, "step": 5246 }, { "epoch": 2.603202184435894, "grad_norm": 0.1329893303483065, "learning_rate": 1.3618899060944286e-06, "loss": 0.7366, "step": 5247 }, { "epoch": 2.603698647139134, "grad_norm": 0.13009174728461212, "learning_rate": 1.3610197935187358e-06, "loss": 0.6713, "step": 5248 }, { "epoch": 2.6041951098423732, "grad_norm": 0.12802975892687476, "learning_rate": 1.3601498550179059e-06, "loss": 0.7355, "step": 5249 }, { "epoch": 2.6046915725456126, "grad_norm": 0.1297273904569148, "learning_rate": 1.3592800907248949e-06, "loss": 0.7098, "step": 5250 }, { "epoch": 2.605188035248852, "grad_norm": 0.12806246500929, "learning_rate": 1.3584105007726312e-06, "loss": 0.6429, "step": 5251 }, { "epoch": 2.6056844979520912, "grad_norm": 0.13041176910308638, "learning_rate": 1.3575410852940202e-06, "loss": 0.6807, "step": 5252 }, { "epoch": 2.6061809606553306, "grad_norm": 0.12886597784723972, "learning_rate": 1.3566718444219342e-06, "loss": 0.7237, "step": 5253 }, { "epoch": 2.6066774233585703, "grad_norm": 0.12916633911425887, "learning_rate": 1.355802778289226e-06, "loss": 0.6739, "step": 5254 }, { "epoch": 2.6071738860618097, "grad_norm": 0.1287678914063443, "learning_rate": 1.3549338870287165e-06, "loss": 0.6716, "step": 5255 }, { "epoch": 2.607670348765049, "grad_norm": 0.13212685032768234, "learning_rate": 1.3540651707732036e-06, "loss": 0.7382, "step": 5256 }, { "epoch": 2.6081668114682883, "grad_norm": 0.12905546375739468, "learning_rate": 1.3531966296554555e-06, "loss": 0.6551, "step": 5257 }, { "epoch": 2.608663274171528, "grad_norm": 0.12906147644682286, "learning_rate": 1.3523282638082142e-06, "loss": 0.7328, "step": 5258 }, { "epoch": 2.6091597368747674, "grad_norm": 0.1313108855974763, "learning_rate": 1.3514600733641969e-06, "loss": 0.6417, "step": 5259 }, { "epoch": 2.6096561995780068, "grad_norm": 0.12974355187493028, "learning_rate": 1.3505920584560913e-06, "loss": 0.6928, "step": 5260 }, { "epoch": 2.610152662281246, "grad_norm": 0.12727154967902357, "learning_rate": 1.349724219216561e-06, "loss": 0.669, "step": 5261 }, { "epoch": 2.6106491249844854, "grad_norm": 0.12535216696134888, "learning_rate": 1.3488565557782407e-06, "loss": 0.682, "step": 5262 }, { "epoch": 2.6111455876877248, "grad_norm": 0.13133855825746554, "learning_rate": 1.347989068273738e-06, "loss": 0.7084, "step": 5263 }, { "epoch": 2.6116420503909645, "grad_norm": 0.13110682521576933, "learning_rate": 1.3471217568356354e-06, "loss": 0.7168, "step": 5264 }, { "epoch": 2.612138513094204, "grad_norm": 0.12162732379351464, "learning_rate": 1.3462546215964867e-06, "loss": 0.6539, "step": 5265 }, { "epoch": 2.612634975797443, "grad_norm": 0.13230036317983876, "learning_rate": 1.345387662688821e-06, "loss": 0.7189, "step": 5266 }, { "epoch": 2.6131314385006825, "grad_norm": 0.12335108832614605, "learning_rate": 1.3445208802451383e-06, "loss": 0.6612, "step": 5267 }, { "epoch": 2.6136279012039223, "grad_norm": 0.13204613627760084, "learning_rate": 1.3436542743979125e-06, "loss": 0.7287, "step": 5268 }, { "epoch": 2.6141243639071616, "grad_norm": 0.12765603511898485, "learning_rate": 1.34278784527959e-06, "loss": 0.6788, "step": 5269 }, { "epoch": 2.614620826610401, "grad_norm": 0.13326553938549146, "learning_rate": 1.3419215930225898e-06, "loss": 0.6903, "step": 5270 }, { "epoch": 2.6151172893136403, "grad_norm": 0.13006724151910573, "learning_rate": 1.341055517759307e-06, "loss": 0.6968, "step": 5271 }, { "epoch": 2.6156137520168796, "grad_norm": 0.12782169012677588, "learning_rate": 1.3401896196221061e-06, "loss": 0.6402, "step": 5272 }, { "epoch": 2.616110214720119, "grad_norm": 0.1298644050131224, "learning_rate": 1.3393238987433247e-06, "loss": 0.6786, "step": 5273 }, { "epoch": 2.6166066774233587, "grad_norm": 0.13093853621883342, "learning_rate": 1.338458355255276e-06, "loss": 0.7008, "step": 5274 }, { "epoch": 2.617103140126598, "grad_norm": 0.1251589926214746, "learning_rate": 1.3375929892902435e-06, "loss": 0.6642, "step": 5275 }, { "epoch": 2.6175996028298374, "grad_norm": 0.1332356566343829, "learning_rate": 1.3367278009804852e-06, "loss": 0.6859, "step": 5276 }, { "epoch": 2.6180960655330767, "grad_norm": 0.1351996093141888, "learning_rate": 1.3358627904582308e-06, "loss": 0.6886, "step": 5277 }, { "epoch": 2.6185925282363165, "grad_norm": 0.12760964101813727, "learning_rate": 1.3349979578556827e-06, "loss": 0.6775, "step": 5278 }, { "epoch": 2.619088990939556, "grad_norm": 0.13492506375570365, "learning_rate": 1.334133303305018e-06, "loss": 0.7233, "step": 5279 }, { "epoch": 2.619585453642795, "grad_norm": 0.13103285327539846, "learning_rate": 1.3332688269383842e-06, "loss": 0.729, "step": 5280 }, { "epoch": 2.6200819163460345, "grad_norm": 0.12874709415836544, "learning_rate": 1.3324045288879034e-06, "loss": 0.7383, "step": 5281 }, { "epoch": 2.620578379049274, "grad_norm": 0.12537613283777926, "learning_rate": 1.33154040928567e-06, "loss": 0.6694, "step": 5282 }, { "epoch": 2.621074841752513, "grad_norm": 0.1229814161755244, "learning_rate": 1.3306764682637487e-06, "loss": 0.642, "step": 5283 }, { "epoch": 2.621571304455753, "grad_norm": 0.13180449979729172, "learning_rate": 1.329812705954183e-06, "loss": 0.7413, "step": 5284 }, { "epoch": 2.6220677671589923, "grad_norm": 0.1299850828781537, "learning_rate": 1.3289491224889805e-06, "loss": 0.7245, "step": 5285 }, { "epoch": 2.6225642298622316, "grad_norm": 0.12657003185254417, "learning_rate": 1.328085718000129e-06, "loss": 0.691, "step": 5286 }, { "epoch": 2.623060692565471, "grad_norm": 0.12873747042196176, "learning_rate": 1.3272224926195847e-06, "loss": 0.6719, "step": 5287 }, { "epoch": 2.6235571552687107, "grad_norm": 0.12986302258803353, "learning_rate": 1.326359446479279e-06, "loss": 0.6856, "step": 5288 }, { "epoch": 2.62405361797195, "grad_norm": 0.13177548514946316, "learning_rate": 1.325496579711114e-06, "loss": 0.7408, "step": 5289 }, { "epoch": 2.6245500806751894, "grad_norm": 0.13080981754895, "learning_rate": 1.3246338924469646e-06, "loss": 0.6891, "step": 5290 }, { "epoch": 2.6250465433784287, "grad_norm": 0.1319670698468296, "learning_rate": 1.3237713848186799e-06, "loss": 0.7171, "step": 5291 }, { "epoch": 2.625543006081668, "grad_norm": 0.1268286576276871, "learning_rate": 1.3229090569580782e-06, "loss": 0.7105, "step": 5292 }, { "epoch": 2.6260394687849073, "grad_norm": 0.13031273872848315, "learning_rate": 1.3220469089969556e-06, "loss": 0.6647, "step": 5293 }, { "epoch": 2.6265359314881467, "grad_norm": 0.12705509580073604, "learning_rate": 1.3211849410670755e-06, "loss": 0.6708, "step": 5294 }, { "epoch": 2.6270323941913865, "grad_norm": 0.12662469871597498, "learning_rate": 1.3203231533001753e-06, "loss": 0.7167, "step": 5295 }, { "epoch": 2.627528856894626, "grad_norm": 0.1351319977157379, "learning_rate": 1.3194615458279675e-06, "loss": 0.7027, "step": 5296 }, { "epoch": 2.628025319597865, "grad_norm": 0.1293833244334666, "learning_rate": 1.3186001187821328e-06, "loss": 0.737, "step": 5297 }, { "epoch": 2.628521782301105, "grad_norm": 0.12681644363417152, "learning_rate": 1.317738872294329e-06, "loss": 0.687, "step": 5298 }, { "epoch": 2.6290182450043442, "grad_norm": 0.12782685543955466, "learning_rate": 1.3168778064961824e-06, "loss": 0.6944, "step": 5299 }, { "epoch": 2.6295147077075836, "grad_norm": 0.1283732539871523, "learning_rate": 1.3160169215192929e-06, "loss": 0.6593, "step": 5300 }, { "epoch": 2.630011170410823, "grad_norm": 0.12786728676839182, "learning_rate": 1.315156217495233e-06, "loss": 0.6914, "step": 5301 }, { "epoch": 2.630507633114062, "grad_norm": 0.12640922642050675, "learning_rate": 1.3142956945555474e-06, "loss": 0.6319, "step": 5302 }, { "epoch": 2.6310040958173015, "grad_norm": 0.12770955937417205, "learning_rate": 1.3134353528317539e-06, "loss": 0.6829, "step": 5303 }, { "epoch": 2.631500558520541, "grad_norm": 0.12759140923876178, "learning_rate": 1.312575192455341e-06, "loss": 0.7206, "step": 5304 }, { "epoch": 2.6319970212237807, "grad_norm": 0.13233654333615097, "learning_rate": 1.3117152135577721e-06, "loss": 0.7442, "step": 5305 }, { "epoch": 2.63249348392702, "grad_norm": 0.13201851791859284, "learning_rate": 1.3108554162704797e-06, "loss": 0.7381, "step": 5306 }, { "epoch": 2.6329899466302593, "grad_norm": 0.13006021868404868, "learning_rate": 1.3099958007248698e-06, "loss": 0.685, "step": 5307 }, { "epoch": 2.633486409333499, "grad_norm": 0.1308979224400638, "learning_rate": 1.3091363670523225e-06, "loss": 0.6923, "step": 5308 }, { "epoch": 2.6339828720367384, "grad_norm": 0.13246572766620834, "learning_rate": 1.3082771153841872e-06, "loss": 0.7535, "step": 5309 }, { "epoch": 2.6344793347399778, "grad_norm": 0.13028781856699762, "learning_rate": 1.307418045851786e-06, "loss": 0.6802, "step": 5310 }, { "epoch": 2.634975797443217, "grad_norm": 0.1291552670036102, "learning_rate": 1.3065591585864161e-06, "loss": 0.7137, "step": 5311 }, { "epoch": 2.6354722601464564, "grad_norm": 0.1297984636190616, "learning_rate": 1.3057004537193424e-06, "loss": 0.6697, "step": 5312 }, { "epoch": 2.6359687228496957, "grad_norm": 0.12833411800480857, "learning_rate": 1.3048419313818062e-06, "loss": 0.6836, "step": 5313 }, { "epoch": 2.636465185552935, "grad_norm": 0.13042366934420158, "learning_rate": 1.3039835917050177e-06, "loss": 0.6944, "step": 5314 }, { "epoch": 2.636961648256175, "grad_norm": 0.13642883453725888, "learning_rate": 1.30312543482016e-06, "loss": 0.7166, "step": 5315 }, { "epoch": 2.637458110959414, "grad_norm": 0.130255734346025, "learning_rate": 1.3022674608583907e-06, "loss": 0.7537, "step": 5316 }, { "epoch": 2.6379545736626535, "grad_norm": 0.13076117414943525, "learning_rate": 1.3014096699508338e-06, "loss": 0.7065, "step": 5317 }, { "epoch": 2.6384510363658933, "grad_norm": 0.13062995849439213, "learning_rate": 1.3005520622285922e-06, "loss": 0.6623, "step": 5318 }, { "epoch": 2.6389474990691326, "grad_norm": 0.12821638120486448, "learning_rate": 1.2996946378227351e-06, "loss": 0.6664, "step": 5319 }, { "epoch": 2.639443961772372, "grad_norm": 0.133639093888335, "learning_rate": 1.298837396864308e-06, "loss": 0.75, "step": 5320 }, { "epoch": 2.6399404244756113, "grad_norm": 0.12479238215751826, "learning_rate": 1.297980339484326e-06, "loss": 0.6877, "step": 5321 }, { "epoch": 2.6404368871788506, "grad_norm": 0.13212736303956885, "learning_rate": 1.297123465813775e-06, "loss": 0.7034, "step": 5322 }, { "epoch": 2.64093334988209, "grad_norm": 0.13499589181985006, "learning_rate": 1.2962667759836166e-06, "loss": 0.721, "step": 5323 }, { "epoch": 2.6414298125853293, "grad_norm": 0.12490032994276366, "learning_rate": 1.2954102701247801e-06, "loss": 0.66, "step": 5324 }, { "epoch": 2.641926275288569, "grad_norm": 0.14001714407558724, "learning_rate": 1.2945539483681708e-06, "loss": 0.6848, "step": 5325 }, { "epoch": 2.6424227379918084, "grad_norm": 0.13601215073821113, "learning_rate": 1.2936978108446624e-06, "loss": 0.7245, "step": 5326 }, { "epoch": 2.6429192006950477, "grad_norm": 0.12605012170458385, "learning_rate": 1.292841857685101e-06, "loss": 0.6667, "step": 5327 }, { "epoch": 2.643415663398287, "grad_norm": 0.12872625874119725, "learning_rate": 1.2919860890203073e-06, "loss": 0.6868, "step": 5328 }, { "epoch": 2.643912126101527, "grad_norm": 0.13008341539653537, "learning_rate": 1.2911305049810701e-06, "loss": 0.6629, "step": 5329 }, { "epoch": 2.644408588804766, "grad_norm": 0.12691494726221766, "learning_rate": 1.2902751056981533e-06, "loss": 0.6905, "step": 5330 }, { "epoch": 2.6449050515080055, "grad_norm": 0.1238746354104558, "learning_rate": 1.2894198913022903e-06, "loss": 0.6731, "step": 5331 }, { "epoch": 2.645401514211245, "grad_norm": 0.13315315264555727, "learning_rate": 1.2885648619241866e-06, "loss": 0.699, "step": 5332 }, { "epoch": 2.645897976914484, "grad_norm": 0.1313601573492306, "learning_rate": 1.28771001769452e-06, "loss": 0.6966, "step": 5333 }, { "epoch": 2.6463944396177235, "grad_norm": 0.12904285233884105, "learning_rate": 1.2868553587439386e-06, "loss": 0.6657, "step": 5334 }, { "epoch": 2.6468909023209632, "grad_norm": 0.1276652699366476, "learning_rate": 1.2860008852030653e-06, "loss": 0.7054, "step": 5335 }, { "epoch": 2.6473873650242026, "grad_norm": 0.12726233644290474, "learning_rate": 1.2851465972024908e-06, "loss": 0.6983, "step": 5336 }, { "epoch": 2.647883827727442, "grad_norm": 0.128524573267683, "learning_rate": 1.2842924948727809e-06, "loss": 0.6639, "step": 5337 }, { "epoch": 2.6483802904306812, "grad_norm": 0.1354082209207073, "learning_rate": 1.2834385783444708e-06, "loss": 0.7063, "step": 5338 }, { "epoch": 2.648876753133921, "grad_norm": 0.1286859619680868, "learning_rate": 1.282584847748067e-06, "loss": 0.6669, "step": 5339 }, { "epoch": 2.6493732158371603, "grad_norm": 0.12527601100039995, "learning_rate": 1.2817313032140504e-06, "loss": 0.6949, "step": 5340 }, { "epoch": 2.6498696785403997, "grad_norm": 0.13317900586790732, "learning_rate": 1.2808779448728701e-06, "loss": 0.6903, "step": 5341 }, { "epoch": 2.650366141243639, "grad_norm": 0.1339289905433541, "learning_rate": 1.2800247728549492e-06, "loss": 0.707, "step": 5342 }, { "epoch": 2.6508626039468783, "grad_norm": 0.13121373346042195, "learning_rate": 1.2791717872906812e-06, "loss": 0.7315, "step": 5343 }, { "epoch": 2.6513590666501177, "grad_norm": 0.13089098565705848, "learning_rate": 1.2783189883104301e-06, "loss": 0.6792, "step": 5344 }, { "epoch": 2.6518555293533574, "grad_norm": 0.12878111758105115, "learning_rate": 1.2774663760445343e-06, "loss": 0.6792, "step": 5345 }, { "epoch": 2.6523519920565968, "grad_norm": 0.14725422361310098, "learning_rate": 1.2766139506233012e-06, "loss": 0.6749, "step": 5346 }, { "epoch": 2.652848454759836, "grad_norm": 0.1301170341633137, "learning_rate": 1.2757617121770093e-06, "loss": 0.6623, "step": 5347 }, { "epoch": 2.6533449174630754, "grad_norm": 0.1317723532692615, "learning_rate": 1.2749096608359124e-06, "loss": 0.7481, "step": 5348 }, { "epoch": 2.653841380166315, "grad_norm": 0.12676960816569183, "learning_rate": 1.2740577967302292e-06, "loss": 0.6932, "step": 5349 }, { "epoch": 2.6543378428695545, "grad_norm": 0.12671113631004585, "learning_rate": 1.2732061199901563e-06, "loss": 0.6807, "step": 5350 }, { "epoch": 2.654834305572794, "grad_norm": 0.13126321692463977, "learning_rate": 1.2723546307458564e-06, "loss": 0.7329, "step": 5351 }, { "epoch": 2.655330768276033, "grad_norm": 0.1264003258300388, "learning_rate": 1.2715033291274686e-06, "loss": 0.7149, "step": 5352 }, { "epoch": 2.6558272309792725, "grad_norm": 0.13089334621865706, "learning_rate": 1.2706522152650997e-06, "loss": 0.7176, "step": 5353 }, { "epoch": 2.656323693682512, "grad_norm": 0.13062906649658093, "learning_rate": 1.2698012892888272e-06, "loss": 0.6898, "step": 5354 }, { "epoch": 2.6568201563857516, "grad_norm": 0.12660598434158638, "learning_rate": 1.2689505513287042e-06, "loss": 0.6998, "step": 5355 }, { "epoch": 2.657316619088991, "grad_norm": 0.13240501373271096, "learning_rate": 1.2681000015147505e-06, "loss": 0.7185, "step": 5356 }, { "epoch": 2.6578130817922303, "grad_norm": 0.13214361476854886, "learning_rate": 1.2672496399769596e-06, "loss": 0.717, "step": 5357 }, { "epoch": 2.6583095444954696, "grad_norm": 0.12662065221064372, "learning_rate": 1.2663994668452961e-06, "loss": 0.6901, "step": 5358 }, { "epoch": 2.6588060071987094, "grad_norm": 0.12807260004668625, "learning_rate": 1.2655494822496938e-06, "loss": 0.7518, "step": 5359 }, { "epoch": 2.6593024699019487, "grad_norm": 0.12843060423085104, "learning_rate": 1.2646996863200612e-06, "loss": 0.6849, "step": 5360 }, { "epoch": 2.659798932605188, "grad_norm": 0.13206054308179124, "learning_rate": 1.263850079186274e-06, "loss": 0.7003, "step": 5361 }, { "epoch": 2.6602953953084274, "grad_norm": 0.1374628315367967, "learning_rate": 1.2630006609781832e-06, "loss": 0.7265, "step": 5362 }, { "epoch": 2.6607918580116667, "grad_norm": 0.133031557785691, "learning_rate": 1.2621514318256073e-06, "loss": 0.728, "step": 5363 }, { "epoch": 2.661288320714906, "grad_norm": 0.13243919879974533, "learning_rate": 1.2613023918583379e-06, "loss": 0.699, "step": 5364 }, { "epoch": 2.661784783418146, "grad_norm": 0.13056450670934303, "learning_rate": 1.2604535412061367e-06, "loss": 0.6934, "step": 5365 }, { "epoch": 2.662281246121385, "grad_norm": 0.1327556834023822, "learning_rate": 1.259604879998736e-06, "loss": 0.6961, "step": 5366 }, { "epoch": 2.6627777088246245, "grad_norm": 0.12818630552200058, "learning_rate": 1.2587564083658424e-06, "loss": 0.7106, "step": 5367 }, { "epoch": 2.663274171527864, "grad_norm": 0.1310766749031488, "learning_rate": 1.257908126437129e-06, "loss": 0.7148, "step": 5368 }, { "epoch": 2.6637706342311036, "grad_norm": 0.127265232183585, "learning_rate": 1.257060034342244e-06, "loss": 0.6936, "step": 5369 }, { "epoch": 2.664267096934343, "grad_norm": 0.13015485116988296, "learning_rate": 1.2562121322108033e-06, "loss": 0.6881, "step": 5370 }, { "epoch": 2.6647635596375823, "grad_norm": 0.12987369619590036, "learning_rate": 1.2553644201723953e-06, "loss": 0.6782, "step": 5371 }, { "epoch": 2.6652600223408216, "grad_norm": 0.1286930218309498, "learning_rate": 1.25451689835658e-06, "loss": 0.7157, "step": 5372 }, { "epoch": 2.665756485044061, "grad_norm": 0.13613822958082458, "learning_rate": 1.2536695668928861e-06, "loss": 0.7231, "step": 5373 }, { "epoch": 2.6662529477473003, "grad_norm": 0.13210551836350637, "learning_rate": 1.2528224259108165e-06, "loss": 0.7344, "step": 5374 }, { "epoch": 2.66674941045054, "grad_norm": 0.12948567861798826, "learning_rate": 1.2519754755398422e-06, "loss": 0.7031, "step": 5375 }, { "epoch": 2.6672458731537794, "grad_norm": 0.1267189214191555, "learning_rate": 1.251128715909405e-06, "loss": 0.7144, "step": 5376 }, { "epoch": 2.6677423358570187, "grad_norm": 0.12858865733135458, "learning_rate": 1.25028214714892e-06, "loss": 0.6898, "step": 5377 }, { "epoch": 2.668238798560258, "grad_norm": 0.1259741786554077, "learning_rate": 1.2494357693877707e-06, "loss": 0.6758, "step": 5378 }, { "epoch": 2.668735261263498, "grad_norm": 0.13563486865602353, "learning_rate": 1.2485895827553132e-06, "loss": 0.7236, "step": 5379 }, { "epoch": 2.669231723966737, "grad_norm": 0.13321717377807327, "learning_rate": 1.2477435873808736e-06, "loss": 0.7188, "step": 5380 }, { "epoch": 2.6697281866699765, "grad_norm": 0.13087033023193448, "learning_rate": 1.246897783393748e-06, "loss": 0.7512, "step": 5381 }, { "epoch": 2.670224649373216, "grad_norm": 0.13658879188594283, "learning_rate": 1.2460521709232042e-06, "loss": 0.6728, "step": 5382 }, { "epoch": 2.670721112076455, "grad_norm": 0.13628616113751904, "learning_rate": 1.2452067500984797e-06, "loss": 0.6972, "step": 5383 }, { "epoch": 2.6712175747796945, "grad_norm": 0.13793409627064301, "learning_rate": 1.2443615210487853e-06, "loss": 0.7301, "step": 5384 }, { "epoch": 2.6717140374829342, "grad_norm": 0.12840778349061024, "learning_rate": 1.2435164839032999e-06, "loss": 0.6916, "step": 5385 }, { "epoch": 2.6722105001861736, "grad_norm": 0.13033494926277364, "learning_rate": 1.2426716387911728e-06, "loss": 0.6811, "step": 5386 }, { "epoch": 2.672706962889413, "grad_norm": 0.13130268215437454, "learning_rate": 1.2418269858415267e-06, "loss": 0.6996, "step": 5387 }, { "epoch": 2.6732034255926522, "grad_norm": 0.12904504134003447, "learning_rate": 1.2409825251834518e-06, "loss": 0.6958, "step": 5388 }, { "epoch": 2.673699888295892, "grad_norm": 0.12717110744137422, "learning_rate": 1.2401382569460118e-06, "loss": 0.6495, "step": 5389 }, { "epoch": 2.6741963509991313, "grad_norm": 0.1318115465132516, "learning_rate": 1.239294181258239e-06, "loss": 0.6998, "step": 5390 }, { "epoch": 2.6746928137023707, "grad_norm": 0.133476545289, "learning_rate": 1.2384502982491359e-06, "loss": 0.7364, "step": 5391 }, { "epoch": 2.67518927640561, "grad_norm": 0.13316247822909816, "learning_rate": 1.237606608047678e-06, "loss": 0.7539, "step": 5392 }, { "epoch": 2.6756857391088493, "grad_norm": 0.13762117719884678, "learning_rate": 1.2367631107828086e-06, "loss": 0.7731, "step": 5393 }, { "epoch": 2.6761822018120887, "grad_norm": 0.1294653958734037, "learning_rate": 1.2359198065834439e-06, "loss": 0.7375, "step": 5394 }, { "epoch": 2.6766786645153284, "grad_norm": 0.13185956190100087, "learning_rate": 1.2350766955784688e-06, "loss": 0.7211, "step": 5395 }, { "epoch": 2.6771751272185678, "grad_norm": 0.13191116919803744, "learning_rate": 1.2342337778967383e-06, "loss": 0.6744, "step": 5396 }, { "epoch": 2.677671589921807, "grad_norm": 0.12866861106797114, "learning_rate": 1.2333910536670818e-06, "loss": 0.7133, "step": 5397 }, { "epoch": 2.6781680526250464, "grad_norm": 0.13443812133789382, "learning_rate": 1.2325485230182923e-06, "loss": 0.6642, "step": 5398 }, { "epoch": 2.678664515328286, "grad_norm": 0.13194917442407558, "learning_rate": 1.2317061860791402e-06, "loss": 0.7635, "step": 5399 }, { "epoch": 2.6791609780315255, "grad_norm": 0.12949501549978698, "learning_rate": 1.230864042978361e-06, "loss": 0.708, "step": 5400 }, { "epoch": 2.679657440734765, "grad_norm": 0.12914002116089188, "learning_rate": 1.230022093844664e-06, "loss": 0.6673, "step": 5401 }, { "epoch": 2.680153903438004, "grad_norm": 0.1288450806713303, "learning_rate": 1.2291803388067284e-06, "loss": 0.6895, "step": 5402 }, { "epoch": 2.6806503661412435, "grad_norm": 0.13187999417119586, "learning_rate": 1.2283387779932005e-06, "loss": 0.7022, "step": 5403 }, { "epoch": 2.681146828844483, "grad_norm": 0.12707167814369877, "learning_rate": 1.2274974115327017e-06, "loss": 0.7049, "step": 5404 }, { "epoch": 2.6816432915477226, "grad_norm": 0.12903769384971894, "learning_rate": 1.2266562395538198e-06, "loss": 0.7194, "step": 5405 }, { "epoch": 2.682139754250962, "grad_norm": 0.13367318620502683, "learning_rate": 1.225815262185116e-06, "loss": 0.6971, "step": 5406 }, { "epoch": 2.6826362169542013, "grad_norm": 0.12972052003617227, "learning_rate": 1.2249744795551198e-06, "loss": 0.7434, "step": 5407 }, { "epoch": 2.6831326796574406, "grad_norm": 0.1270398790488789, "learning_rate": 1.2241338917923295e-06, "loss": 0.6903, "step": 5408 }, { "epoch": 2.6836291423606804, "grad_norm": 0.1273000126272403, "learning_rate": 1.223293499025218e-06, "loss": 0.6947, "step": 5409 }, { "epoch": 2.6841256050639197, "grad_norm": 0.12789714754456988, "learning_rate": 1.2224533013822237e-06, "loss": 0.7394, "step": 5410 }, { "epoch": 2.684622067767159, "grad_norm": 0.1284746698224543, "learning_rate": 1.2216132989917592e-06, "loss": 0.6935, "step": 5411 }, { "epoch": 2.6851185304703984, "grad_norm": 0.128337900001136, "learning_rate": 1.2207734919822047e-06, "loss": 0.6772, "step": 5412 }, { "epoch": 2.6856149931736377, "grad_norm": 0.1286919735265176, "learning_rate": 1.2199338804819114e-06, "loss": 0.696, "step": 5413 }, { "epoch": 2.686111455876877, "grad_norm": 0.12513034231984738, "learning_rate": 1.2190944646191999e-06, "loss": 0.6521, "step": 5414 }, { "epoch": 2.686607918580117, "grad_norm": 0.1371942316709031, "learning_rate": 1.2182552445223609e-06, "loss": 0.7441, "step": 5415 }, { "epoch": 2.687104381283356, "grad_norm": 0.12633983584561082, "learning_rate": 1.2174162203196575e-06, "loss": 0.6826, "step": 5416 }, { "epoch": 2.6876008439865955, "grad_norm": 0.1321860232959971, "learning_rate": 1.216577392139319e-06, "loss": 0.7607, "step": 5417 }, { "epoch": 2.688097306689835, "grad_norm": 0.12852572536801632, "learning_rate": 1.2157387601095492e-06, "loss": 0.692, "step": 5418 }, { "epoch": 2.6885937693930746, "grad_norm": 0.12900473130966916, "learning_rate": 1.214900324358518e-06, "loss": 0.7091, "step": 5419 }, { "epoch": 2.689090232096314, "grad_norm": 0.12971154741512667, "learning_rate": 1.2140620850143667e-06, "loss": 0.7053, "step": 5420 }, { "epoch": 2.6895866947995533, "grad_norm": 0.13521129476773464, "learning_rate": 1.213224042205208e-06, "loss": 0.7227, "step": 5421 }, { "epoch": 2.6900831575027926, "grad_norm": 0.13226811800646973, "learning_rate": 1.2123861960591224e-06, "loss": 0.7147, "step": 5422 }, { "epoch": 2.690579620206032, "grad_norm": 0.131271672560679, "learning_rate": 1.2115485467041608e-06, "loss": 0.7435, "step": 5423 }, { "epoch": 2.6910760829092712, "grad_norm": 0.12939692136576958, "learning_rate": 1.2107110942683459e-06, "loss": 0.6961, "step": 5424 }, { "epoch": 2.691572545612511, "grad_norm": 0.13543860120642587, "learning_rate": 1.2098738388796668e-06, "loss": 0.6976, "step": 5425 }, { "epoch": 2.6920690083157504, "grad_norm": 0.1270996570143555, "learning_rate": 1.2090367806660872e-06, "loss": 0.693, "step": 5426 }, { "epoch": 2.6925654710189897, "grad_norm": 0.13061886754156452, "learning_rate": 1.2081999197555366e-06, "loss": 0.6917, "step": 5427 }, { "epoch": 2.693061933722229, "grad_norm": 0.13261693716322412, "learning_rate": 1.2073632562759146e-06, "loss": 0.7223, "step": 5428 }, { "epoch": 2.693558396425469, "grad_norm": 0.12738184225408514, "learning_rate": 1.2065267903550953e-06, "loss": 0.6637, "step": 5429 }, { "epoch": 2.694054859128708, "grad_norm": 0.12686598161535245, "learning_rate": 1.2056905221209147e-06, "loss": 0.6801, "step": 5430 }, { "epoch": 2.6945513218319475, "grad_norm": 0.12651761385982638, "learning_rate": 1.2048544517011863e-06, "loss": 0.7168, "step": 5431 }, { "epoch": 2.695047784535187, "grad_norm": 0.12731010103172005, "learning_rate": 1.2040185792236874e-06, "loss": 0.6676, "step": 5432 }, { "epoch": 2.695544247238426, "grad_norm": 0.12581732529546605, "learning_rate": 1.2031829048161705e-06, "loss": 0.6796, "step": 5433 }, { "epoch": 2.6960407099416654, "grad_norm": 0.13000955582734638, "learning_rate": 1.2023474286063538e-06, "loss": 0.708, "step": 5434 }, { "epoch": 2.696537172644905, "grad_norm": 0.1320803282519354, "learning_rate": 1.2015121507219254e-06, "loss": 0.6949, "step": 5435 }, { "epoch": 2.6970336353481446, "grad_norm": 0.1317763220856854, "learning_rate": 1.2006770712905458e-06, "loss": 0.6895, "step": 5436 }, { "epoch": 2.697530098051384, "grad_norm": 0.12990531826150573, "learning_rate": 1.1998421904398423e-06, "loss": 0.7198, "step": 5437 }, { "epoch": 2.698026560754623, "grad_norm": 0.1383883572643517, "learning_rate": 1.199007508297414e-06, "loss": 0.7659, "step": 5438 }, { "epoch": 2.698523023457863, "grad_norm": 0.12440590803020435, "learning_rate": 1.1981730249908282e-06, "loss": 0.6478, "step": 5439 }, { "epoch": 2.6990194861611023, "grad_norm": 0.12784078674498142, "learning_rate": 1.1973387406476216e-06, "loss": 0.6741, "step": 5440 }, { "epoch": 2.6995159488643417, "grad_norm": 0.13239096818217247, "learning_rate": 1.196504655395303e-06, "loss": 0.7346, "step": 5441 }, { "epoch": 2.700012411567581, "grad_norm": 0.12753915589030973, "learning_rate": 1.1956707693613468e-06, "loss": 0.6775, "step": 5442 }, { "epoch": 2.7005088742708203, "grad_norm": 0.12968903089581899, "learning_rate": 1.194837082673201e-06, "loss": 0.7184, "step": 5443 }, { "epoch": 2.7010053369740596, "grad_norm": 0.12351868593377198, "learning_rate": 1.1940035954582803e-06, "loss": 0.7085, "step": 5444 }, { "epoch": 2.701501799677299, "grad_norm": 0.13181069411223056, "learning_rate": 1.1931703078439705e-06, "loss": 0.7328, "step": 5445 }, { "epoch": 2.7019982623805388, "grad_norm": 0.1330437926631701, "learning_rate": 1.1923372199576252e-06, "loss": 0.7267, "step": 5446 }, { "epoch": 2.702494725083778, "grad_norm": 0.12415515084900948, "learning_rate": 1.1915043319265684e-06, "loss": 0.6483, "step": 5447 }, { "epoch": 2.7029911877870174, "grad_norm": 0.12773854918997632, "learning_rate": 1.1906716438780952e-06, "loss": 0.6881, "step": 5448 }, { "epoch": 2.703487650490257, "grad_norm": 0.13125807339891024, "learning_rate": 1.1898391559394668e-06, "loss": 0.708, "step": 5449 }, { "epoch": 2.7039841131934965, "grad_norm": 0.12144720415007257, "learning_rate": 1.1890068682379175e-06, "loss": 0.6287, "step": 5450 }, { "epoch": 2.704480575896736, "grad_norm": 0.13122189521060085, "learning_rate": 1.1881747809006483e-06, "loss": 0.7784, "step": 5451 }, { "epoch": 2.704977038599975, "grad_norm": 0.12593554383340527, "learning_rate": 1.1873428940548293e-06, "loss": 0.7264, "step": 5452 }, { "epoch": 2.7054735013032145, "grad_norm": 0.13034835041902, "learning_rate": 1.1865112078276032e-06, "loss": 0.6816, "step": 5453 }, { "epoch": 2.705969964006454, "grad_norm": 0.12793803176190882, "learning_rate": 1.1856797223460776e-06, "loss": 0.6852, "step": 5454 }, { "epoch": 2.706466426709693, "grad_norm": 0.13001908847017454, "learning_rate": 1.1848484377373336e-06, "loss": 0.7136, "step": 5455 }, { "epoch": 2.706962889412933, "grad_norm": 0.1244529234238611, "learning_rate": 1.1840173541284198e-06, "loss": 0.6583, "step": 5456 }, { "epoch": 2.7074593521161723, "grad_norm": 0.12496088308943552, "learning_rate": 1.1831864716463517e-06, "loss": 0.6996, "step": 5457 }, { "epoch": 2.7079558148194116, "grad_norm": 0.13009009109117492, "learning_rate": 1.182355790418119e-06, "loss": 0.7313, "step": 5458 }, { "epoch": 2.7084522775226514, "grad_norm": 0.13302558329128752, "learning_rate": 1.181525310570677e-06, "loss": 0.7344, "step": 5459 }, { "epoch": 2.7089487402258907, "grad_norm": 0.13255724713593725, "learning_rate": 1.1806950322309503e-06, "loss": 0.7073, "step": 5460 }, { "epoch": 2.70944520292913, "grad_norm": 0.12830942629616623, "learning_rate": 1.1798649555258359e-06, "loss": 0.6774, "step": 5461 }, { "epoch": 2.7099416656323694, "grad_norm": 0.13135455282779482, "learning_rate": 1.1790350805821948e-06, "loss": 0.6984, "step": 5462 }, { "epoch": 2.7104381283356087, "grad_norm": 0.12869925272920885, "learning_rate": 1.1782054075268626e-06, "loss": 0.6924, "step": 5463 }, { "epoch": 2.710934591038848, "grad_norm": 0.12635830105891932, "learning_rate": 1.1773759364866394e-06, "loss": 0.6704, "step": 5464 }, { "epoch": 2.7114310537420874, "grad_norm": 0.1267057090577105, "learning_rate": 1.1765466675882983e-06, "loss": 0.6603, "step": 5465 }, { "epoch": 2.711927516445327, "grad_norm": 0.1379598054720685, "learning_rate": 1.1757176009585795e-06, "loss": 0.7162, "step": 5466 }, { "epoch": 2.7124239791485665, "grad_norm": 0.1267327103751104, "learning_rate": 1.1748887367241913e-06, "loss": 0.6841, "step": 5467 }, { "epoch": 2.712920441851806, "grad_norm": 0.12581262787382652, "learning_rate": 1.1740600750118136e-06, "loss": 0.7009, "step": 5468 }, { "epoch": 2.713416904555045, "grad_norm": 0.12485464413297796, "learning_rate": 1.173231615948093e-06, "loss": 0.6748, "step": 5469 }, { "epoch": 2.713913367258285, "grad_norm": 0.12620028737405145, "learning_rate": 1.1724033596596477e-06, "loss": 0.688, "step": 5470 }, { "epoch": 2.7144098299615242, "grad_norm": 0.12721872175292415, "learning_rate": 1.1715753062730622e-06, "loss": 0.7245, "step": 5471 }, { "epoch": 2.7149062926647636, "grad_norm": 0.13289322932822414, "learning_rate": 1.170747455914891e-06, "loss": 0.7097, "step": 5472 }, { "epoch": 2.715402755368003, "grad_norm": 0.12728705232851031, "learning_rate": 1.169919808711659e-06, "loss": 0.6834, "step": 5473 }, { "epoch": 2.7158992180712422, "grad_norm": 0.1267143167247784, "learning_rate": 1.169092364789857e-06, "loss": 0.6926, "step": 5474 }, { "epoch": 2.7163956807744816, "grad_norm": 0.13082421386045198, "learning_rate": 1.1682651242759483e-06, "loss": 0.7155, "step": 5475 }, { "epoch": 2.7168921434777213, "grad_norm": 0.1313656554797384, "learning_rate": 1.1674380872963629e-06, "loss": 0.689, "step": 5476 }, { "epoch": 2.7173886061809607, "grad_norm": 0.12776895465581478, "learning_rate": 1.1666112539774998e-06, "loss": 0.6669, "step": 5477 }, { "epoch": 2.7178850688842, "grad_norm": 0.12847066772569427, "learning_rate": 1.1657846244457272e-06, "loss": 0.7357, "step": 5478 }, { "epoch": 2.7183815315874393, "grad_norm": 0.13314280685450303, "learning_rate": 1.1649581988273814e-06, "loss": 0.7503, "step": 5479 }, { "epoch": 2.718877994290679, "grad_norm": 0.13052451777587085, "learning_rate": 1.16413197724877e-06, "loss": 0.7349, "step": 5480 }, { "epoch": 2.7193744569939184, "grad_norm": 0.1300407125579856, "learning_rate": 1.163305959836166e-06, "loss": 0.7083, "step": 5481 }, { "epoch": 2.7198709196971578, "grad_norm": 0.1280550112986663, "learning_rate": 1.1624801467158145e-06, "loss": 0.6762, "step": 5482 }, { "epoch": 2.720367382400397, "grad_norm": 0.12510160831325617, "learning_rate": 1.1616545380139272e-06, "loss": 0.6925, "step": 5483 }, { "epoch": 2.7208638451036364, "grad_norm": 0.13196997248908202, "learning_rate": 1.1608291338566841e-06, "loss": 0.7083, "step": 5484 }, { "epoch": 2.7213603078068758, "grad_norm": 0.1340590026668367, "learning_rate": 1.1600039343702368e-06, "loss": 0.6946, "step": 5485 }, { "epoch": 2.7218567705101155, "grad_norm": 0.12829686594605474, "learning_rate": 1.1591789396807021e-06, "loss": 0.705, "step": 5486 }, { "epoch": 2.722353233213355, "grad_norm": 0.12881800317843775, "learning_rate": 1.158354149914169e-06, "loss": 0.6883, "step": 5487 }, { "epoch": 2.722849695916594, "grad_norm": 0.12960535888237937, "learning_rate": 1.1575295651966926e-06, "loss": 0.6959, "step": 5488 }, { "epoch": 2.7233461586198335, "grad_norm": 0.12426299232658077, "learning_rate": 1.156705185654296e-06, "loss": 0.679, "step": 5489 }, { "epoch": 2.7238426213230733, "grad_norm": 0.1267642649577807, "learning_rate": 1.1558810114129746e-06, "loss": 0.6957, "step": 5490 }, { "epoch": 2.7243390840263126, "grad_norm": 0.13057816997613797, "learning_rate": 1.1550570425986884e-06, "loss": 0.6982, "step": 5491 }, { "epoch": 2.724835546729552, "grad_norm": 0.13036966095415778, "learning_rate": 1.1542332793373699e-06, "loss": 0.7108, "step": 5492 }, { "epoch": 2.7253320094327913, "grad_norm": 0.12845031666688522, "learning_rate": 1.1534097217549167e-06, "loss": 0.7045, "step": 5493 }, { "epoch": 2.7258284721360306, "grad_norm": 0.13431116362677278, "learning_rate": 1.1525863699771967e-06, "loss": 0.7211, "step": 5494 }, { "epoch": 2.72632493483927, "grad_norm": 0.12701692487109137, "learning_rate": 1.1517632241300457e-06, "loss": 0.6845, "step": 5495 }, { "epoch": 2.7268213975425097, "grad_norm": 0.12491394627771943, "learning_rate": 1.1509402843392681e-06, "loss": 0.658, "step": 5496 }, { "epoch": 2.727317860245749, "grad_norm": 0.12829490733233098, "learning_rate": 1.150117550730638e-06, "loss": 0.7134, "step": 5497 }, { "epoch": 2.7278143229489884, "grad_norm": 0.127816041512245, "learning_rate": 1.1492950234298965e-06, "loss": 0.6658, "step": 5498 }, { "epoch": 2.7283107856522277, "grad_norm": 0.12694117469603244, "learning_rate": 1.1484727025627532e-06, "loss": 0.7073, "step": 5499 }, { "epoch": 2.7288072483554675, "grad_norm": 0.1273683408910965, "learning_rate": 1.147650588254888e-06, "loss": 0.7044, "step": 5500 }, { "epoch": 2.729303711058707, "grad_norm": 0.12716535637041657, "learning_rate": 1.1468286806319461e-06, "loss": 0.7027, "step": 5501 }, { "epoch": 2.729800173761946, "grad_norm": 0.13652878890308245, "learning_rate": 1.146006979819545e-06, "loss": 0.7242, "step": 5502 }, { "epoch": 2.7302966364651855, "grad_norm": 0.13060886134753424, "learning_rate": 1.1451854859432674e-06, "loss": 0.6823, "step": 5503 }, { "epoch": 2.730793099168425, "grad_norm": 0.12949358227740704, "learning_rate": 1.1443641991286644e-06, "loss": 0.6812, "step": 5504 }, { "epoch": 2.731289561871664, "grad_norm": 0.1484208011714529, "learning_rate": 1.1435431195012586e-06, "loss": 0.7169, "step": 5505 }, { "epoch": 2.731786024574904, "grad_norm": 0.12279528726656044, "learning_rate": 1.1427222471865368e-06, "loss": 0.6499, "step": 5506 }, { "epoch": 2.7322824872781433, "grad_norm": 0.12686401827050103, "learning_rate": 1.1419015823099582e-06, "loss": 0.6969, "step": 5507 }, { "epoch": 2.7327789499813826, "grad_norm": 0.1304912606677775, "learning_rate": 1.1410811249969475e-06, "loss": 0.7185, "step": 5508 }, { "epoch": 2.733275412684622, "grad_norm": 0.1307385183998749, "learning_rate": 1.140260875372898e-06, "loss": 0.7018, "step": 5509 }, { "epoch": 2.7337718753878617, "grad_norm": 0.12879059404077767, "learning_rate": 1.1394408335631721e-06, "loss": 0.7036, "step": 5510 }, { "epoch": 2.734268338091101, "grad_norm": 0.13168818270683702, "learning_rate": 1.138620999693099e-06, "loss": 0.733, "step": 5511 }, { "epoch": 2.7347648007943404, "grad_norm": 0.1302130132221126, "learning_rate": 1.1378013738879787e-06, "loss": 0.7118, "step": 5512 }, { "epoch": 2.7352612634975797, "grad_norm": 0.12664057418388594, "learning_rate": 1.1369819562730763e-06, "loss": 0.692, "step": 5513 }, { "epoch": 2.735757726200819, "grad_norm": 0.1787042284319172, "learning_rate": 1.1361627469736286e-06, "loss": 0.6932, "step": 5514 }, { "epoch": 2.7362541889040584, "grad_norm": 0.13276835251453653, "learning_rate": 1.1353437461148378e-06, "loss": 0.685, "step": 5515 }, { "epoch": 2.736750651607298, "grad_norm": 0.12840095500860307, "learning_rate": 1.1345249538218736e-06, "loss": 0.6855, "step": 5516 }, { "epoch": 2.7372471143105375, "grad_norm": 0.13118594667050903, "learning_rate": 1.1337063702198775e-06, "loss": 0.7603, "step": 5517 }, { "epoch": 2.737743577013777, "grad_norm": 0.12576209372358757, "learning_rate": 1.1328879954339546e-06, "loss": 0.6698, "step": 5518 }, { "epoch": 2.738240039717016, "grad_norm": 0.133093950748946, "learning_rate": 1.132069829589183e-06, "loss": 0.6886, "step": 5519 }, { "epoch": 2.738736502420256, "grad_norm": 0.12864037736931044, "learning_rate": 1.1312518728106048e-06, "loss": 0.6766, "step": 5520 }, { "epoch": 2.7392329651234952, "grad_norm": 0.12945382840659705, "learning_rate": 1.1304341252232307e-06, "loss": 0.6828, "step": 5521 }, { "epoch": 2.7397294278267346, "grad_norm": 0.1308218826995222, "learning_rate": 1.129616586952042e-06, "loss": 0.6656, "step": 5522 }, { "epoch": 2.740225890529974, "grad_norm": 0.1314567316421182, "learning_rate": 1.1287992581219846e-06, "loss": 0.7036, "step": 5523 }, { "epoch": 2.7407223532332132, "grad_norm": 0.1366700099747632, "learning_rate": 1.1279821388579762e-06, "loss": 0.7349, "step": 5524 }, { "epoch": 2.7412188159364526, "grad_norm": 0.1309681880536747, "learning_rate": 1.1271652292848988e-06, "loss": 0.6795, "step": 5525 }, { "epoch": 2.7417152786396923, "grad_norm": 0.12837208221450544, "learning_rate": 1.126348529527605e-06, "loss": 0.6809, "step": 5526 }, { "epoch": 2.7422117413429317, "grad_norm": 0.131214461157075, "learning_rate": 1.1255320397109132e-06, "loss": 0.7304, "step": 5527 }, { "epoch": 2.742708204046171, "grad_norm": 0.12712062614786912, "learning_rate": 1.1247157599596103e-06, "loss": 0.6985, "step": 5528 }, { "epoch": 2.7432046667494103, "grad_norm": 0.13592701398261714, "learning_rate": 1.1238996903984537e-06, "loss": 0.763, "step": 5529 }, { "epoch": 2.74370112945265, "grad_norm": 0.12718223517036215, "learning_rate": 1.1230838311521642e-06, "loss": 0.6984, "step": 5530 }, { "epoch": 2.7441975921558894, "grad_norm": 0.13534565978463103, "learning_rate": 1.1222681823454349e-06, "loss": 0.7765, "step": 5531 }, { "epoch": 2.7446940548591288, "grad_norm": 0.13626701847329473, "learning_rate": 1.121452744102924e-06, "loss": 0.7439, "step": 5532 }, { "epoch": 2.745190517562368, "grad_norm": 0.13284297253885471, "learning_rate": 1.1206375165492564e-06, "loss": 0.7347, "step": 5533 }, { "epoch": 2.7456869802656074, "grad_norm": 0.13079426764125932, "learning_rate": 1.119822499809029e-06, "loss": 0.6999, "step": 5534 }, { "epoch": 2.7461834429688468, "grad_norm": 0.12570608638802389, "learning_rate": 1.1190076940068031e-06, "loss": 0.6831, "step": 5535 }, { "epoch": 2.7466799056720865, "grad_norm": 0.12564077502899773, "learning_rate": 1.1181930992671078e-06, "loss": 0.6883, "step": 5536 }, { "epoch": 2.747176368375326, "grad_norm": 0.12497737842928656, "learning_rate": 1.1173787157144425e-06, "loss": 0.6551, "step": 5537 }, { "epoch": 2.747672831078565, "grad_norm": 0.13189163059908987, "learning_rate": 1.116564543473271e-06, "loss": 0.7169, "step": 5538 }, { "epoch": 2.7481692937818045, "grad_norm": 0.13114812942001947, "learning_rate": 1.1157505826680285e-06, "loss": 0.6959, "step": 5539 }, { "epoch": 2.7486657564850443, "grad_norm": 0.13908344915755239, "learning_rate": 1.1149368334231146e-06, "loss": 0.7409, "step": 5540 }, { "epoch": 2.7491622191882836, "grad_norm": 0.13448810312163784, "learning_rate": 1.1141232958628976e-06, "loss": 0.7048, "step": 5541 }, { "epoch": 2.749658681891523, "grad_norm": 0.13514802207833446, "learning_rate": 1.1133099701117143e-06, "loss": 0.7112, "step": 5542 }, { "epoch": 2.7501551445947623, "grad_norm": 0.12623353087443057, "learning_rate": 1.112496856293867e-06, "loss": 0.7261, "step": 5543 }, { "epoch": 2.7506516072980016, "grad_norm": 0.12458653015367169, "learning_rate": 1.1116839545336292e-06, "loss": 0.6775, "step": 5544 }, { "epoch": 2.7506516072980016, "eval_loss": 0.7236723899841309, "eval_runtime": 135.7629, "eval_samples_per_second": 223.574, "eval_steps_per_second": 27.953, "step": 5544 }, { "epoch": 2.751148070001241, "grad_norm": 0.1306204466783865, "learning_rate": 1.1108712649552384e-06, "loss": 0.6945, "step": 5545 }, { "epoch": 2.7516445327044807, "grad_norm": 0.12369576532652772, "learning_rate": 1.1100587876829024e-06, "loss": 0.6289, "step": 5546 }, { "epoch": 2.75214099540772, "grad_norm": 0.12789165154828827, "learning_rate": 1.1092465228407949e-06, "loss": 0.6195, "step": 5547 }, { "epoch": 2.7526374581109594, "grad_norm": 0.1266994829459876, "learning_rate": 1.1084344705530561e-06, "loss": 0.7413, "step": 5548 }, { "epoch": 2.7531339208141987, "grad_norm": 0.1268540066373866, "learning_rate": 1.1076226309437977e-06, "loss": 0.6826, "step": 5549 }, { "epoch": 2.7536303835174385, "grad_norm": 0.12618417321246705, "learning_rate": 1.1068110041370938e-06, "loss": 0.6762, "step": 5550 }, { "epoch": 2.754126846220678, "grad_norm": 0.1325579883265857, "learning_rate": 1.1059995902569911e-06, "loss": 0.7399, "step": 5551 }, { "epoch": 2.754623308923917, "grad_norm": 0.12889480405547812, "learning_rate": 1.1051883894274998e-06, "loss": 0.7098, "step": 5552 }, { "epoch": 2.7551197716271565, "grad_norm": 0.135934126639656, "learning_rate": 1.104377401772598e-06, "loss": 0.7098, "step": 5553 }, { "epoch": 2.755616234330396, "grad_norm": 0.1289564414190799, "learning_rate": 1.1035666274162344e-06, "loss": 0.7435, "step": 5554 }, { "epoch": 2.756112697033635, "grad_norm": 0.12736214807761737, "learning_rate": 1.1027560664823208e-06, "loss": 0.6656, "step": 5555 }, { "epoch": 2.756609159736875, "grad_norm": 0.12848892854207852, "learning_rate": 1.10194571909474e-06, "loss": 0.7014, "step": 5556 }, { "epoch": 2.7571056224401143, "grad_norm": 0.1321006717534794, "learning_rate": 1.10113558537734e-06, "loss": 0.7015, "step": 5557 }, { "epoch": 2.7576020851433536, "grad_norm": 0.12854999523007427, "learning_rate": 1.100325665453937e-06, "loss": 0.6767, "step": 5558 }, { "epoch": 2.758098547846593, "grad_norm": 0.1279414734388252, "learning_rate": 1.0995159594483138e-06, "loss": 0.696, "step": 5559 }, { "epoch": 2.7585950105498327, "grad_norm": 0.13005801919260618, "learning_rate": 1.09870646748422e-06, "loss": 0.73, "step": 5560 }, { "epoch": 2.759091473253072, "grad_norm": 0.1360761488502302, "learning_rate": 1.0978971896853758e-06, "loss": 0.7399, "step": 5561 }, { "epoch": 2.7595879359563114, "grad_norm": 0.13408584165708903, "learning_rate": 1.0970881261754641e-06, "loss": 0.7369, "step": 5562 }, { "epoch": 2.7600843986595507, "grad_norm": 0.12871987297525, "learning_rate": 1.096279277078139e-06, "loss": 0.7009, "step": 5563 }, { "epoch": 2.76058086136279, "grad_norm": 0.12990364606695212, "learning_rate": 1.0954706425170198e-06, "loss": 0.6468, "step": 5564 }, { "epoch": 2.7610773240660293, "grad_norm": 0.1252919783584692, "learning_rate": 1.094662222615692e-06, "loss": 0.7253, "step": 5565 }, { "epoch": 2.761573786769269, "grad_norm": 0.1326764590455894, "learning_rate": 1.0938540174977115e-06, "loss": 0.7167, "step": 5566 }, { "epoch": 2.7620702494725085, "grad_norm": 0.13247387675260933, "learning_rate": 1.0930460272865976e-06, "loss": 0.7151, "step": 5567 }, { "epoch": 2.762566712175748, "grad_norm": 0.13190057787514423, "learning_rate": 1.0922382521058405e-06, "loss": 0.7643, "step": 5568 }, { "epoch": 2.763063174878987, "grad_norm": 0.12934627161113424, "learning_rate": 1.091430692078895e-06, "loss": 0.7093, "step": 5569 }, { "epoch": 2.763559637582227, "grad_norm": 0.1265845815519934, "learning_rate": 1.0906233473291827e-06, "loss": 0.6742, "step": 5570 }, { "epoch": 2.764056100285466, "grad_norm": 0.1344552360678481, "learning_rate": 1.0898162179800948e-06, "loss": 0.7388, "step": 5571 }, { "epoch": 2.7645525629887056, "grad_norm": 0.12498891326605503, "learning_rate": 1.0890093041549873e-06, "loss": 0.6614, "step": 5572 }, { "epoch": 2.765049025691945, "grad_norm": 0.1289570046975174, "learning_rate": 1.0882026059771845e-06, "loss": 0.7288, "step": 5573 }, { "epoch": 2.765545488395184, "grad_norm": 0.12524071202980924, "learning_rate": 1.0873961235699759e-06, "loss": 0.6666, "step": 5574 }, { "epoch": 2.7660419510984235, "grad_norm": 0.13322599761967555, "learning_rate": 1.0865898570566212e-06, "loss": 0.7645, "step": 5575 }, { "epoch": 2.766538413801663, "grad_norm": 0.13519871420996185, "learning_rate": 1.0857838065603447e-06, "loss": 0.7605, "step": 5576 }, { "epoch": 2.7670348765049027, "grad_norm": 0.13070973410690823, "learning_rate": 1.084977972204337e-06, "loss": 0.6832, "step": 5577 }, { "epoch": 2.767531339208142, "grad_norm": 0.13143077641170847, "learning_rate": 1.0841723541117594e-06, "loss": 0.7178, "step": 5578 }, { "epoch": 2.7680278019113813, "grad_norm": 0.13523115464259342, "learning_rate": 1.083366952405736e-06, "loss": 0.7738, "step": 5579 }, { "epoch": 2.768524264614621, "grad_norm": 0.13073847691026474, "learning_rate": 1.0825617672093592e-06, "loss": 0.7124, "step": 5580 }, { "epoch": 2.7690207273178604, "grad_norm": 0.12972826652950495, "learning_rate": 1.0817567986456904e-06, "loss": 0.6924, "step": 5581 }, { "epoch": 2.7695171900210998, "grad_norm": 0.13008197120598594, "learning_rate": 1.0809520468377541e-06, "loss": 0.7238, "step": 5582 }, { "epoch": 2.770013652724339, "grad_norm": 0.1297808815362704, "learning_rate": 1.0801475119085455e-06, "loss": 0.7345, "step": 5583 }, { "epoch": 2.7705101154275784, "grad_norm": 0.13507574439437295, "learning_rate": 1.0793431939810243e-06, "loss": 0.7121, "step": 5584 }, { "epoch": 2.7710065781308177, "grad_norm": 0.1273641822305587, "learning_rate": 1.0785390931781164e-06, "loss": 0.6972, "step": 5585 }, { "epoch": 2.771503040834057, "grad_norm": 0.13271462621019983, "learning_rate": 1.0777352096227174e-06, "loss": 0.7522, "step": 5586 }, { "epoch": 2.771999503537297, "grad_norm": 0.13302101809288855, "learning_rate": 1.076931543437687e-06, "loss": 0.7542, "step": 5587 }, { "epoch": 2.772495966240536, "grad_norm": 0.12899005010841755, "learning_rate": 1.0761280947458536e-06, "loss": 0.6468, "step": 5588 }, { "epoch": 2.7729924289437755, "grad_norm": 0.12421829548371563, "learning_rate": 1.0753248636700109e-06, "loss": 0.6886, "step": 5589 }, { "epoch": 2.7734888916470153, "grad_norm": 0.12349016996371331, "learning_rate": 1.0745218503329196e-06, "loss": 0.7158, "step": 5590 }, { "epoch": 2.7739853543502546, "grad_norm": 0.12644575429599572, "learning_rate": 1.0737190548573082e-06, "loss": 0.6762, "step": 5591 }, { "epoch": 2.774481817053494, "grad_norm": 0.134593415267307, "learning_rate": 1.0729164773658692e-06, "loss": 0.7065, "step": 5592 }, { "epoch": 2.7749782797567333, "grad_norm": 0.1260483274577327, "learning_rate": 1.0721141179812664e-06, "loss": 0.6827, "step": 5593 }, { "epoch": 2.7754747424599726, "grad_norm": 0.12812302335606984, "learning_rate": 1.0713119768261248e-06, "loss": 0.6981, "step": 5594 }, { "epoch": 2.775971205163212, "grad_norm": 0.12876922838504537, "learning_rate": 1.0705100540230418e-06, "loss": 0.7289, "step": 5595 }, { "epoch": 2.7764676678664513, "grad_norm": 0.12571235400921374, "learning_rate": 1.0697083496945766e-06, "loss": 0.6929, "step": 5596 }, { "epoch": 2.776964130569691, "grad_norm": 0.13199476837674784, "learning_rate": 1.0689068639632563e-06, "loss": 0.7835, "step": 5597 }, { "epoch": 2.7774605932729304, "grad_norm": 0.1273711543359699, "learning_rate": 1.0681055969515769e-06, "loss": 0.7014, "step": 5598 }, { "epoch": 2.7779570559761697, "grad_norm": 0.1269043028906525, "learning_rate": 1.0673045487819975e-06, "loss": 0.69, "step": 5599 }, { "epoch": 2.7784535186794095, "grad_norm": 0.1319110742532422, "learning_rate": 1.066503719576947e-06, "loss": 0.7461, "step": 5600 }, { "epoch": 2.778949981382649, "grad_norm": 0.12579895622801868, "learning_rate": 1.0657031094588184e-06, "loss": 0.6868, "step": 5601 }, { "epoch": 2.779446444085888, "grad_norm": 0.12882226269354222, "learning_rate": 1.064902718549972e-06, "loss": 0.7045, "step": 5602 }, { "epoch": 2.7799429067891275, "grad_norm": 0.1270352468548072, "learning_rate": 1.0641025469727356e-06, "loss": 0.7087, "step": 5603 }, { "epoch": 2.780439369492367, "grad_norm": 0.12900061673731678, "learning_rate": 1.0633025948494014e-06, "loss": 0.7028, "step": 5604 }, { "epoch": 2.780935832195606, "grad_norm": 0.13016595296281186, "learning_rate": 1.0625028623022305e-06, "loss": 0.692, "step": 5605 }, { "epoch": 2.7814322948988455, "grad_norm": 0.12885825274574203, "learning_rate": 1.0617033494534486e-06, "loss": 0.6744, "step": 5606 }, { "epoch": 2.7819287576020852, "grad_norm": 0.12870191436048342, "learning_rate": 1.0609040564252484e-06, "loss": 0.6885, "step": 5607 }, { "epoch": 2.7824252203053246, "grad_norm": 0.12629534065315853, "learning_rate": 1.0601049833397892e-06, "loss": 0.6859, "step": 5608 }, { "epoch": 2.782921683008564, "grad_norm": 0.13520278743342204, "learning_rate": 1.0593061303191954e-06, "loss": 0.7816, "step": 5609 }, { "epoch": 2.7834181457118032, "grad_norm": 0.12636004627171285, "learning_rate": 1.0585074974855605e-06, "loss": 0.7254, "step": 5610 }, { "epoch": 2.783914608415043, "grad_norm": 0.1258511623485038, "learning_rate": 1.0577090849609415e-06, "loss": 0.668, "step": 5611 }, { "epoch": 2.7844110711182823, "grad_norm": 0.1268446329679461, "learning_rate": 1.0569108928673642e-06, "loss": 0.6671, "step": 5612 }, { "epoch": 2.7849075338215217, "grad_norm": 0.13239241247612957, "learning_rate": 1.0561129213268187e-06, "loss": 0.7689, "step": 5613 }, { "epoch": 2.785403996524761, "grad_norm": 0.1255651588944757, "learning_rate": 1.0553151704612614e-06, "loss": 0.6884, "step": 5614 }, { "epoch": 2.7859004592280003, "grad_norm": 0.12247730286780059, "learning_rate": 1.0545176403926172e-06, "loss": 0.6955, "step": 5615 }, { "epoch": 2.7863969219312397, "grad_norm": 0.12958608013612596, "learning_rate": 1.0537203312427752e-06, "loss": 0.6482, "step": 5616 }, { "epoch": 2.7868933846344794, "grad_norm": 0.1228441492665452, "learning_rate": 1.0529232431335903e-06, "loss": 0.6562, "step": 5617 }, { "epoch": 2.7873898473377188, "grad_norm": 0.13327477747426705, "learning_rate": 1.0521263761868866e-06, "loss": 0.6961, "step": 5618 }, { "epoch": 2.787886310040958, "grad_norm": 0.1350215485186969, "learning_rate": 1.0513297305244507e-06, "loss": 0.718, "step": 5619 }, { "epoch": 2.7883827727441974, "grad_norm": 0.1308450394330969, "learning_rate": 1.0505333062680383e-06, "loss": 0.7255, "step": 5620 }, { "epoch": 2.788879235447437, "grad_norm": 0.1280745953181878, "learning_rate": 1.04973710353937e-06, "loss": 0.7049, "step": 5621 }, { "epoch": 2.7893756981506765, "grad_norm": 0.1311328777593363, "learning_rate": 1.048941122460132e-06, "loss": 0.7186, "step": 5622 }, { "epoch": 2.789872160853916, "grad_norm": 0.12796332817853948, "learning_rate": 1.0481453631519775e-06, "loss": 0.6986, "step": 5623 }, { "epoch": 2.790368623557155, "grad_norm": 0.13077728648409898, "learning_rate": 1.0473498257365247e-06, "loss": 0.7143, "step": 5624 }, { "epoch": 2.7908650862603945, "grad_norm": 0.12948794453473453, "learning_rate": 1.0465545103353605e-06, "loss": 0.7077, "step": 5625 }, { "epoch": 2.791361548963634, "grad_norm": 0.12937599313169448, "learning_rate": 1.0457594170700342e-06, "loss": 0.7617, "step": 5626 }, { "epoch": 2.7918580116668736, "grad_norm": 0.12552421196790298, "learning_rate": 1.044964546062065e-06, "loss": 0.6971, "step": 5627 }, { "epoch": 2.792354474370113, "grad_norm": 0.128355301555478, "learning_rate": 1.0441698974329351e-06, "loss": 0.6843, "step": 5628 }, { "epoch": 2.7928509370733523, "grad_norm": 0.13577852067425666, "learning_rate": 1.043375471304093e-06, "loss": 0.6869, "step": 5629 }, { "epoch": 2.7933473997765916, "grad_norm": 0.12864716094515163, "learning_rate": 1.0425812677969558e-06, "loss": 0.7449, "step": 5630 }, { "epoch": 2.7938438624798314, "grad_norm": 0.1293266492985022, "learning_rate": 1.0417872870329029e-06, "loss": 0.6797, "step": 5631 }, { "epoch": 2.7943403251830707, "grad_norm": 0.1225513439668755, "learning_rate": 1.0409935291332838e-06, "loss": 0.6412, "step": 5632 }, { "epoch": 2.79483678788631, "grad_norm": 0.12592530737282465, "learning_rate": 1.04019999421941e-06, "loss": 0.6889, "step": 5633 }, { "epoch": 2.7953332505895494, "grad_norm": 0.12572434567997356, "learning_rate": 1.0394066824125604e-06, "loss": 0.6637, "step": 5634 }, { "epoch": 2.7958297132927887, "grad_norm": 0.12830761221708809, "learning_rate": 1.0386135938339812e-06, "loss": 0.7142, "step": 5635 }, { "epoch": 2.796326175996028, "grad_norm": 0.12804406659034892, "learning_rate": 1.0378207286048817e-06, "loss": 0.6682, "step": 5636 }, { "epoch": 2.796822638699268, "grad_norm": 0.1306245576613395, "learning_rate": 1.0370280868464405e-06, "loss": 0.6909, "step": 5637 }, { "epoch": 2.797319101402507, "grad_norm": 0.13224450154587467, "learning_rate": 1.0362356686797996e-06, "loss": 0.7285, "step": 5638 }, { "epoch": 2.7978155641057465, "grad_norm": 0.1265750971979006, "learning_rate": 1.0354434742260665e-06, "loss": 0.6563, "step": 5639 }, { "epoch": 2.798312026808986, "grad_norm": 0.1255205324161622, "learning_rate": 1.0346515036063165e-06, "loss": 0.6817, "step": 5640 }, { "epoch": 2.7988084895122256, "grad_norm": 0.12545489772850293, "learning_rate": 1.033859756941588e-06, "loss": 0.6674, "step": 5641 }, { "epoch": 2.799304952215465, "grad_norm": 0.12709075941299228, "learning_rate": 1.0330682343528886e-06, "loss": 0.6704, "step": 5642 }, { "epoch": 2.7998014149187043, "grad_norm": 0.1267872987559885, "learning_rate": 1.0322769359611883e-06, "loss": 0.6957, "step": 5643 }, { "epoch": 2.8002978776219436, "grad_norm": 0.13612904687065247, "learning_rate": 1.0314858618874263e-06, "loss": 0.7679, "step": 5644 }, { "epoch": 2.800794340325183, "grad_norm": 0.12963249148574502, "learning_rate": 1.0306950122525042e-06, "loss": 0.66, "step": 5645 }, { "epoch": 2.8012908030284223, "grad_norm": 0.12775287910822225, "learning_rate": 1.0299043871772904e-06, "loss": 0.6721, "step": 5646 }, { "epoch": 2.801787265731662, "grad_norm": 0.13139858927008313, "learning_rate": 1.0291139867826205e-06, "loss": 0.6754, "step": 5647 }, { "epoch": 2.8022837284349014, "grad_norm": 0.13065899559643898, "learning_rate": 1.0283238111892929e-06, "loss": 0.6853, "step": 5648 }, { "epoch": 2.8027801911381407, "grad_norm": 0.13055758162862297, "learning_rate": 1.0275338605180751e-06, "loss": 0.7194, "step": 5649 }, { "epoch": 2.80327665384138, "grad_norm": 0.13385534399710408, "learning_rate": 1.0267441348896978e-06, "loss": 0.703, "step": 5650 }, { "epoch": 2.80377311654462, "grad_norm": 0.12413925075970825, "learning_rate": 1.0259546344248567e-06, "loss": 0.6408, "step": 5651 }, { "epoch": 2.804269579247859, "grad_norm": 0.13054886800472554, "learning_rate": 1.0251653592442157e-06, "loss": 0.7052, "step": 5652 }, { "epoch": 2.8047660419510985, "grad_norm": 0.13047999060457022, "learning_rate": 1.0243763094684029e-06, "loss": 0.6872, "step": 5653 }, { "epoch": 2.805262504654338, "grad_norm": 0.1273801577355027, "learning_rate": 1.0235874852180109e-06, "loss": 0.6741, "step": 5654 }, { "epoch": 2.805758967357577, "grad_norm": 0.1324495467661752, "learning_rate": 1.0227988866135995e-06, "loss": 0.7374, "step": 5655 }, { "epoch": 2.8062554300608165, "grad_norm": 0.12792054360102523, "learning_rate": 1.0220105137756923e-06, "loss": 0.7069, "step": 5656 }, { "epoch": 2.8067518927640562, "grad_norm": 0.1351583667186328, "learning_rate": 1.021222366824781e-06, "loss": 0.7174, "step": 5657 }, { "epoch": 2.8072483554672956, "grad_norm": 0.13071189164436514, "learning_rate": 1.0204344458813201e-06, "loss": 0.7505, "step": 5658 }, { "epoch": 2.807744818170535, "grad_norm": 0.12840479043760866, "learning_rate": 1.0196467510657315e-06, "loss": 0.7161, "step": 5659 }, { "epoch": 2.8082412808737742, "grad_norm": 0.12700151293719608, "learning_rate": 1.0188592824984018e-06, "loss": 0.713, "step": 5660 }, { "epoch": 2.808737743577014, "grad_norm": 0.1246492088887146, "learning_rate": 1.0180720402996814e-06, "loss": 0.698, "step": 5661 }, { "epoch": 2.8092342062802533, "grad_norm": 0.13557496235898936, "learning_rate": 1.0172850245898893e-06, "loss": 0.7027, "step": 5662 }, { "epoch": 2.8097306689834927, "grad_norm": 0.13048869432963878, "learning_rate": 1.0164982354893072e-06, "loss": 0.7034, "step": 5663 }, { "epoch": 2.810227131686732, "grad_norm": 0.1309842813142355, "learning_rate": 1.0157116731181844e-06, "loss": 0.7185, "step": 5664 }, { "epoch": 2.8107235943899713, "grad_norm": 0.12466311030204848, "learning_rate": 1.0149253375967336e-06, "loss": 0.6622, "step": 5665 }, { "epoch": 2.8112200570932107, "grad_norm": 0.12962837245694803, "learning_rate": 1.0141392290451327e-06, "loss": 0.6915, "step": 5666 }, { "epoch": 2.8117165197964504, "grad_norm": 0.12989363238833468, "learning_rate": 1.0133533475835275e-06, "loss": 0.7217, "step": 5667 }, { "epoch": 2.8122129824996898, "grad_norm": 0.12706569640249818, "learning_rate": 1.0125676933320258e-06, "loss": 0.6702, "step": 5668 }, { "epoch": 2.812709445202929, "grad_norm": 0.1304053742764276, "learning_rate": 1.0117822664107039e-06, "loss": 0.6888, "step": 5669 }, { "epoch": 2.8132059079061684, "grad_norm": 0.12731183078943037, "learning_rate": 1.0109970669396008e-06, "loss": 0.7023, "step": 5670 }, { "epoch": 2.813702370609408, "grad_norm": 0.12732905106726986, "learning_rate": 1.0102120950387217e-06, "loss": 0.7226, "step": 5671 }, { "epoch": 2.8141988333126475, "grad_norm": 0.1306176241196579, "learning_rate": 1.0094273508280369e-06, "loss": 0.7277, "step": 5672 }, { "epoch": 2.814695296015887, "grad_norm": 0.12671882523359002, "learning_rate": 1.008642834427481e-06, "loss": 0.7285, "step": 5673 }, { "epoch": 2.815191758719126, "grad_norm": 0.13283779420386785, "learning_rate": 1.0078585459569568e-06, "loss": 0.7146, "step": 5674 }, { "epoch": 2.8156882214223655, "grad_norm": 0.12814418090121601, "learning_rate": 1.0070744855363283e-06, "loss": 0.7038, "step": 5675 }, { "epoch": 2.816184684125605, "grad_norm": 0.12617325806218707, "learning_rate": 1.0062906532854284e-06, "loss": 0.6782, "step": 5676 }, { "epoch": 2.8166811468288446, "grad_norm": 0.1304282198590571, "learning_rate": 1.0055070493240521e-06, "loss": 0.7038, "step": 5677 }, { "epoch": 2.817177609532084, "grad_norm": 0.12890829179767882, "learning_rate": 1.00472367377196e-06, "loss": 0.6802, "step": 5678 }, { "epoch": 2.8176740722353233, "grad_norm": 0.13108770095262734, "learning_rate": 1.0039405267488805e-06, "loss": 0.7088, "step": 5679 }, { "epoch": 2.8181705349385626, "grad_norm": 0.12760496474052235, "learning_rate": 1.003157608374503e-06, "loss": 0.6967, "step": 5680 }, { "epoch": 2.8186669976418024, "grad_norm": 0.12724173087136592, "learning_rate": 1.002374918768486e-06, "loss": 0.6888, "step": 5681 }, { "epoch": 2.8191634603450417, "grad_norm": 0.1281546403873397, "learning_rate": 1.0015924580504502e-06, "loss": 0.6831, "step": 5682 }, { "epoch": 2.819659923048281, "grad_norm": 0.1306599446069587, "learning_rate": 1.000810226339981e-06, "loss": 0.6949, "step": 5683 }, { "epoch": 2.8201563857515204, "grad_norm": 0.13275998082707438, "learning_rate": 1.000028223756632e-06, "loss": 0.7333, "step": 5684 }, { "epoch": 2.8206528484547597, "grad_norm": 0.1259944276365557, "learning_rate": 9.99246450419918e-07, "loss": 0.7038, "step": 5685 }, { "epoch": 2.821149311157999, "grad_norm": 0.1258776313988055, "learning_rate": 9.984649064493228e-07, "loss": 0.6935, "step": 5686 }, { "epoch": 2.821645773861239, "grad_norm": 0.1277780519819066, "learning_rate": 9.976835919642897e-07, "loss": 0.6888, "step": 5687 }, { "epoch": 2.822142236564478, "grad_norm": 0.1255875204317941, "learning_rate": 9.969025070842326e-07, "loss": 0.7692, "step": 5688 }, { "epoch": 2.8226386992677175, "grad_norm": 0.12833215364936337, "learning_rate": 9.961216519285272e-07, "loss": 0.6744, "step": 5689 }, { "epoch": 2.823135161970957, "grad_norm": 0.1316740562970518, "learning_rate": 9.953410266165131e-07, "loss": 0.7523, "step": 5690 }, { "epoch": 2.8236316246741966, "grad_norm": 0.12733567410451435, "learning_rate": 9.945606312674991e-07, "loss": 0.6814, "step": 5691 }, { "epoch": 2.824128087377436, "grad_norm": 0.13152963950218258, "learning_rate": 9.937804660007545e-07, "loss": 0.7384, "step": 5692 }, { "epoch": 2.8246245500806753, "grad_norm": 0.12905971140335962, "learning_rate": 9.930005309355143e-07, "loss": 0.6819, "step": 5693 }, { "epoch": 2.8251210127839146, "grad_norm": 0.1293289412286106, "learning_rate": 9.922208261909811e-07, "loss": 0.6689, "step": 5694 }, { "epoch": 2.825617475487154, "grad_norm": 0.12656866487417895, "learning_rate": 9.91441351886318e-07, "loss": 0.671, "step": 5695 }, { "epoch": 2.8261139381903932, "grad_norm": 0.12682407823609218, "learning_rate": 9.906621081406575e-07, "loss": 0.7023, "step": 5696 }, { "epoch": 2.826610400893633, "grad_norm": 0.12481510953803818, "learning_rate": 9.898830950730935e-07, "loss": 0.7291, "step": 5697 }, { "epoch": 2.8271068635968724, "grad_norm": 0.13429126569598215, "learning_rate": 9.891043128026846e-07, "loss": 0.75, "step": 5698 }, { "epoch": 2.8276033263001117, "grad_norm": 0.12713270388437029, "learning_rate": 9.883257614484568e-07, "loss": 0.6696, "step": 5699 }, { "epoch": 2.828099789003351, "grad_norm": 0.13204195693146775, "learning_rate": 9.875474411293977e-07, "loss": 0.7444, "step": 5700 }, { "epoch": 2.828596251706591, "grad_norm": 0.12538390670171082, "learning_rate": 9.86769351964463e-07, "loss": 0.6943, "step": 5701 }, { "epoch": 2.82909271440983, "grad_norm": 0.12942426088477443, "learning_rate": 9.859914940725698e-07, "loss": 0.6697, "step": 5702 }, { "epoch": 2.8295891771130695, "grad_norm": 0.13073956206544834, "learning_rate": 9.852138675726014e-07, "loss": 0.7027, "step": 5703 }, { "epoch": 2.830085639816309, "grad_norm": 0.13192378873945426, "learning_rate": 9.844364725834058e-07, "loss": 0.6702, "step": 5704 }, { "epoch": 2.830582102519548, "grad_norm": 0.13045843127382814, "learning_rate": 9.836593092237942e-07, "loss": 0.7186, "step": 5705 }, { "epoch": 2.8310785652227874, "grad_norm": 0.13209103372271133, "learning_rate": 9.828823776125455e-07, "loss": 0.6726, "step": 5706 }, { "epoch": 2.831575027926027, "grad_norm": 0.12836854761747843, "learning_rate": 9.82105677868399e-07, "loss": 0.6604, "step": 5707 }, { "epoch": 2.8320714906292666, "grad_norm": 0.12440008092199902, "learning_rate": 9.813292101100634e-07, "loss": 0.6939, "step": 5708 }, { "epoch": 2.832567953332506, "grad_norm": 0.13509346075273426, "learning_rate": 9.805529744562076e-07, "loss": 0.7395, "step": 5709 }, { "epoch": 2.833064416035745, "grad_norm": 0.12453204951253816, "learning_rate": 9.797769710254664e-07, "loss": 0.6882, "step": 5710 }, { "epoch": 2.833560878738985, "grad_norm": 0.12697842177012855, "learning_rate": 9.790011999364412e-07, "loss": 0.7063, "step": 5711 }, { "epoch": 2.8340573414422243, "grad_norm": 0.12754534007734417, "learning_rate": 9.782256613076945e-07, "loss": 0.7482, "step": 5712 }, { "epoch": 2.8345538041454637, "grad_norm": 0.13069481659575172, "learning_rate": 9.774503552577563e-07, "loss": 0.6936, "step": 5713 }, { "epoch": 2.835050266848703, "grad_norm": 0.13974772554638237, "learning_rate": 9.766752819051192e-07, "loss": 0.717, "step": 5714 }, { "epoch": 2.8355467295519423, "grad_norm": 0.1251939515003143, "learning_rate": 9.759004413682396e-07, "loss": 0.6509, "step": 5715 }, { "epoch": 2.8360431922551816, "grad_norm": 0.1312396524534019, "learning_rate": 9.751258337655418e-07, "loss": 0.7324, "step": 5716 }, { "epoch": 2.836539654958421, "grad_norm": 0.13060634236111196, "learning_rate": 9.743514592154093e-07, "loss": 0.732, "step": 5717 }, { "epoch": 2.8370361176616608, "grad_norm": 0.12983746553461947, "learning_rate": 9.735773178361965e-07, "loss": 0.7217, "step": 5718 }, { "epoch": 2.8375325803649, "grad_norm": 0.1313030207597098, "learning_rate": 9.728034097462144e-07, "loss": 0.7298, "step": 5719 }, { "epoch": 2.8380290430681394, "grad_norm": 0.1305730115816129, "learning_rate": 9.720297350637453e-07, "loss": 0.73, "step": 5720 }, { "epoch": 2.838525505771379, "grad_norm": 0.12959419549059026, "learning_rate": 9.712562939070322e-07, "loss": 0.7013, "step": 5721 }, { "epoch": 2.8390219684746185, "grad_norm": 0.13353606316263741, "learning_rate": 9.704830863942819e-07, "loss": 0.6893, "step": 5722 }, { "epoch": 2.839518431177858, "grad_norm": 0.12927989012251478, "learning_rate": 9.697101126436689e-07, "loss": 0.6941, "step": 5723 }, { "epoch": 2.840014893881097, "grad_norm": 0.12460555850658463, "learning_rate": 9.689373727733282e-07, "loss": 0.6749, "step": 5724 }, { "epoch": 2.8405113565843365, "grad_norm": 0.13017346927566242, "learning_rate": 9.681648669013618e-07, "loss": 0.6884, "step": 5725 }, { "epoch": 2.841007819287576, "grad_norm": 0.12587233851865584, "learning_rate": 9.673925951458347e-07, "loss": 0.6782, "step": 5726 }, { "epoch": 2.841504281990815, "grad_norm": 0.12769491177786568, "learning_rate": 9.66620557624775e-07, "loss": 0.7175, "step": 5727 }, { "epoch": 2.842000744694055, "grad_norm": 0.13179110270409086, "learning_rate": 9.658487544561778e-07, "loss": 0.7183, "step": 5728 }, { "epoch": 2.8424972073972943, "grad_norm": 0.13106533480871427, "learning_rate": 9.650771857580007e-07, "loss": 0.7449, "step": 5729 }, { "epoch": 2.8429936701005336, "grad_norm": 0.12932936151209334, "learning_rate": 9.64305851648164e-07, "loss": 0.7064, "step": 5730 }, { "epoch": 2.8434901328037734, "grad_norm": 0.13312505296398447, "learning_rate": 9.63534752244556e-07, "loss": 0.763, "step": 5731 }, { "epoch": 2.8439865955070127, "grad_norm": 0.12320453586612351, "learning_rate": 9.627638876650245e-07, "loss": 0.6423, "step": 5732 }, { "epoch": 2.844483058210252, "grad_norm": 0.12794077575330426, "learning_rate": 9.61993258027386e-07, "loss": 0.6768, "step": 5733 }, { "epoch": 2.8449795209134914, "grad_norm": 0.1266752168624091, "learning_rate": 9.612228634494184e-07, "loss": 0.6743, "step": 5734 }, { "epoch": 2.8454759836167307, "grad_norm": 0.12653827532865938, "learning_rate": 9.604527040488631e-07, "loss": 0.6873, "step": 5735 }, { "epoch": 2.84597244631997, "grad_norm": 0.13105455883016892, "learning_rate": 9.596827799434278e-07, "loss": 0.7279, "step": 5736 }, { "epoch": 2.8464689090232094, "grad_norm": 0.12824418068445376, "learning_rate": 9.589130912507812e-07, "loss": 0.6876, "step": 5737 }, { "epoch": 2.846965371726449, "grad_norm": 0.12298579417263769, "learning_rate": 9.581436380885604e-07, "loss": 0.6416, "step": 5738 }, { "epoch": 2.8474618344296885, "grad_norm": 0.12704521892342857, "learning_rate": 9.573744205743613e-07, "loss": 0.7144, "step": 5739 }, { "epoch": 2.847958297132928, "grad_norm": 0.13151764091738496, "learning_rate": 9.566054388257492e-07, "loss": 0.6985, "step": 5740 }, { "epoch": 2.8484547598361676, "grad_norm": 0.12281688304650965, "learning_rate": 9.558366929602492e-07, "loss": 0.6525, "step": 5741 }, { "epoch": 2.848951222539407, "grad_norm": 0.12985743528766544, "learning_rate": 9.55068183095351e-07, "loss": 0.7118, "step": 5742 }, { "epoch": 2.8494476852426462, "grad_norm": 0.12697246716589147, "learning_rate": 9.542999093485108e-07, "loss": 0.686, "step": 5743 }, { "epoch": 2.8499441479458856, "grad_norm": 0.12891175104431518, "learning_rate": 9.535318718371453e-07, "loss": 0.7149, "step": 5744 }, { "epoch": 2.850440610649125, "grad_norm": 0.12952157865563893, "learning_rate": 9.527640706786381e-07, "loss": 0.6881, "step": 5745 }, { "epoch": 2.8509370733523642, "grad_norm": 0.13148893826296196, "learning_rate": 9.519965059903349e-07, "loss": 0.6798, "step": 5746 }, { "epoch": 2.8514335360556036, "grad_norm": 0.13521982035020255, "learning_rate": 9.512291778895444e-07, "loss": 0.7152, "step": 5747 }, { "epoch": 2.8519299987588433, "grad_norm": 0.13512198272074397, "learning_rate": 9.504620864935421e-07, "loss": 0.7148, "step": 5748 }, { "epoch": 2.8524264614620827, "grad_norm": 0.1266177814142838, "learning_rate": 9.496952319195643e-07, "loss": 0.6658, "step": 5749 }, { "epoch": 2.852922924165322, "grad_norm": 0.12979818368870435, "learning_rate": 9.489286142848148e-07, "loss": 0.7075, "step": 5750 }, { "epoch": 2.8534193868685613, "grad_norm": 0.12554115281420927, "learning_rate": 9.481622337064552e-07, "loss": 0.6843, "step": 5751 }, { "epoch": 2.853915849571801, "grad_norm": 0.1320343536961316, "learning_rate": 9.473960903016175e-07, "loss": 0.6923, "step": 5752 }, { "epoch": 2.8544123122750404, "grad_norm": 0.12386913184376706, "learning_rate": 9.466301841873929e-07, "loss": 0.6594, "step": 5753 }, { "epoch": 2.8549087749782798, "grad_norm": 0.13316531050788066, "learning_rate": 9.458645154808377e-07, "loss": 0.7124, "step": 5754 }, { "epoch": 2.855405237681519, "grad_norm": 0.1300731124347825, "learning_rate": 9.450990842989732e-07, "loss": 0.709, "step": 5755 }, { "epoch": 2.8559017003847584, "grad_norm": 0.12945665805477527, "learning_rate": 9.443338907587821e-07, "loss": 0.7232, "step": 5756 }, { "epoch": 2.8563981630879978, "grad_norm": 0.13305413501745098, "learning_rate": 9.435689349772135e-07, "loss": 0.7081, "step": 5757 }, { "epoch": 2.8568946257912375, "grad_norm": 0.1283285050578409, "learning_rate": 9.428042170711776e-07, "loss": 0.7441, "step": 5758 }, { "epoch": 2.857391088494477, "grad_norm": 0.13256787722435986, "learning_rate": 9.420397371575485e-07, "loss": 0.7147, "step": 5759 }, { "epoch": 2.857887551197716, "grad_norm": 0.13501707551704034, "learning_rate": 9.412754953531664e-07, "loss": 0.7473, "step": 5760 }, { "epoch": 2.8583840139009555, "grad_norm": 0.12779455860264854, "learning_rate": 9.405114917748318e-07, "loss": 0.6941, "step": 5761 }, { "epoch": 2.8588804766041953, "grad_norm": 0.13350164440946205, "learning_rate": 9.397477265393121e-07, "loss": 0.7079, "step": 5762 }, { "epoch": 2.8593769393074346, "grad_norm": 0.12695906616841662, "learning_rate": 9.389841997633356e-07, "loss": 0.7138, "step": 5763 }, { "epoch": 2.859873402010674, "grad_norm": 0.1339764757838581, "learning_rate": 9.382209115635942e-07, "loss": 0.686, "step": 5764 }, { "epoch": 2.8603698647139133, "grad_norm": 0.13278274095995127, "learning_rate": 9.374578620567462e-07, "loss": 0.7039, "step": 5765 }, { "epoch": 2.8608663274171526, "grad_norm": 0.1277777239083202, "learning_rate": 9.366950513594106e-07, "loss": 0.7353, "step": 5766 }, { "epoch": 2.861362790120392, "grad_norm": 0.12832944909129318, "learning_rate": 9.359324795881708e-07, "loss": 0.726, "step": 5767 }, { "epoch": 2.8618592528236317, "grad_norm": 0.12594067772100392, "learning_rate": 9.351701468595734e-07, "loss": 0.7546, "step": 5768 }, { "epoch": 2.862355715526871, "grad_norm": 0.128006328679492, "learning_rate": 9.344080532901282e-07, "loss": 0.708, "step": 5769 }, { "epoch": 2.8628521782301104, "grad_norm": 0.1275045024635113, "learning_rate": 9.336461989963102e-07, "loss": 0.6734, "step": 5770 }, { "epoch": 2.8633486409333497, "grad_norm": 0.1414589237121449, "learning_rate": 9.328845840945555e-07, "loss": 0.7276, "step": 5771 }, { "epoch": 2.8638451036365895, "grad_norm": 0.12748603017529134, "learning_rate": 9.321232087012664e-07, "loss": 0.6894, "step": 5772 }, { "epoch": 2.864341566339829, "grad_norm": 0.12968727130669763, "learning_rate": 9.313620729328055e-07, "loss": 0.6977, "step": 5773 }, { "epoch": 2.864838029043068, "grad_norm": 0.12740734697560116, "learning_rate": 9.306011769054999e-07, "loss": 0.702, "step": 5774 }, { "epoch": 2.8653344917463075, "grad_norm": 0.13202429187909928, "learning_rate": 9.298405207356418e-07, "loss": 0.7235, "step": 5775 }, { "epoch": 2.865830954449547, "grad_norm": 0.12516118343049698, "learning_rate": 9.290801045394837e-07, "loss": 0.6962, "step": 5776 }, { "epoch": 2.866327417152786, "grad_norm": 0.12466908791148393, "learning_rate": 9.283199284332448e-07, "loss": 0.6828, "step": 5777 }, { "epoch": 2.866823879856026, "grad_norm": 0.13168847619221924, "learning_rate": 9.275599925331047e-07, "loss": 0.7276, "step": 5778 }, { "epoch": 2.8673203425592653, "grad_norm": 0.12380760924529202, "learning_rate": 9.268002969552068e-07, "loss": 0.7109, "step": 5779 }, { "epoch": 2.8678168052625046, "grad_norm": 0.1273101006035769, "learning_rate": 9.260408418156597e-07, "loss": 0.7053, "step": 5780 }, { "epoch": 2.868313267965744, "grad_norm": 0.12716786520734547, "learning_rate": 9.252816272305328e-07, "loss": 0.6563, "step": 5781 }, { "epoch": 2.8688097306689837, "grad_norm": 0.12534292127359717, "learning_rate": 9.245226533158624e-07, "loss": 0.6822, "step": 5782 }, { "epoch": 2.869306193372223, "grad_norm": 0.12864761504663866, "learning_rate": 9.237639201876417e-07, "loss": 0.7303, "step": 5783 }, { "epoch": 2.8698026560754624, "grad_norm": 0.12941975850294216, "learning_rate": 9.230054279618336e-07, "loss": 0.6791, "step": 5784 }, { "epoch": 2.8702991187787017, "grad_norm": 0.12715616195640328, "learning_rate": 9.222471767543608e-07, "loss": 0.677, "step": 5785 }, { "epoch": 2.870795581481941, "grad_norm": 0.1354882762503265, "learning_rate": 9.214891666811088e-07, "loss": 0.7547, "step": 5786 }, { "epoch": 2.8712920441851804, "grad_norm": 0.12912527608146743, "learning_rate": 9.207313978579289e-07, "loss": 0.6888, "step": 5787 }, { "epoch": 2.87178850688842, "grad_norm": 0.1349499512563821, "learning_rate": 9.199738704006322e-07, "loss": 0.7308, "step": 5788 }, { "epoch": 2.8722849695916595, "grad_norm": 0.1306875820845483, "learning_rate": 9.192165844249967e-07, "loss": 0.6544, "step": 5789 }, { "epoch": 2.872781432294899, "grad_norm": 0.1284137449808761, "learning_rate": 9.184595400467605e-07, "loss": 0.6812, "step": 5790 }, { "epoch": 2.873277894998138, "grad_norm": 0.12848694849478118, "learning_rate": 9.177027373816244e-07, "loss": 0.7192, "step": 5791 }, { "epoch": 2.873774357701378, "grad_norm": 0.12937550210725762, "learning_rate": 9.169461765452556e-07, "loss": 0.6748, "step": 5792 }, { "epoch": 2.8742708204046172, "grad_norm": 0.12782097735362807, "learning_rate": 9.161898576532805e-07, "loss": 0.6797, "step": 5793 }, { "epoch": 2.8747672831078566, "grad_norm": 0.1305376367487405, "learning_rate": 9.154337808212921e-07, "loss": 0.6827, "step": 5794 }, { "epoch": 2.875263745811096, "grad_norm": 0.12880050176675548, "learning_rate": 9.146779461648437e-07, "loss": 0.6894, "step": 5795 }, { "epoch": 2.8757602085143352, "grad_norm": 0.1259680592660518, "learning_rate": 9.139223537994519e-07, "loss": 0.6695, "step": 5796 }, { "epoch": 2.8762566712175746, "grad_norm": 0.12642296565251043, "learning_rate": 9.131670038405979e-07, "loss": 0.674, "step": 5797 }, { "epoch": 2.8767531339208143, "grad_norm": 0.12515161938813915, "learning_rate": 9.124118964037246e-07, "loss": 0.6419, "step": 5798 }, { "epoch": 2.8772495966240537, "grad_norm": 0.13621267258149164, "learning_rate": 9.11657031604238e-07, "loss": 0.764, "step": 5799 }, { "epoch": 2.877746059327293, "grad_norm": 0.1267504655200429, "learning_rate": 9.109024095575062e-07, "loss": 0.6827, "step": 5800 }, { "epoch": 2.8782425220305323, "grad_norm": 0.12834380780687052, "learning_rate": 9.101480303788623e-07, "loss": 0.7056, "step": 5801 }, { "epoch": 2.878738984733772, "grad_norm": 0.14054019031479334, "learning_rate": 9.093938941836012e-07, "loss": 0.7741, "step": 5802 }, { "epoch": 2.8792354474370114, "grad_norm": 0.1292406739225414, "learning_rate": 9.086400010869787e-07, "loss": 0.6723, "step": 5803 }, { "epoch": 2.8797319101402508, "grad_norm": 0.13155167047598984, "learning_rate": 9.078863512042174e-07, "loss": 0.6905, "step": 5804 }, { "epoch": 2.88022837284349, "grad_norm": 0.128899843641201, "learning_rate": 9.071329446504997e-07, "loss": 0.6903, "step": 5805 }, { "epoch": 2.8807248355467294, "grad_norm": 0.12938294267895303, "learning_rate": 9.063797815409711e-07, "loss": 0.7468, "step": 5806 }, { "epoch": 2.8812212982499688, "grad_norm": 0.1302289199669989, "learning_rate": 9.056268619907418e-07, "loss": 0.704, "step": 5807 }, { "epoch": 2.8817177609532085, "grad_norm": 0.12864445639975267, "learning_rate": 9.048741861148822e-07, "loss": 0.6994, "step": 5808 }, { "epoch": 2.882214223656448, "grad_norm": 0.12590829953054358, "learning_rate": 9.041217540284278e-07, "loss": 0.6584, "step": 5809 }, { "epoch": 2.882710686359687, "grad_norm": 0.13423088059524024, "learning_rate": 9.033695658463757e-07, "loss": 0.705, "step": 5810 }, { "epoch": 2.8832071490629265, "grad_norm": 0.12841940675440747, "learning_rate": 9.026176216836844e-07, "loss": 0.6997, "step": 5811 }, { "epoch": 2.8837036117661663, "grad_norm": 0.1301253908505875, "learning_rate": 9.018659216552783e-07, "loss": 0.7129, "step": 5812 }, { "epoch": 2.8842000744694056, "grad_norm": 0.1280984017417436, "learning_rate": 9.011144658760413e-07, "loss": 0.7191, "step": 5813 }, { "epoch": 2.884696537172645, "grad_norm": 0.12267985245047328, "learning_rate": 9.003632544608235e-07, "loss": 0.6914, "step": 5814 }, { "epoch": 2.8851929998758843, "grad_norm": 0.1287583871760695, "learning_rate": 8.996122875244325e-07, "loss": 0.728, "step": 5815 }, { "epoch": 2.8856894625791236, "grad_norm": 0.12382625301443378, "learning_rate": 8.98861565181644e-07, "loss": 0.6676, "step": 5816 }, { "epoch": 2.886185925282363, "grad_norm": 0.13281271821429838, "learning_rate": 8.981110875471929e-07, "loss": 0.7116, "step": 5817 }, { "epoch": 2.8866823879856027, "grad_norm": 0.13692553089465404, "learning_rate": 8.973608547357768e-07, "loss": 0.7455, "step": 5818 }, { "epoch": 2.887178850688842, "grad_norm": 0.1289222222449465, "learning_rate": 8.966108668620585e-07, "loss": 0.7099, "step": 5819 }, { "epoch": 2.8876753133920814, "grad_norm": 0.1312399620437457, "learning_rate": 8.958611240406601e-07, "loss": 0.7066, "step": 5820 }, { "epoch": 2.8881717760953207, "grad_norm": 0.13075456926805842, "learning_rate": 8.951116263861693e-07, "loss": 0.6928, "step": 5821 }, { "epoch": 2.8886682387985605, "grad_norm": 0.128303038865835, "learning_rate": 8.943623740131338e-07, "loss": 0.6977, "step": 5822 }, { "epoch": 2.8891647015018, "grad_norm": 0.13374921271432447, "learning_rate": 8.936133670360644e-07, "loss": 0.6792, "step": 5823 }, { "epoch": 2.889661164205039, "grad_norm": 0.13148186697294473, "learning_rate": 8.928646055694362e-07, "loss": 0.7183, "step": 5824 }, { "epoch": 2.8901576269082785, "grad_norm": 0.1288745562512283, "learning_rate": 8.921160897276837e-07, "loss": 0.707, "step": 5825 }, { "epoch": 2.890654089611518, "grad_norm": 0.13355621762460396, "learning_rate": 8.913678196252071e-07, "loss": 0.7678, "step": 5826 }, { "epoch": 2.891150552314757, "grad_norm": 0.1241939860039103, "learning_rate": 8.90619795376367e-07, "loss": 0.6613, "step": 5827 }, { "epoch": 2.891647015017997, "grad_norm": 0.13480780396182257, "learning_rate": 8.898720170954858e-07, "loss": 0.7461, "step": 5828 }, { "epoch": 2.8921434777212363, "grad_norm": 0.13949524462587928, "learning_rate": 8.891244848968514e-07, "loss": 0.8129, "step": 5829 }, { "epoch": 2.8926399404244756, "grad_norm": 0.13174343677934816, "learning_rate": 8.883771988947099e-07, "loss": 0.7369, "step": 5830 }, { "epoch": 2.893136403127715, "grad_norm": 0.1329364719930937, "learning_rate": 8.876301592032749e-07, "loss": 0.7148, "step": 5831 }, { "epoch": 2.8936328658309547, "grad_norm": 0.1272203938665379, "learning_rate": 8.868833659367163e-07, "loss": 0.6465, "step": 5832 }, { "epoch": 2.894129328534194, "grad_norm": 0.1293924264978683, "learning_rate": 8.861368192091713e-07, "loss": 0.6836, "step": 5833 }, { "epoch": 2.8946257912374334, "grad_norm": 0.13220400859603937, "learning_rate": 8.853905191347373e-07, "loss": 0.7098, "step": 5834 }, { "epoch": 2.8951222539406727, "grad_norm": 0.13015024295581795, "learning_rate": 8.846444658274733e-07, "loss": 0.6838, "step": 5835 }, { "epoch": 2.895618716643912, "grad_norm": 0.13216797055404755, "learning_rate": 8.838986594014034e-07, "loss": 0.7106, "step": 5836 }, { "epoch": 2.8961151793471513, "grad_norm": 0.1278107756500494, "learning_rate": 8.831530999705104e-07, "loss": 0.7313, "step": 5837 }, { "epoch": 2.896611642050391, "grad_norm": 0.12908014728449954, "learning_rate": 8.824077876487424e-07, "loss": 0.7177, "step": 5838 }, { "epoch": 2.8971081047536305, "grad_norm": 0.15225280744669853, "learning_rate": 8.816627225500082e-07, "loss": 0.6728, "step": 5839 }, { "epoch": 2.89760456745687, "grad_norm": 0.12843463953521542, "learning_rate": 8.80917904788178e-07, "loss": 0.6796, "step": 5840 }, { "epoch": 2.898101030160109, "grad_norm": 0.125053238809787, "learning_rate": 8.801733344770869e-07, "loss": 0.6684, "step": 5841 }, { "epoch": 2.898597492863349, "grad_norm": 0.12924011340437813, "learning_rate": 8.794290117305296e-07, "loss": 0.6866, "step": 5842 }, { "epoch": 2.899093955566588, "grad_norm": 0.12939827655850278, "learning_rate": 8.786849366622629e-07, "loss": 0.6713, "step": 5843 }, { "epoch": 2.8995904182698276, "grad_norm": 0.1369447430816981, "learning_rate": 8.77941109386009e-07, "loss": 0.7056, "step": 5844 }, { "epoch": 2.900086880973067, "grad_norm": 0.13371285950295925, "learning_rate": 8.771975300154478e-07, "loss": 0.703, "step": 5845 }, { "epoch": 2.900583343676306, "grad_norm": 0.13319591109450518, "learning_rate": 8.764541986642253e-07, "loss": 0.6762, "step": 5846 }, { "epoch": 2.9010798063795455, "grad_norm": 0.12748929289799496, "learning_rate": 8.757111154459472e-07, "loss": 0.6883, "step": 5847 }, { "epoch": 2.9015762690827853, "grad_norm": 0.13341860100028502, "learning_rate": 8.749682804741813e-07, "loss": 0.7439, "step": 5848 }, { "epoch": 2.9020727317860247, "grad_norm": 0.13140731624898971, "learning_rate": 8.742256938624585e-07, "loss": 0.6919, "step": 5849 }, { "epoch": 2.902569194489264, "grad_norm": 0.126423779251477, "learning_rate": 8.7348335572427e-07, "loss": 0.6907, "step": 5850 }, { "epoch": 2.9030656571925033, "grad_norm": 0.12587303898494598, "learning_rate": 8.727412661730724e-07, "loss": 0.6969, "step": 5851 }, { "epoch": 2.903562119895743, "grad_norm": 0.12579584782238049, "learning_rate": 8.719994253222805e-07, "loss": 0.7108, "step": 5852 }, { "epoch": 2.9040585825989824, "grad_norm": 0.13036032819321902, "learning_rate": 8.712578332852739e-07, "loss": 0.6665, "step": 5853 }, { "epoch": 2.9045550453022217, "grad_norm": 0.12737767537759337, "learning_rate": 8.70516490175393e-07, "loss": 0.714, "step": 5854 }, { "epoch": 2.905051508005461, "grad_norm": 0.12518922341713093, "learning_rate": 8.697753961059385e-07, "loss": 0.6743, "step": 5855 }, { "epoch": 2.9055479707087004, "grad_norm": 0.13072186624967902, "learning_rate": 8.690345511901771e-07, "loss": 0.7178, "step": 5856 }, { "epoch": 2.9060444334119397, "grad_norm": 0.12797230065596052, "learning_rate": 8.682939555413334e-07, "loss": 0.6979, "step": 5857 }, { "epoch": 2.906540896115179, "grad_norm": 0.13125850276158907, "learning_rate": 8.675536092725967e-07, "loss": 0.6864, "step": 5858 }, { "epoch": 2.907037358818419, "grad_norm": 0.12641599740685236, "learning_rate": 8.668135124971166e-07, "loss": 0.6801, "step": 5859 }, { "epoch": 2.907533821521658, "grad_norm": 0.127803193061895, "learning_rate": 8.660736653280041e-07, "loss": 0.6713, "step": 5860 }, { "epoch": 2.9080302842248975, "grad_norm": 0.13027630370244397, "learning_rate": 8.653340678783347e-07, "loss": 0.6756, "step": 5861 }, { "epoch": 2.9085267469281373, "grad_norm": 0.12973586850281724, "learning_rate": 8.645947202611423e-07, "loss": 0.6992, "step": 5862 }, { "epoch": 2.9090232096313766, "grad_norm": 0.1325303303913486, "learning_rate": 8.638556225894271e-07, "loss": 0.7145, "step": 5863 }, { "epoch": 2.909519672334616, "grad_norm": 0.12990059009224683, "learning_rate": 8.631167749761443e-07, "loss": 0.6904, "step": 5864 }, { "epoch": 2.9100161350378553, "grad_norm": 0.131277660839979, "learning_rate": 8.623781775342183e-07, "loss": 0.7115, "step": 5865 }, { "epoch": 2.9105125977410946, "grad_norm": 0.1353236422896994, "learning_rate": 8.616398303765303e-07, "loss": 0.7022, "step": 5866 }, { "epoch": 2.911009060444334, "grad_norm": 0.13235776611917602, "learning_rate": 8.609017336159243e-07, "loss": 0.6841, "step": 5867 }, { "epoch": 2.9115055231475733, "grad_norm": 0.1352005618433371, "learning_rate": 8.601638873652082e-07, "loss": 0.7363, "step": 5868 }, { "epoch": 2.912001985850813, "grad_norm": 0.1259476367266435, "learning_rate": 8.594262917371485e-07, "loss": 0.6729, "step": 5869 }, { "epoch": 2.9124984485540524, "grad_norm": 0.1260089860773777, "learning_rate": 8.586889468444761e-07, "loss": 0.6744, "step": 5870 }, { "epoch": 2.9129949112572917, "grad_norm": 0.13405412341583484, "learning_rate": 8.57951852799882e-07, "loss": 0.7332, "step": 5871 }, { "epoch": 2.9134913739605315, "grad_norm": 0.12637262916741504, "learning_rate": 8.572150097160179e-07, "loss": 0.7096, "step": 5872 }, { "epoch": 2.913987836663771, "grad_norm": 0.12478537511556445, "learning_rate": 8.564784177055005e-07, "loss": 0.6937, "step": 5873 }, { "epoch": 2.91448429936701, "grad_norm": 0.13209726484500378, "learning_rate": 8.557420768809041e-07, "loss": 0.7309, "step": 5874 }, { "epoch": 2.9149807620702495, "grad_norm": 0.13508309020119716, "learning_rate": 8.550059873547686e-07, "loss": 0.6698, "step": 5875 }, { "epoch": 2.915477224773489, "grad_norm": 0.12122947126692105, "learning_rate": 8.542701492395924e-07, "loss": 0.6677, "step": 5876 }, { "epoch": 2.915973687476728, "grad_norm": 0.12335372065030592, "learning_rate": 8.535345626478355e-07, "loss": 0.6776, "step": 5877 }, { "epoch": 2.9164701501799675, "grad_norm": 0.12779589751503279, "learning_rate": 8.527992276919228e-07, "loss": 0.6466, "step": 5878 }, { "epoch": 2.9169666128832072, "grad_norm": 0.12792448170766138, "learning_rate": 8.520641444842373e-07, "loss": 0.6864, "step": 5879 }, { "epoch": 2.9174630755864466, "grad_norm": 0.12567621559611838, "learning_rate": 8.513293131371245e-07, "loss": 0.7195, "step": 5880 }, { "epoch": 2.917959538289686, "grad_norm": 0.12846917163481486, "learning_rate": 8.50594733762892e-07, "loss": 0.7167, "step": 5881 }, { "epoch": 2.9184560009929257, "grad_norm": 0.13195032168838147, "learning_rate": 8.498604064738072e-07, "loss": 0.7381, "step": 5882 }, { "epoch": 2.918952463696165, "grad_norm": 0.13038989198058165, "learning_rate": 8.491263313821021e-07, "loss": 0.6773, "step": 5883 }, { "epoch": 2.9194489263994043, "grad_norm": 0.12624725579988283, "learning_rate": 8.483925085999667e-07, "loss": 0.6843, "step": 5884 }, { "epoch": 2.9199453891026437, "grad_norm": 0.1289543904324568, "learning_rate": 8.476589382395558e-07, "loss": 0.7618, "step": 5885 }, { "epoch": 2.920441851805883, "grad_norm": 0.12430274931985501, "learning_rate": 8.469256204129827e-07, "loss": 0.6734, "step": 5886 }, { "epoch": 2.9209383145091223, "grad_norm": 0.13183276150434192, "learning_rate": 8.461925552323231e-07, "loss": 0.6935, "step": 5887 }, { "epoch": 2.9214347772123617, "grad_norm": 0.12471273296702598, "learning_rate": 8.45459742809615e-07, "loss": 0.6639, "step": 5888 }, { "epoch": 2.9219312399156014, "grad_norm": 0.13312244162746337, "learning_rate": 8.44727183256856e-07, "loss": 0.6958, "step": 5889 }, { "epoch": 2.9224277026188408, "grad_norm": 0.12804074982988345, "learning_rate": 8.439948766860076e-07, "loss": 0.7105, "step": 5890 }, { "epoch": 2.92292416532208, "grad_norm": 0.13266023301103108, "learning_rate": 8.432628232089904e-07, "loss": 0.7281, "step": 5891 }, { "epoch": 2.9234206280253194, "grad_norm": 0.12978177617631043, "learning_rate": 8.42531022937686e-07, "loss": 0.6977, "step": 5892 }, { "epoch": 2.923917090728559, "grad_norm": 0.1309078003351543, "learning_rate": 8.417994759839401e-07, "loss": 0.6945, "step": 5893 }, { "epoch": 2.9244135534317985, "grad_norm": 0.12245231712292412, "learning_rate": 8.41068182459556e-07, "loss": 0.6611, "step": 5894 }, { "epoch": 2.924910016135038, "grad_norm": 0.1281010559347382, "learning_rate": 8.403371424763032e-07, "loss": 0.6796, "step": 5895 }, { "epoch": 2.925406478838277, "grad_norm": 0.13285716334194994, "learning_rate": 8.396063561459058e-07, "loss": 0.7713, "step": 5896 }, { "epoch": 2.9259029415415165, "grad_norm": 0.12525104544597734, "learning_rate": 8.388758235800551e-07, "loss": 0.669, "step": 5897 }, { "epoch": 2.926399404244756, "grad_norm": 0.12692092156468257, "learning_rate": 8.381455448904008e-07, "loss": 0.716, "step": 5898 }, { "epoch": 2.9268958669479956, "grad_norm": 0.12577897045335742, "learning_rate": 8.374155201885533e-07, "loss": 0.7094, "step": 5899 }, { "epoch": 2.927392329651235, "grad_norm": 0.1270829254888127, "learning_rate": 8.366857495860869e-07, "loss": 0.7035, "step": 5900 }, { "epoch": 2.9278887923544743, "grad_norm": 0.134187041399689, "learning_rate": 8.359562331945337e-07, "loss": 0.7231, "step": 5901 }, { "epoch": 2.9283852550577136, "grad_norm": 0.13133787410693823, "learning_rate": 8.352269711253902e-07, "loss": 0.7019, "step": 5902 }, { "epoch": 2.9288817177609534, "grad_norm": 0.13216722658500193, "learning_rate": 8.344979634901115e-07, "loss": 0.7305, "step": 5903 }, { "epoch": 2.9293781804641927, "grad_norm": 0.12991923696083893, "learning_rate": 8.337692104001138e-07, "loss": 0.72, "step": 5904 }, { "epoch": 2.929874643167432, "grad_norm": 0.13021761831749384, "learning_rate": 8.330407119667775e-07, "loss": 0.7079, "step": 5905 }, { "epoch": 2.9303711058706714, "grad_norm": 0.12848310312169373, "learning_rate": 8.323124683014394e-07, "loss": 0.7022, "step": 5906 }, { "epoch": 2.9308675685739107, "grad_norm": 0.12155773897471595, "learning_rate": 8.315844795154024e-07, "loss": 0.7003, "step": 5907 }, { "epoch": 2.93136403127715, "grad_norm": 0.12783767747738128, "learning_rate": 8.308567457199265e-07, "loss": 0.6956, "step": 5908 }, { "epoch": 2.93186049398039, "grad_norm": 0.12517915661300882, "learning_rate": 8.301292670262334e-07, "loss": 0.6805, "step": 5909 }, { "epoch": 2.932356956683629, "grad_norm": 0.12818781769460139, "learning_rate": 8.294020435455085e-07, "loss": 0.6837, "step": 5910 }, { "epoch": 2.9328534193868685, "grad_norm": 0.12740869036254676, "learning_rate": 8.286750753888953e-07, "loss": 0.7211, "step": 5911 }, { "epoch": 2.933349882090108, "grad_norm": 0.12327113825475457, "learning_rate": 8.279483626674992e-07, "loss": 0.6925, "step": 5912 }, { "epoch": 2.9338463447933476, "grad_norm": 0.12387658865943076, "learning_rate": 8.272219054923855e-07, "loss": 0.6407, "step": 5913 }, { "epoch": 2.934342807496587, "grad_norm": 0.12861942462803927, "learning_rate": 8.264957039745835e-07, "loss": 0.6996, "step": 5914 }, { "epoch": 2.9348392701998263, "grad_norm": 0.12836506417919924, "learning_rate": 8.257697582250807e-07, "loss": 0.7062, "step": 5915 }, { "epoch": 2.9353357329030656, "grad_norm": 0.13217483481431683, "learning_rate": 8.250440683548253e-07, "loss": 0.7346, "step": 5916 }, { "epoch": 2.935832195606305, "grad_norm": 0.12771709009606644, "learning_rate": 8.243186344747286e-07, "loss": 0.7331, "step": 5917 }, { "epoch": 2.9363286583095443, "grad_norm": 0.12511348202532716, "learning_rate": 8.235934566956616e-07, "loss": 0.6774, "step": 5918 }, { "epoch": 2.936825121012784, "grad_norm": 0.1304719690078209, "learning_rate": 8.228685351284547e-07, "loss": 0.7227, "step": 5919 }, { "epoch": 2.9373215837160234, "grad_norm": 0.12839066350051087, "learning_rate": 8.221438698839021e-07, "loss": 0.6943, "step": 5920 }, { "epoch": 2.9378180464192627, "grad_norm": 0.12944145552846098, "learning_rate": 8.214194610727557e-07, "loss": 0.7047, "step": 5921 }, { "epoch": 2.938314509122502, "grad_norm": 0.12851146335009603, "learning_rate": 8.206953088057318e-07, "loss": 0.7166, "step": 5922 }, { "epoch": 2.938810971825742, "grad_norm": 0.13209306286470085, "learning_rate": 8.199714131935041e-07, "loss": 0.676, "step": 5923 }, { "epoch": 2.939307434528981, "grad_norm": 0.13328728796070516, "learning_rate": 8.192477743467078e-07, "loss": 0.6941, "step": 5924 }, { "epoch": 2.9398038972322205, "grad_norm": 0.12676782708503537, "learning_rate": 8.185243923759414e-07, "loss": 0.716, "step": 5925 }, { "epoch": 2.94030035993546, "grad_norm": 0.12845269590237002, "learning_rate": 8.178012673917604e-07, "loss": 0.6856, "step": 5926 }, { "epoch": 2.940796822638699, "grad_norm": 0.12791983707358917, "learning_rate": 8.170783995046852e-07, "loss": 0.7007, "step": 5927 }, { "epoch": 2.9412932853419385, "grad_norm": 0.12653344264597674, "learning_rate": 8.163557888251916e-07, "loss": 0.6654, "step": 5928 }, { "epoch": 2.9417897480451782, "grad_norm": 0.1311894343517124, "learning_rate": 8.15633435463721e-07, "loss": 0.6941, "step": 5929 }, { "epoch": 2.9422862107484176, "grad_norm": 0.13045379771212226, "learning_rate": 8.149113395306732e-07, "loss": 0.7029, "step": 5930 }, { "epoch": 2.942782673451657, "grad_norm": 0.1310448907057462, "learning_rate": 8.141895011364082e-07, "loss": 0.6971, "step": 5931 }, { "epoch": 2.9432791361548962, "grad_norm": 0.12684756193510152, "learning_rate": 8.134679203912485e-07, "loss": 0.6932, "step": 5932 }, { "epoch": 2.943775598858136, "grad_norm": 0.13624799129435985, "learning_rate": 8.127465974054749e-07, "loss": 0.7143, "step": 5933 }, { "epoch": 2.9442720615613753, "grad_norm": 0.13275208970745384, "learning_rate": 8.120255322893319e-07, "loss": 0.7652, "step": 5934 }, { "epoch": 2.9447685242646147, "grad_norm": 0.13231133707193993, "learning_rate": 8.113047251530215e-07, "loss": 0.7276, "step": 5935 }, { "epoch": 2.945264986967854, "grad_norm": 0.12564669641658913, "learning_rate": 8.105841761067068e-07, "loss": 0.6761, "step": 5936 }, { "epoch": 2.9457614496710933, "grad_norm": 0.12956694166664687, "learning_rate": 8.098638852605139e-07, "loss": 0.6975, "step": 5937 }, { "epoch": 2.9462579123743327, "grad_norm": 0.12673479579130228, "learning_rate": 8.091438527245263e-07, "loss": 0.659, "step": 5938 }, { "epoch": 2.9467543750775724, "grad_norm": 0.12543122612508772, "learning_rate": 8.084240786087907e-07, "loss": 0.7008, "step": 5939 }, { "epoch": 2.9472508377808118, "grad_norm": 0.13367896037829807, "learning_rate": 8.077045630233121e-07, "loss": 0.7256, "step": 5940 }, { "epoch": 2.947747300484051, "grad_norm": 0.13359993578888146, "learning_rate": 8.069853060780566e-07, "loss": 0.6878, "step": 5941 }, { "epoch": 2.9482437631872904, "grad_norm": 0.12701732001687444, "learning_rate": 8.062663078829524e-07, "loss": 0.7399, "step": 5942 }, { "epoch": 2.94874022589053, "grad_norm": 0.12747058578699347, "learning_rate": 8.055475685478861e-07, "loss": 0.6999, "step": 5943 }, { "epoch": 2.9492366885937695, "grad_norm": 0.12657957164210343, "learning_rate": 8.048290881827053e-07, "loss": 0.7354, "step": 5944 }, { "epoch": 2.949733151297009, "grad_norm": 0.12534187082541948, "learning_rate": 8.041108668972178e-07, "loss": 0.6951, "step": 5945 }, { "epoch": 2.950229614000248, "grad_norm": 0.12398448643069547, "learning_rate": 8.033929048011934e-07, "loss": 0.6824, "step": 5946 }, { "epoch": 2.9507260767034875, "grad_norm": 0.13140028368522533, "learning_rate": 8.026752020043604e-07, "loss": 0.6993, "step": 5947 }, { "epoch": 2.951222539406727, "grad_norm": 0.1339567215216009, "learning_rate": 8.019577586164077e-07, "loss": 0.6883, "step": 5948 }, { "epoch": 2.9517190021099666, "grad_norm": 0.13254826072565667, "learning_rate": 8.012405747469861e-07, "loss": 0.7031, "step": 5949 }, { "epoch": 2.952215464813206, "grad_norm": 0.13203437830444636, "learning_rate": 8.005236505057045e-07, "loss": 0.6924, "step": 5950 }, { "epoch": 2.9527119275164453, "grad_norm": 0.13328318102042208, "learning_rate": 7.998069860021346e-07, "loss": 0.735, "step": 5951 }, { "epoch": 2.9532083902196846, "grad_norm": 0.12989959931327805, "learning_rate": 7.990905813458063e-07, "loss": 0.7524, "step": 5952 }, { "epoch": 2.9537048529229244, "grad_norm": 0.12932924465141693, "learning_rate": 7.983744366462101e-07, "loss": 0.7075, "step": 5953 }, { "epoch": 2.9542013156261637, "grad_norm": 0.1270536630172262, "learning_rate": 7.976585520127982e-07, "loss": 0.6864, "step": 5954 }, { "epoch": 2.954697778329403, "grad_norm": 0.13045118616961768, "learning_rate": 7.969429275549819e-07, "loss": 0.6788, "step": 5955 }, { "epoch": 2.9551942410326424, "grad_norm": 0.1273750155772813, "learning_rate": 7.962275633821321e-07, "loss": 0.7083, "step": 5956 }, { "epoch": 2.9556907037358817, "grad_norm": 0.129306537947579, "learning_rate": 7.955124596035818e-07, "loss": 0.7025, "step": 5957 }, { "epoch": 2.956187166439121, "grad_norm": 0.13380324003505628, "learning_rate": 7.947976163286219e-07, "loss": 0.7551, "step": 5958 }, { "epoch": 2.956683629142361, "grad_norm": 0.1316194370579435, "learning_rate": 7.940830336665071e-07, "loss": 0.7246, "step": 5959 }, { "epoch": 2.9571800918456, "grad_norm": 0.12465311080275596, "learning_rate": 7.933687117264469e-07, "loss": 0.6732, "step": 5960 }, { "epoch": 2.9576765545488395, "grad_norm": 0.1308457845292269, "learning_rate": 7.926546506176158e-07, "loss": 0.7373, "step": 5961 }, { "epoch": 2.958173017252079, "grad_norm": 0.13252382874716395, "learning_rate": 7.919408504491464e-07, "loss": 0.7194, "step": 5962 }, { "epoch": 2.9586694799553186, "grad_norm": 0.12466359487318711, "learning_rate": 7.912273113301306e-07, "loss": 0.6609, "step": 5963 }, { "epoch": 2.959165942658558, "grad_norm": 0.13237910505477793, "learning_rate": 7.905140333696229e-07, "loss": 0.7391, "step": 5964 }, { "epoch": 2.9596624053617973, "grad_norm": 0.13205570481341808, "learning_rate": 7.898010166766348e-07, "loss": 0.7033, "step": 5965 }, { "epoch": 2.9601588680650366, "grad_norm": 0.12754936984318843, "learning_rate": 7.890882613601411e-07, "loss": 0.7007, "step": 5966 }, { "epoch": 2.960655330768276, "grad_norm": 0.12850522933784753, "learning_rate": 7.883757675290746e-07, "loss": 0.7058, "step": 5967 }, { "epoch": 2.9611517934715152, "grad_norm": 0.12454681696499198, "learning_rate": 7.87663535292327e-07, "loss": 0.6232, "step": 5968 }, { "epoch": 2.961648256174755, "grad_norm": 0.12513438738198798, "learning_rate": 7.869515647587539e-07, "loss": 0.6939, "step": 5969 }, { "epoch": 2.9621447188779944, "grad_norm": 0.12503092799347673, "learning_rate": 7.862398560371665e-07, "loss": 0.6937, "step": 5970 }, { "epoch": 2.9626411815812337, "grad_norm": 0.1313520263634869, "learning_rate": 7.855284092363399e-07, "loss": 0.7132, "step": 5971 }, { "epoch": 2.963137644284473, "grad_norm": 0.1324830049247509, "learning_rate": 7.848172244650065e-07, "loss": 0.708, "step": 5972 }, { "epoch": 2.963634106987713, "grad_norm": 0.13238004154599406, "learning_rate": 7.841063018318587e-07, "loss": 0.7363, "step": 5973 }, { "epoch": 2.964130569690952, "grad_norm": 0.12851537121562095, "learning_rate": 7.833956414455512e-07, "loss": 0.7088, "step": 5974 }, { "epoch": 2.9646270323941915, "grad_norm": 0.12750870312778442, "learning_rate": 7.826852434146964e-07, "loss": 0.6739, "step": 5975 }, { "epoch": 2.965123495097431, "grad_norm": 0.13032488783779714, "learning_rate": 7.819751078478669e-07, "loss": 0.6985, "step": 5976 }, { "epoch": 2.96561995780067, "grad_norm": 0.1280481984781406, "learning_rate": 7.812652348535948e-07, "loss": 0.7029, "step": 5977 }, { "epoch": 2.9661164205039094, "grad_norm": 0.12481852362878608, "learning_rate": 7.805556245403748e-07, "loss": 0.6592, "step": 5978 }, { "epoch": 2.966612883207149, "grad_norm": 0.12694865638365466, "learning_rate": 7.798462770166585e-07, "loss": 0.6451, "step": 5979 }, { "epoch": 2.9671093459103886, "grad_norm": 0.12968124976820739, "learning_rate": 7.791371923908569e-07, "loss": 0.7499, "step": 5980 }, { "epoch": 2.967605808613628, "grad_norm": 0.12879024482404638, "learning_rate": 7.784283707713447e-07, "loss": 0.7268, "step": 5981 }, { "epoch": 2.968102271316867, "grad_norm": 0.13142308258429045, "learning_rate": 7.77719812266452e-07, "loss": 0.7254, "step": 5982 }, { "epoch": 2.968598734020107, "grad_norm": 0.12715116614343377, "learning_rate": 7.770115169844719e-07, "loss": 0.6715, "step": 5983 }, { "epoch": 2.9690951967233463, "grad_norm": 0.12867668360826426, "learning_rate": 7.763034850336554e-07, "loss": 0.7037, "step": 5984 }, { "epoch": 2.9695916594265857, "grad_norm": 0.13174571937124394, "learning_rate": 7.75595716522213e-07, "loss": 0.7087, "step": 5985 }, { "epoch": 2.970088122129825, "grad_norm": 0.12868392659804886, "learning_rate": 7.748882115583175e-07, "loss": 0.7026, "step": 5986 }, { "epoch": 2.9705845848330643, "grad_norm": 0.1304279274224821, "learning_rate": 7.741809702500983e-07, "loss": 0.68, "step": 5987 }, { "epoch": 2.9710810475363036, "grad_norm": 0.13024362648385276, "learning_rate": 7.734739927056467e-07, "loss": 0.7152, "step": 5988 }, { "epoch": 2.9715775102395434, "grad_norm": 0.13003977622755503, "learning_rate": 7.727672790330129e-07, "loss": 0.6805, "step": 5989 }, { "epoch": 2.9720739729427827, "grad_norm": 0.1307381982553933, "learning_rate": 7.720608293402055e-07, "loss": 0.7367, "step": 5990 }, { "epoch": 2.972570435646022, "grad_norm": 0.13211408639458272, "learning_rate": 7.713546437351965e-07, "loss": 0.7141, "step": 5991 }, { "epoch": 2.9730668983492614, "grad_norm": 0.1247955923407189, "learning_rate": 7.706487223259121e-07, "loss": 0.6826, "step": 5992 }, { "epoch": 2.973563361052501, "grad_norm": 0.13116304767044074, "learning_rate": 7.69943065220243e-07, "loss": 0.7277, "step": 5993 }, { "epoch": 2.9740598237557405, "grad_norm": 0.13479680364255306, "learning_rate": 7.692376725260369e-07, "loss": 0.6846, "step": 5994 }, { "epoch": 2.97455628645898, "grad_norm": 0.12782009680839385, "learning_rate": 7.685325443511015e-07, "loss": 0.6846, "step": 5995 }, { "epoch": 2.975052749162219, "grad_norm": 0.1336362278759992, "learning_rate": 7.678276808032054e-07, "loss": 0.7207, "step": 5996 }, { "epoch": 2.9755492118654585, "grad_norm": 0.12954084216492223, "learning_rate": 7.671230819900741e-07, "loss": 0.722, "step": 5997 }, { "epoch": 2.976045674568698, "grad_norm": 0.12870683674641, "learning_rate": 7.66418748019396e-07, "loss": 0.6928, "step": 5998 }, { "epoch": 2.976542137271937, "grad_norm": 0.1283984345940394, "learning_rate": 7.657146789988165e-07, "loss": 0.6856, "step": 5999 }, { "epoch": 2.977038599975177, "grad_norm": 0.12793776246424288, "learning_rate": 7.650108750359403e-07, "loss": 0.6919, "step": 6000 }, { "epoch": 2.9775350626784163, "grad_norm": 0.12577650631266427, "learning_rate": 7.643073362383341e-07, "loss": 0.6853, "step": 6001 }, { "epoch": 2.9780315253816556, "grad_norm": 0.12392465457564755, "learning_rate": 7.636040627135211e-07, "loss": 0.722, "step": 6002 }, { "epoch": 2.9785279880848954, "grad_norm": 0.12418251160738356, "learning_rate": 7.629010545689869e-07, "loss": 0.7056, "step": 6003 }, { "epoch": 2.9790244507881347, "grad_norm": 0.1271367787777443, "learning_rate": 7.621983119121742e-07, "loss": 0.6494, "step": 6004 }, { "epoch": 2.979520913491374, "grad_norm": 0.12870235470062621, "learning_rate": 7.614958348504853e-07, "loss": 0.7274, "step": 6005 }, { "epoch": 2.9800173761946134, "grad_norm": 0.12786870456842656, "learning_rate": 7.607936234912841e-07, "loss": 0.6715, "step": 6006 }, { "epoch": 2.9805138388978527, "grad_norm": 0.12959870966522002, "learning_rate": 7.600916779418916e-07, "loss": 0.7442, "step": 6007 }, { "epoch": 2.981010301601092, "grad_norm": 0.12795538478138033, "learning_rate": 7.593899983095884e-07, "loss": 0.6919, "step": 6008 }, { "epoch": 2.9815067643043314, "grad_norm": 0.14008517233459958, "learning_rate": 7.586885847016148e-07, "loss": 0.7013, "step": 6009 }, { "epoch": 2.982003227007571, "grad_norm": 0.1273352199473159, "learning_rate": 7.579874372251722e-07, "loss": 0.7015, "step": 6010 }, { "epoch": 2.9824996897108105, "grad_norm": 0.12663695622291235, "learning_rate": 7.572865559874188e-07, "loss": 0.7045, "step": 6011 }, { "epoch": 2.98299615241405, "grad_norm": 0.1309777399910531, "learning_rate": 7.565859410954718e-07, "loss": 0.7366, "step": 6012 }, { "epoch": 2.9834926151172896, "grad_norm": 0.1281630321138167, "learning_rate": 7.558855926564112e-07, "loss": 0.7116, "step": 6013 }, { "epoch": 2.983989077820529, "grad_norm": 0.1280931136947841, "learning_rate": 7.551855107772724e-07, "loss": 0.7073, "step": 6014 }, { "epoch": 2.9844855405237682, "grad_norm": 0.12459877040568573, "learning_rate": 7.544856955650532e-07, "loss": 0.6964, "step": 6015 }, { "epoch": 2.9849820032270076, "grad_norm": 0.12667593238205743, "learning_rate": 7.537861471267077e-07, "loss": 0.6639, "step": 6016 }, { "epoch": 2.985478465930247, "grad_norm": 0.12902244841475374, "learning_rate": 7.530868655691509e-07, "loss": 0.7196, "step": 6017 }, { "epoch": 2.9859749286334862, "grad_norm": 0.12779617048573458, "learning_rate": 7.523878509992578e-07, "loss": 0.6979, "step": 6018 }, { "epoch": 2.9864713913367256, "grad_norm": 0.12907365853546054, "learning_rate": 7.516891035238596e-07, "loss": 0.682, "step": 6019 }, { "epoch": 2.9869678540399653, "grad_norm": 0.12929753241306882, "learning_rate": 7.509906232497513e-07, "loss": 0.7007, "step": 6020 }, { "epoch": 2.9874643167432047, "grad_norm": 0.12701020359185758, "learning_rate": 7.502924102836826e-07, "loss": 0.6893, "step": 6021 }, { "epoch": 2.987960779446444, "grad_norm": 0.1237167641591724, "learning_rate": 7.495944647323639e-07, "loss": 0.709, "step": 6022 }, { "epoch": 2.988457242149684, "grad_norm": 0.13193601872797872, "learning_rate": 7.488967867024671e-07, "loss": 0.7199, "step": 6023 }, { "epoch": 2.988953704852923, "grad_norm": 0.12998564461199474, "learning_rate": 7.481993763006184e-07, "loss": 0.6595, "step": 6024 }, { "epoch": 2.9894501675561624, "grad_norm": 0.13111171647856992, "learning_rate": 7.475022336334075e-07, "loss": 0.6868, "step": 6025 }, { "epoch": 2.9899466302594018, "grad_norm": 0.12835689645376358, "learning_rate": 7.468053588073803e-07, "loss": 0.7071, "step": 6026 }, { "epoch": 2.990443092962641, "grad_norm": 0.1259901601242702, "learning_rate": 7.461087519290447e-07, "loss": 0.7059, "step": 6027 }, { "epoch": 2.9909395556658804, "grad_norm": 0.12268405384296684, "learning_rate": 7.454124131048646e-07, "loss": 0.6555, "step": 6028 }, { "epoch": 2.9914360183691198, "grad_norm": 0.13427530622463077, "learning_rate": 7.447163424412638e-07, "loss": 0.7329, "step": 6029 }, { "epoch": 2.9919324810723595, "grad_norm": 0.12630933697427535, "learning_rate": 7.440205400446271e-07, "loss": 0.7285, "step": 6030 }, { "epoch": 2.992428943775599, "grad_norm": 0.1282172265353785, "learning_rate": 7.433250060212957e-07, "loss": 0.6894, "step": 6031 }, { "epoch": 2.992925406478838, "grad_norm": 0.1242625795254969, "learning_rate": 7.426297404775701e-07, "loss": 0.6954, "step": 6032 }, { "epoch": 2.9934218691820775, "grad_norm": 0.12573869185290845, "learning_rate": 7.419347435197125e-07, "loss": 0.6515, "step": 6033 }, { "epoch": 2.9939183318853173, "grad_norm": 0.12632264327088447, "learning_rate": 7.412400152539398e-07, "loss": 0.6976, "step": 6034 }, { "epoch": 2.9944147945885566, "grad_norm": 0.12492304063826659, "learning_rate": 7.405455557864322e-07, "loss": 0.6691, "step": 6035 }, { "epoch": 2.994911257291796, "grad_norm": 0.12637656925192833, "learning_rate": 7.398513652233255e-07, "loss": 0.7189, "step": 6036 }, { "epoch": 2.9954077199950353, "grad_norm": 0.12460961740826068, "learning_rate": 7.391574436707153e-07, "loss": 0.6838, "step": 6037 }, { "epoch": 2.9959041826982746, "grad_norm": 0.13406612342755167, "learning_rate": 7.384637912346573e-07, "loss": 0.7276, "step": 6038 }, { "epoch": 2.996400645401514, "grad_norm": 0.12791763802640216, "learning_rate": 7.377704080211651e-07, "loss": 0.7028, "step": 6039 }, { "epoch": 2.9968971081047537, "grad_norm": 0.12705616081597926, "learning_rate": 7.370772941362106e-07, "loss": 0.7045, "step": 6040 }, { "epoch": 2.997393570807993, "grad_norm": 0.13088558569032857, "learning_rate": 7.363844496857247e-07, "loss": 0.6877, "step": 6041 }, { "epoch": 2.9978900335112324, "grad_norm": 0.12444783921112863, "learning_rate": 7.356918747755989e-07, "loss": 0.688, "step": 6042 } ], "logging_steps": 1, "max_steps": 8056, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 2014, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1358095334768640.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }